| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::SoundChange; |
|
2
|
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
101155
|
use 5.005_03; |
|
|
4
|
|
|
|
|
14
|
|
|
|
4
|
|
|
|
|
185
|
|
|
4
|
4
|
|
|
4
|
|
22
|
use strict; |
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
142
|
|
|
5
|
4
|
|
|
4
|
|
26
|
use Carp; |
|
|
4
|
|
|
|
|
11
|
|
|
|
4
|
|
|
|
|
413
|
|
|
6
|
|
|
|
|
|
|
# use warnings; |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# for debugging: |
|
9
|
4
|
|
50
|
4
|
|
20
|
use constant PRINT_RULES => $ENV{LINGUA_SOUNDCHANGE_PRINTRULES} || 0; |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
355
|
|
|
10
|
4
|
|
|
4
|
|
19
|
use constant DEBUG => 0; |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
166
|
|
|
11
|
4
|
|
|
4
|
|
21
|
use vars qw($VERSION); |
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
6870
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$VERSION = '0.05'; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub compile_rules($$$$); |
|
16
|
|
|
|
|
|
|
sub compile_vars($$); |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
sub new { |
|
19
|
22
|
|
|
22
|
1
|
20944
|
my($class, $vars, $rules, $opts) = @_; |
|
20
|
|
|
|
|
|
|
|
|
21
|
22
|
|
100
|
|
|
109
|
$opts ||= { }; |
|
22
|
|
|
|
|
|
|
|
|
23
|
22
|
50
|
|
|
|
88
|
croak '$vars must be a hash reference!' unless ref $vars eq 'HASH'; |
|
24
|
22
|
50
|
|
|
|
68
|
croak '$rules must be an array reference!' unless ref $rules eq 'ARRAY'; |
|
25
|
|
|
|
|
|
|
|
|
26
|
22
|
|
|
|
|
95
|
my $obj = { |
|
27
|
|
|
|
|
|
|
raw_vars => $vars, |
|
28
|
|
|
|
|
|
|
raw_rules => $rules, |
|
29
|
|
|
|
|
|
|
opts => $opts, |
|
30
|
|
|
|
|
|
|
}; |
|
31
|
|
|
|
|
|
|
|
|
32
|
22
|
|
|
|
|
66
|
$obj->{vars} = compile_vars($vars, $opts); |
|
33
|
22
|
100
|
|
|
|
69
|
if($opts->{longVars}) { |
|
34
|
6
|
|
|
|
|
47
|
$obj->{raw_vars}->{"\Q<$_>\E"} = $vars->{$_} for keys %$vars; |
|
35
|
|
|
|
|
|
|
} |
|
36
|
22
|
|
|
|
|
181
|
( $obj->{rules}, |
|
37
|
|
|
|
|
|
|
$obj->{code} ) = compile_rules($rules, $obj->{vars}, $obj->{raw_vars}, $opts); |
|
38
|
|
|
|
|
|
|
|
|
39
|
22
|
|
|
|
|
112
|
bless $obj, $class; |
|
40
|
|
|
|
|
|
|
} |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub change { |
|
43
|
22
|
|
|
22
|
1
|
14483
|
my($self, $words) = @_; |
|
44
|
|
|
|
|
|
|
|
|
45
|
22
|
50
|
|
|
|
69
|
$words = [] unless defined $words; |
|
46
|
|
|
|
|
|
|
|
|
47
|
22
|
50
|
|
|
|
682
|
croak("change needs an array reference") unless ref($words) eq 'ARRAY'; |
|
48
|
|
|
|
|
|
|
|
|
49
|
22
|
|
|
|
|
48
|
foreach my $word (@$words) { |
|
50
|
54
|
|
|
|
|
77
|
my $origword = $word; |
|
51
|
54
|
|
|
|
|
64
|
my @rules; |
|
52
|
|
|
|
|
|
|
my $ruleout; |
|
53
|
54
|
|
|
|
|
65
|
foreach my $rule (@{$self->rules}) { |
|
|
54
|
|
|
|
|
131
|
|
|
54
|
168
|
|
|
|
|
5064
|
($word, $ruleout) = $rule->($word); |
|
55
|
168
|
100
|
|
|
|
573
|
push @rules, $ruleout if defined $ruleout; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
|
|
58
|
54
|
|
|
|
|
321
|
$word = { orig => $origword, word => $word, rules => \@rules }; |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
22
|
|
|
|
|
87
|
$words; |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Private methods |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub compile_rules ($$$$) { |
|
68
|
22
|
|
|
22
|
0
|
49
|
my($rules, $vars, $varstring, $opt) = @_; |
|
69
|
|
|
|
|
|
|
|
|
70
|
22
|
50
|
|
|
|
67
|
croak "rules not an array reference" unless ref $rules eq 'ARRAY'; |
|
71
|
22
|
50
|
|
|
|
65
|
croak "vars not a hash reference" unless ref $vars eq 'HASH'; |
|
72
|
22
|
50
|
|
|
|
55
|
croak "varstring not a hash reference" unless ref $varstring eq 'HASH'; |
|
73
|
22
|
50
|
|
|
|
56
|
croak "opt not a hash reference" unless ref $opt eq 'HASH'; |
|
74
|
|
|
|
|
|
|
|
|
75
|
22
|
|
|
|
|
35
|
my @compiledrules; |
|
76
|
|
|
|
|
|
|
my %code; |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Rules: change from a sound (one or more letters) or a category |
|
79
|
|
|
|
|
|
|
# to another sound or category, in a certain environment. |
|
80
|
|
|
|
|
|
|
# Categories may only be one letter long and are usually uppercase. |
|
81
|
|
|
|
|
|
|
# Environments must contain a _ symbol to show where the replacement |
|
82
|
|
|
|
|
|
|
# takes place; it may also contain letters, categories, and the special |
|
83
|
|
|
|
|
|
|
# symbols ( ) (to enclose optional parts) and # (beginning or end of |
|
84
|
|
|
|
|
|
|
# word). |
|
85
|
|
|
|
|
|
|
# Rules can only change sounds to sounds, and categories to categories. |
|
86
|
|
|
|
|
|
|
# If a category is to be changed to another category, they should be |
|
87
|
|
|
|
|
|
|
# the same length. Otherwise the second category will have its last |
|
88
|
|
|
|
|
|
|
# letter repeated until it has the same length as the first (if it is |
|
89
|
|
|
|
|
|
|
# shorter), or characters in the second category that don't match |
|
90
|
|
|
|
|
|
|
# characters in the first will not be produced by such a range. Note |
|
91
|
|
|
|
|
|
|
# that this is an artefact of the use of tr/// and is not guaranteed |
|
92
|
|
|
|
|
|
|
# behaviour. |
|
93
|
|
|
|
|
|
|
# Don't use regex metacharacters (except for the parentheses which |
|
94
|
|
|
|
|
|
|
# show optional elements) in the environment or in names of categories |
|
95
|
|
|
|
|
|
|
# or sounds. |
|
96
|
|
|
|
|
|
|
# These include: . * + ? ^ $ [ ] |
|
97
|
|
|
|
|
|
|
|
|
98
|
22
|
|
|
|
|
50
|
foreach my $rule (@$rules) { |
|
99
|
40
|
50
|
|
|
|
283
|
if( $rule =~ m{ |
|
100
|
|
|
|
|
|
|
^ |
|
101
|
|
|
|
|
|
|
( [^/]+ ) # "change from" to $1 |
|
102
|
|
|
|
|
|
|
/ # slash |
|
103
|
|
|
|
|
|
|
( [^/]* ) # "change to" to $2 (may be blank) |
|
104
|
|
|
|
|
|
|
/ # slash |
|
105
|
|
|
|
|
|
|
( .+ ) # "environment" to $3 (may not be blank) |
|
106
|
|
|
|
|
|
|
$ |
|
107
|
|
|
|
|
|
|
}x ) |
|
108
|
|
|
|
|
|
|
{ |
|
109
|
40
|
|
|
|
|
186
|
my($from, $to, $env) = ($1, $2, $3); |
|
110
|
40
|
|
|
|
|
76
|
my($subfrom, $subto) = ('', ''); |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
my $option = sub { |
|
113
|
80
|
|
|
80
|
|
128
|
my $word = shift; |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# Change parentheses round one element to a question |
|
116
|
|
|
|
|
|
|
# mark following it, ... |
|
117
|
80
|
|
|
|
|
122
|
$word =~ s{ |
|
118
|
|
|
|
|
|
|
\( # open parenthesis |
|
119
|
|
|
|
|
|
|
(.) # one character, to $1 |
|
120
|
|
|
|
|
|
|
\) # close parenthesis |
|
121
|
|
|
|
|
|
|
}{ |
|
122
|
0
|
|
|
|
|
0
|
$1 . '?' |
|
123
|
|
|
|
|
|
|
}gex; |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# ... and parentheses around multiple elements by |
|
126
|
|
|
|
|
|
|
# non-capturing parentheses followed by a question mark |
|
127
|
|
|
|
|
|
|
# (but nested parentheses are not allowed!) |
|
128
|
80
|
|
|
|
|
97
|
$word =~ s{ |
|
129
|
|
|
|
|
|
|
\( # open parenthesis |
|
130
|
|
|
|
|
|
|
( [^()]+ ) # one or more non-parenthesis characters, |
|
131
|
|
|
|
|
|
|
# to $1 |
|
132
|
|
|
|
|
|
|
\) # close parenthesis |
|
133
|
|
|
|
|
|
|
}{ |
|
134
|
0
|
|
|
|
|
0
|
'(?:' . $1 . ')?' |
|
135
|
|
|
|
|
|
|
}gex; |
|
136
|
|
|
|
|
|
|
|
|
137
|
80
|
|
|
|
|
189
|
$word; |
|
138
|
40
|
|
|
|
|
260
|
}; |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Escape dollar signs and at signs in $env, which would |
|
141
|
|
|
|
|
|
|
# otherwise try to interpolate a variable into the regular |
|
142
|
|
|
|
|
|
|
# expression |
|
143
|
40
|
|
|
|
|
86
|
$env =~ s/([\$\@])/\\$1/g; |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Get the bits before and after the underscore |
|
146
|
|
|
|
|
|
|
# and put them in capturing parentheses in $subfrom |
|
147
|
40
|
50
|
|
|
|
195
|
if($env =~ /^(#?)([^_#]*)(_)([^_#]*)(#?)$/) { |
|
148
|
|
|
|
|
|
|
# leading # |
|
149
|
40
|
100
|
66
|
|
|
226
|
$subfrom .= '^' if defined $1 && length $1; |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# preceding stuff, so we don't need $` |
|
152
|
|
|
|
|
|
|
# non-greedy |
|
153
|
|
|
|
|
|
|
# If match anchored at beginning, don't add this |
|
154
|
40
|
100
|
66
|
|
|
214
|
if(defined $1 && length $1) { |
|
155
|
1
|
|
|
|
|
2
|
$subfrom .= '()'; |
|
156
|
|
|
|
|
|
|
} else { |
|
157
|
39
|
|
|
|
|
66
|
$subfrom .= '(.*?)'; |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# pre-environment |
|
161
|
40
|
|
|
|
|
146
|
$subfrom .= '(' . $option->(quotemeta $2) . ')'; |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# underscore |
|
164
|
40
|
|
|
|
|
639
|
$subfrom .= "(\Q$from\E)"; |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# post-environment |
|
167
|
40
|
|
|
|
|
112
|
$subfrom .= '(' . $option->(quotemeta $4) . ')'; |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# trailing # |
|
170
|
40
|
100
|
66
|
|
|
229
|
$subfrom .= '$' if defined $5 && length $5; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Now expand categories |
|
174
|
40
|
100
|
|
|
|
120
|
if($opt->{longVars}) { |
|
175
|
9
|
50
|
|
|
|
53
|
$subfrom =~ s{(\\<[^>]+\\>)}{$vars->{$1} || $1}eg; |
|
|
11
|
|
|
|
|
64
|
|
|
176
|
|
|
|
|
|
|
} else { |
|
177
|
31
|
100
|
|
|
|
153
|
$subfrom =~ s{(\\?)(.)}{$vars->{$2} || $1 . $2}eg; |
|
|
396
|
|
|
|
|
2036
|
|
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Show where the rule matches |
|
181
|
40
|
|
|
|
|
135
|
$subto .= qq(\$rule = "\Q$from\E->\Q$to\E /\Q$env\E applies to \$word at " . (length(\$1)+1) . "\\n"; ); |
|
182
|
|
|
|
|
|
|
|
|
183
|
40
|
|
|
|
|
55
|
$subto .= '$1 . $2 . '; |
|
184
|
40
|
100
|
66
|
|
|
151
|
$subto .= ($vars->{quotemeta $from} |
|
185
|
|
|
|
|
|
|
? "do { my \$char = \$3; \$char =~ tr{$varstring->{quotemeta $from}}{" . ($varstring->{quotemeta $to} || $to) . "}; \$char }" |
|
186
|
|
|
|
|
|
|
: "q{" . $to . "}"); |
|
187
|
40
|
|
|
|
|
53
|
$subto .= ' . $4'; |
|
188
|
|
|
|
|
|
|
|
|
189
|
40
|
|
|
|
|
649
|
if(PRINT_RULES) { |
|
190
|
|
|
|
|
|
|
print "[", $#compiledrules + 1, "] $rule --> s{$subfrom}{$subto}eg\n"; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
|
|
193
|
40
|
|
|
|
|
246
|
my $code = <<"EOF"; |
|
194
|
|
|
|
|
|
|
sub { |
|
195
|
|
|
|
|
|
|
my \$word = shift; |
|
196
|
|
|
|
|
|
|
my \$rule; |
|
197
|
|
|
|
|
|
|
my \$orig; |
|
198
|
|
|
|
|
|
|
# print qq(Working on '\$word'; \Q$from\E->\Q$to\E /\Q$env\E; from is '\Q$subfrom\E and to is '\Q$subto\E'\\n); |
|
199
|
|
|
|
|
|
|
1 while ((\$orig = \$word) =~ s{$subfrom}{$subto}e) && (\$orig ne \$word) && (\$word = \$orig); |
|
200
|
|
|
|
|
|
|
return ( \$word, \$rule ); |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
EOF |
|
203
|
|
|
|
|
|
|
|
|
204
|
40
|
|
|
|
|
8649
|
push @compiledrules, eval $code; |
|
205
|
40
|
50
|
|
|
|
131
|
croak "Problem with '$code'" unless $compiledrules[-1]; |
|
206
|
40
|
|
|
|
|
310
|
$code{$compiledrules[-1]} = $code; |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
} |
|
209
|
|
|
|
|
|
|
|
|
210
|
22
|
|
|
|
|
140
|
( \@compiledrules, \%code ); |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub compile_vars ($$) { |
|
214
|
22
|
|
|
22
|
0
|
44
|
my($vars, $opt) = @_; |
|
215
|
|
|
|
|
|
|
|
|
216
|
22
|
50
|
|
|
|
73
|
croak "\$vars is not a hash reference" unless ref $vars eq 'HASH'; |
|
217
|
22
|
50
|
|
|
|
71
|
croak "\$opt is not a hash reference" unless ref $opt eq 'HASH'; |
|
218
|
|
|
|
|
|
|
|
|
219
|
22
|
|
|
|
|
28
|
my %compiledvars; |
|
220
|
|
|
|
|
|
|
|
|
221
|
22
|
|
|
|
|
94
|
foreach my $var (keys %$vars) { |
|
222
|
26
|
|
|
|
|
46
|
my $list = $vars->{$var}; |
|
223
|
|
|
|
|
|
|
# Escape at signs and dollars in the list |
|
224
|
26
|
|
|
|
|
49
|
$list =~ s/([\$\@])/\\$1/g; |
|
225
|
|
|
|
|
|
|
|
|
226
|
26
|
100
|
|
|
|
62
|
if($opt->{longVars}) { |
|
227
|
12
|
|
|
|
|
226
|
$compiledvars{"\Q<$var>\E"} = qr/[$list]/; |
|
228
|
12
|
|
|
|
|
34
|
print qq[($var => $list // $compiledvars{"\Q<$var>\E"})\n] if PRINT_RULES; |
|
229
|
|
|
|
|
|
|
} else { |
|
230
|
14
|
|
|
|
|
249
|
$compiledvars{$var} = qr/[$list]/; |
|
231
|
14
|
|
|
|
|
42
|
print "($var => $list // $compiledvars{$var})\n" if PRINT_RULES; |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
22
|
|
|
|
|
84
|
\%compiledvars; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub rules { |
|
239
|
54
|
|
|
54
|
1
|
79
|
my($self) = @_; |
|
240
|
|
|
|
|
|
|
|
|
241
|
54
|
|
|
|
|
181
|
$self->{rules}; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub vars { |
|
245
|
0
|
|
|
0
|
0
|
|
my($self) = @_; |
|
246
|
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
|
$self->{vars}; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub varstring { |
|
251
|
0
|
|
|
0
|
0
|
|
my($self) = @_; |
|
252
|
|
|
|
|
|
|
|
|
253
|
0
|
|
|
|
|
|
$self->{raw_vars}; |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub code { |
|
257
|
0
|
|
|
0
|
0
|
|
my($self, $token, $code) = @_; |
|
258
|
|
|
|
|
|
|
|
|
259
|
0
|
0
|
|
|
|
|
$self->{code}->{$token} = $code if $code; |
|
260
|
|
|
|
|
|
|
|
|
261
|
0
|
|
|
|
|
|
$self->{code}->{$token}; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
1; |
|
266
|
|
|
|
|
|
|
__END__ |