line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::SoundChange; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
101155
|
use 5.005_03; |
|
4
|
|
|
|
|
14
|
|
|
4
|
|
|
|
|
185
|
|
4
|
4
|
|
|
4
|
|
22
|
use strict; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
142
|
|
5
|
4
|
|
|
4
|
|
26
|
use Carp; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
413
|
|
6
|
|
|
|
|
|
|
# use warnings; |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# for debugging: |
9
|
4
|
|
50
|
4
|
|
20
|
use constant PRINT_RULES => $ENV{LINGUA_SOUNDCHANGE_PRINTRULES} || 0; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
355
|
|
10
|
4
|
|
|
4
|
|
19
|
use constant DEBUG => 0; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
166
|
|
11
|
4
|
|
|
4
|
|
21
|
use vars qw($VERSION); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
6870
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$VERSION = '0.05'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub compile_rules($$$$); |
16
|
|
|
|
|
|
|
sub compile_vars($$); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
sub new { |
19
|
22
|
|
|
22
|
1
|
20944
|
my($class, $vars, $rules, $opts) = @_; |
20
|
|
|
|
|
|
|
|
21
|
22
|
|
100
|
|
|
109
|
$opts ||= { }; |
22
|
|
|
|
|
|
|
|
23
|
22
|
50
|
|
|
|
88
|
croak '$vars must be a hash reference!' unless ref $vars eq 'HASH'; |
24
|
22
|
50
|
|
|
|
68
|
croak '$rules must be an array reference!' unless ref $rules eq 'ARRAY'; |
25
|
|
|
|
|
|
|
|
26
|
22
|
|
|
|
|
95
|
my $obj = { |
27
|
|
|
|
|
|
|
raw_vars => $vars, |
28
|
|
|
|
|
|
|
raw_rules => $rules, |
29
|
|
|
|
|
|
|
opts => $opts, |
30
|
|
|
|
|
|
|
}; |
31
|
|
|
|
|
|
|
|
32
|
22
|
|
|
|
|
66
|
$obj->{vars} = compile_vars($vars, $opts); |
33
|
22
|
100
|
|
|
|
69
|
if($opts->{longVars}) { |
34
|
6
|
|
|
|
|
47
|
$obj->{raw_vars}->{"\Q<$_>\E"} = $vars->{$_} for keys %$vars; |
35
|
|
|
|
|
|
|
} |
36
|
22
|
|
|
|
|
181
|
( $obj->{rules}, |
37
|
|
|
|
|
|
|
$obj->{code} ) = compile_rules($rules, $obj->{vars}, $obj->{raw_vars}, $opts); |
38
|
|
|
|
|
|
|
|
39
|
22
|
|
|
|
|
112
|
bless $obj, $class; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub change { |
43
|
22
|
|
|
22
|
1
|
14483
|
my($self, $words) = @_; |
44
|
|
|
|
|
|
|
|
45
|
22
|
50
|
|
|
|
69
|
$words = [] unless defined $words; |
46
|
|
|
|
|
|
|
|
47
|
22
|
50
|
|
|
|
682
|
croak("change needs an array reference") unless ref($words) eq 'ARRAY'; |
48
|
|
|
|
|
|
|
|
49
|
22
|
|
|
|
|
48
|
foreach my $word (@$words) { |
50
|
54
|
|
|
|
|
77
|
my $origword = $word; |
51
|
54
|
|
|
|
|
64
|
my @rules; |
52
|
|
|
|
|
|
|
my $ruleout; |
53
|
54
|
|
|
|
|
65
|
foreach my $rule (@{$self->rules}) { |
|
54
|
|
|
|
|
131
|
|
54
|
168
|
|
|
|
|
5064
|
($word, $ruleout) = $rule->($word); |
55
|
168
|
100
|
|
|
|
573
|
push @rules, $ruleout if defined $ruleout; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
54
|
|
|
|
|
321
|
$word = { orig => $origword, word => $word, rules => \@rules }; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
22
|
|
|
|
|
87
|
$words; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Private methods |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub compile_rules ($$$$) { |
68
|
22
|
|
|
22
|
0
|
49
|
my($rules, $vars, $varstring, $opt) = @_; |
69
|
|
|
|
|
|
|
|
70
|
22
|
50
|
|
|
|
67
|
croak "rules not an array reference" unless ref $rules eq 'ARRAY'; |
71
|
22
|
50
|
|
|
|
65
|
croak "vars not a hash reference" unless ref $vars eq 'HASH'; |
72
|
22
|
50
|
|
|
|
55
|
croak "varstring not a hash reference" unless ref $varstring eq 'HASH'; |
73
|
22
|
50
|
|
|
|
56
|
croak "opt not a hash reference" unless ref $opt eq 'HASH'; |
74
|
|
|
|
|
|
|
|
75
|
22
|
|
|
|
|
35
|
my @compiledrules; |
76
|
|
|
|
|
|
|
my %code; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Rules: change from a sound (one or more letters) or a category |
79
|
|
|
|
|
|
|
# to another sound or category, in a certain environment. |
80
|
|
|
|
|
|
|
# Categories may only be one letter long and are usually uppercase. |
81
|
|
|
|
|
|
|
# Environments must contain a _ symbol to show where the replacement |
82
|
|
|
|
|
|
|
# takes place; it may also contain letters, categories, and the special |
83
|
|
|
|
|
|
|
# symbols ( ) (to enclose optional parts) and # (beginning or end of |
84
|
|
|
|
|
|
|
# word). |
85
|
|
|
|
|
|
|
# Rules can only change sounds to sounds, and categories to categories. |
86
|
|
|
|
|
|
|
# If a category is to be changed to another category, they should be |
87
|
|
|
|
|
|
|
# the same length. Otherwise the second category will have its laster |
88
|
|
|
|
|
|
|
# letter repeated until it has the same length as the first (if it is |
89
|
|
|
|
|
|
|
# shorter), or characters in the second category that don't match |
90
|
|
|
|
|
|
|
# characters in the first will not be produced by such a range. Note |
91
|
|
|
|
|
|
|
# that this is an artefact of the use of tr/// and is not guaranteed |
92
|
|
|
|
|
|
|
# behaviour. |
93
|
|
|
|
|
|
|
# Don't use regex metacharacters (except for the parentheses which |
94
|
|
|
|
|
|
|
# show optional elements) in the environment or in names of categories |
95
|
|
|
|
|
|
|
# or sounds. |
96
|
|
|
|
|
|
|
# These include: . * + ? ^ $ [ ] |
97
|
|
|
|
|
|
|
|
98
|
22
|
|
|
|
|
50
|
foreach my $rule (@$rules) { |
99
|
40
|
50
|
|
|
|
283
|
if( $rule =~ m{ |
100
|
|
|
|
|
|
|
^ |
101
|
|
|
|
|
|
|
( [^/]+ ) # "change from" to $1 |
102
|
|
|
|
|
|
|
/ # slash |
103
|
|
|
|
|
|
|
( [^/]* ) # "change to" to $2 (may be blank) |
104
|
|
|
|
|
|
|
/ # slash |
105
|
|
|
|
|
|
|
( .+ ) # "environment" to $3 (may not be blank) |
106
|
|
|
|
|
|
|
$ |
107
|
|
|
|
|
|
|
}x ) |
108
|
|
|
|
|
|
|
{ |
109
|
40
|
|
|
|
|
186
|
my($from, $to, $env) = ($1, $2, $3); |
110
|
40
|
|
|
|
|
76
|
my($subfrom, $subto) = ('', ''); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
my $option = sub { |
113
|
80
|
|
|
80
|
|
128
|
my $word = shift; |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# Change parentheses round one element to a question |
116
|
|
|
|
|
|
|
# mark following it, ... |
117
|
80
|
|
|
|
|
122
|
$word =~ s{ |
118
|
|
|
|
|
|
|
\( # open parenthesis |
119
|
|
|
|
|
|
|
(.) # one character, to $1 |
120
|
|
|
|
|
|
|
\) # close parenthesis |
121
|
|
|
|
|
|
|
}{ |
122
|
0
|
|
|
|
|
0
|
$1 . '?' |
123
|
|
|
|
|
|
|
}gex; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# ... and parentheses around multiple elements by |
126
|
|
|
|
|
|
|
# non-capturing parentheses followed by a question mark |
127
|
|
|
|
|
|
|
# (but nested parentheses are not allowed!) |
128
|
80
|
|
|
|
|
97
|
$word =~ s{ |
129
|
|
|
|
|
|
|
\( # open parenthesis |
130
|
|
|
|
|
|
|
( [^()]+ ) # one or more non-parenthesis characters, |
131
|
|
|
|
|
|
|
# to $1 |
132
|
|
|
|
|
|
|
\) # close parenthesis |
133
|
|
|
|
|
|
|
}{ |
134
|
0
|
|
|
|
|
0
|
'(?:' . $1 . ')?' |
135
|
|
|
|
|
|
|
}gex; |
136
|
|
|
|
|
|
|
|
137
|
80
|
|
|
|
|
189
|
$word; |
138
|
40
|
|
|
|
|
260
|
}; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Escape dollar signs and at signs in $env, which would |
141
|
|
|
|
|
|
|
# otherwise try to interpolate a variable into the regular |
142
|
|
|
|
|
|
|
# expression |
143
|
40
|
|
|
|
|
86
|
$env =~ s/([\$\@])/\\$1/g; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Get the bits before and after the underscore |
146
|
|
|
|
|
|
|
# and put them in capturing parentheses in $subfrom |
147
|
40
|
50
|
|
|
|
195
|
if($env =~ /^(#?)([^_#]*)(_)([^_#]*)(#?)$/) { |
148
|
|
|
|
|
|
|
# leading # |
149
|
40
|
100
|
66
|
|
|
226
|
$subfrom .= '^' if defined $1 && length $1; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# preceding stuff, so we don't need $` |
152
|
|
|
|
|
|
|
# non-greedy |
153
|
|
|
|
|
|
|
# If match anchored at beginning, don't add this |
154
|
40
|
100
|
66
|
|
|
214
|
if(defined $1 && length $1) { |
155
|
1
|
|
|
|
|
2
|
$subfrom .= '()'; |
156
|
|
|
|
|
|
|
} else { |
157
|
39
|
|
|
|
|
66
|
$subfrom .= '(.*?)'; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# pre-environment |
161
|
40
|
|
|
|
|
146
|
$subfrom .= '(' . $option->(quotemeta $2) . ')'; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# underscore |
164
|
40
|
|
|
|
|
639
|
$subfrom .= "(\Q$from\E)"; |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# post-environment |
167
|
40
|
|
|
|
|
112
|
$subfrom .= '(' . $option->(quotemeta $4) . ')'; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# trailing # |
170
|
40
|
100
|
66
|
|
|
229
|
$subfrom .= '$' if defined $5 && length $5; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Now expand categories |
174
|
40
|
100
|
|
|
|
120
|
if($opt->{longVars}) { |
175
|
9
|
50
|
|
|
|
53
|
$subfrom =~ s{(\\<[^>]+\\>)}{$vars->{$1} || $1}eg; |
|
11
|
|
|
|
|
64
|
|
176
|
|
|
|
|
|
|
} else { |
177
|
31
|
100
|
|
|
|
153
|
$subfrom =~ s{(\\?)(.)}{$vars->{$2} || $1 . $2}eg; |
|
396
|
|
|
|
|
2036
|
|
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Show where the rule matches |
181
|
40
|
|
|
|
|
135
|
$subto .= qq(\$rule = "\Q$from\E->\Q$to\E /\Q$env\E applies to \$word at " . (length(\$1)+1) . "\\n"; ); |
182
|
|
|
|
|
|
|
|
183
|
40
|
|
|
|
|
55
|
$subto .= '$1 . $2 . '; |
184
|
40
|
100
|
66
|
|
|
151
|
$subto .= ($vars->{quotemeta $from} |
185
|
|
|
|
|
|
|
? "do { my \$char = \$3; \$char =~ tr{$varstring->{quotemeta $from}}{" . ($varstring->{quotemeta $to} || $to) . "}; \$char }" |
186
|
|
|
|
|
|
|
: "q{" . $to . "}"); |
187
|
40
|
|
|
|
|
53
|
$subto .= ' . $4'; |
188
|
|
|
|
|
|
|
|
189
|
40
|
|
|
|
|
649
|
if(PRINT_RULES) { |
190
|
|
|
|
|
|
|
print "[", $#compiledrules + 1, "] $rule --> s{$subfrom}{$subto}eg\n"; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
40
|
|
|
|
|
246
|
my $code = <<"EOF"; |
194
|
|
|
|
|
|
|
sub { |
195
|
|
|
|
|
|
|
my \$word = shift; |
196
|
|
|
|
|
|
|
my \$rule; |
197
|
|
|
|
|
|
|
my \$orig; |
198
|
|
|
|
|
|
|
# print qq(Working on '\$word'; \Q$from\E->\Q$to\E /\Q$env\E; from is '\Q$subfrom\E and to is '\Q$subto\E'\\n); |
199
|
|
|
|
|
|
|
1 while ((\$orig = \$word) =~ s{$subfrom}{$subto}e) && (\$orig ne \$word) && (\$word = \$orig); |
200
|
|
|
|
|
|
|
return ( \$word, \$rule ); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
EOF |
203
|
|
|
|
|
|
|
|
204
|
40
|
|
|
|
|
8649
|
push @compiledrules, eval $code; |
205
|
40
|
50
|
|
|
|
131
|
croak "Problem with '$code'" unless $compiledrules[-1]; |
206
|
40
|
|
|
|
|
310
|
$code{$compiledrules[-1]} = $code; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
22
|
|
|
|
|
140
|
( \@compiledrules, \%code ); |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub compile_vars ($$) { |
214
|
22
|
|
|
22
|
0
|
44
|
my($vars, $opt) = @_; |
215
|
|
|
|
|
|
|
|
216
|
22
|
50
|
|
|
|
73
|
croak "\$vars is not a hash reference" unless ref $vars eq 'HASH'; |
217
|
22
|
50
|
|
|
|
71
|
croak "\$opt is not a hash reference" unless ref $opt eq 'HASH'; |
218
|
|
|
|
|
|
|
|
219
|
22
|
|
|
|
|
28
|
my %compiledvars; |
220
|
|
|
|
|
|
|
|
221
|
22
|
|
|
|
|
94
|
foreach my $var (keys %$vars) { |
222
|
26
|
|
|
|
|
46
|
my $list = $vars->{$var}; |
223
|
|
|
|
|
|
|
# Escape at signs and dollars in the list |
224
|
26
|
|
|
|
|
49
|
$list =~ s/([\$\@])/\\$1/g; |
225
|
|
|
|
|
|
|
|
226
|
26
|
100
|
|
|
|
62
|
if($opt->{longVars}) { |
227
|
12
|
|
|
|
|
226
|
$compiledvars{"\Q<$var>\E"} = qr/[$list]/; |
228
|
12
|
|
|
|
|
34
|
print qq[($var => $list // $compiledvars{"\Q<$var>\E"})\n] if PRINT_RULES; |
229
|
|
|
|
|
|
|
} else { |
230
|
14
|
|
|
|
|
249
|
$compiledvars{$var} = qr/[$list]/; |
231
|
14
|
|
|
|
|
42
|
print "($var => $list // $compiledvars{$var})\n" if PRINT_RULES; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
22
|
|
|
|
|
84
|
\%compiledvars; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub rules { |
239
|
54
|
|
|
54
|
1
|
79
|
my($self) = @_; |
240
|
|
|
|
|
|
|
|
241
|
54
|
|
|
|
|
181
|
$self->{rules}; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub vars { |
245
|
0
|
|
|
0
|
0
|
|
my($self) = @_; |
246
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
|
$self->{vars}; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub varstring { |
251
|
0
|
|
|
0
|
0
|
|
my($self) = @_; |
252
|
|
|
|
|
|
|
|
253
|
0
|
|
|
|
|
|
$self->{raw_vars}; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub code { |
257
|
0
|
|
|
0
|
0
|
|
my($self, $token, $code) = @_; |
258
|
|
|
|
|
|
|
|
259
|
0
|
0
|
|
|
|
|
$self->{code}->{$token} = $code if $code; |
260
|
|
|
|
|
|
|
|
261
|
0
|
|
|
|
|
|
$self->{code}->{$token}; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
1; |
266
|
|
|
|
|
|
|
__END__ |