line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WordLists::Inflect::Simple;
|
2
|
2
|
|
|
2
|
|
3028
|
use utf8;
|
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
10
|
|
3
|
2
|
|
|
2
|
|
51
|
use strict;
|
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
51
|
|
4
|
2
|
|
|
2
|
|
10
|
use warnings;
|
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
50
|
|
5
|
2
|
|
|
2
|
|
558
|
use WordLists::Base;
|
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
6107
|
|
6
|
|
|
|
|
|
|
our $VERSION = $WordLists::Base::VERSION;
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our %sTypes = (
|
9
|
|
|
|
|
|
|
n=>
|
10
|
|
|
|
|
|
|
[qw(
|
11
|
|
|
|
|
|
|
singular
|
12
|
|
|
|
|
|
|
plural
|
13
|
|
|
|
|
|
|
)],
|
14
|
|
|
|
|
|
|
v=>
|
15
|
|
|
|
|
|
|
[qw(
|
16
|
|
|
|
|
|
|
present_1st_person
|
17
|
|
|
|
|
|
|
present_2nd_person
|
18
|
|
|
|
|
|
|
present_3rd_person
|
19
|
|
|
|
|
|
|
present_1st_person_plural
|
20
|
|
|
|
|
|
|
present_2nd_person_plural
|
21
|
|
|
|
|
|
|
present_3rd_person_plural
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
present_participle
|
24
|
|
|
|
|
|
|
past_tense
|
25
|
|
|
|
|
|
|
past_participle
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
infinitive
|
28
|
|
|
|
|
|
|
)],
|
29
|
|
|
|
|
|
|
adj=>
|
30
|
|
|
|
|
|
|
[qw(
|
31
|
|
|
|
|
|
|
comparative
|
32
|
|
|
|
|
|
|
superlative
|
33
|
|
|
|
|
|
|
)],
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
); # but everything should start as infinitive!
|
36
|
|
|
|
|
|
|
our $VOWELS = "aeiou";
|
37
|
|
|
|
|
|
|
our $iDEBUG = 5;
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub new
|
40
|
|
|
|
|
|
|
{
|
41
|
1
|
|
|
1
|
0
|
80
|
my $class = shift;
|
42
|
1
|
|
|
|
|
1
|
my $args = shift;
|
43
|
1
|
50
|
|
|
|
5
|
$args = {} unless defined $args;
|
44
|
1
|
|
|
|
|
4
|
my $self =
|
45
|
|
|
|
|
|
|
{
|
46
|
|
|
|
|
|
|
special_case => [qw(man woman person child e y s of in)], # general O' -in-law
|
47
|
|
|
|
|
|
|
irregular => {},
|
48
|
1
|
|
|
|
|
3
|
%{$args}
|
49
|
|
|
|
|
|
|
};
|
50
|
1
|
|
|
|
|
6
|
bless $self, $class;
|
51
|
|
|
|
|
|
|
}
|
52
|
|
|
|
|
|
|
sub possible_special_cases
|
53
|
|
|
|
|
|
|
{
|
54
|
0
|
|
|
0
|
0
|
0
|
return qw(man woman person child e y s of in general O' -in-law);
|
55
|
|
|
|
|
|
|
}
|
56
|
|
|
|
|
|
|
sub special_cases
|
57
|
|
|
|
|
|
|
{
|
58
|
0
|
|
|
0
|
0
|
0
|
my $self = shift;
|
59
|
0
|
|
|
|
|
0
|
my $new = shift;
|
60
|
0
|
0
|
0
|
|
|
0
|
if (defined $new and ref $new eq ref [])
|
61
|
|
|
|
|
|
|
{
|
62
|
0
|
|
|
|
|
0
|
$self->{'special_case'} = $new;
|
63
|
|
|
|
|
|
|
}
|
64
|
0
|
|
|
|
|
0
|
return @{$self->{'special_case'}}
|
|
0
|
|
|
|
|
0
|
|
65
|
|
|
|
|
|
|
}
|
66
|
|
|
|
|
|
|
sub is_special_cased
|
67
|
|
|
|
|
|
|
{
|
68
|
198
|
|
|
198
|
0
|
214
|
my ($self , $sCase ) = @_;
|
69
|
198
|
|
|
|
|
164
|
return grep {$_ eq $sCase} @{$self->{'special_case'}};
|
|
1813
|
|
|
|
|
2811
|
|
|
198
|
|
|
|
|
298
|
|
70
|
|
|
|
|
|
|
}
|
71
|
|
|
|
|
|
|
sub add_special_case
|
72
|
|
|
|
|
|
|
{
|
73
|
1
|
|
|
1
|
0
|
309
|
my ($self , $sCase ) = @_;
|
74
|
1
|
|
|
|
|
2
|
$self->{'special_case'} = [ $sCase, @{$self->{'special_case'}} ];
|
|
1
|
|
|
|
|
6
|
|
75
|
|
|
|
|
|
|
}
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub remove_special_case
|
78
|
|
|
|
|
|
|
{
|
79
|
0
|
|
|
0
|
0
|
0
|
my ($self , $sCase ) = @_;
|
80
|
0
|
|
|
|
|
0
|
$self->{'special_case'} = [ grep {$_ ne $sCase} @{$self->{'special_case'}} ];
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
81
|
|
|
|
|
|
|
}
|
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub pos_from_type
|
84
|
|
|
|
|
|
|
{
|
85
|
1
|
|
|
1
|
0
|
2
|
my ($self, $sType) = @_;
|
86
|
1
|
|
|
|
|
3
|
foreach my $sPos (keys %sTypes)
|
87
|
|
|
|
|
|
|
{
|
88
|
1
|
50
|
|
|
|
2
|
if (grep {$_ eq $sType} @{$sTypes{$sPos}})
|
|
10
|
|
|
|
|
13
|
|
|
1
|
|
|
|
|
2
|
|
89
|
|
|
|
|
|
|
{
|
90
|
1
|
|
|
|
|
4
|
return $sPos;
|
91
|
|
|
|
|
|
|
}
|
92
|
|
|
|
|
|
|
}
|
93
|
0
|
|
|
|
|
0
|
return '';
|
94
|
|
|
|
|
|
|
}
|
95
|
|
|
|
|
|
|
sub add_irregular_word
|
96
|
|
|
|
|
|
|
{
|
97
|
2
|
|
|
2
|
0
|
4
|
my ($self, $args) = @_;
|
98
|
2
|
|
|
|
|
3
|
my $sW = $args->{'w'};
|
99
|
2
|
|
|
|
|
2
|
foreach my $key (keys %{$args})
|
|
2
|
|
|
|
|
6
|
|
100
|
|
|
|
|
|
|
{
|
101
|
4
|
100
|
|
|
|
6
|
if (grep {$_ eq $key} keys %sTypes) # key is a pos
|
|
12
|
100
|
|
|
|
23
|
|
102
|
42
|
|
|
|
|
54
|
{
|
103
|
1
|
|
|
|
|
1
|
foreach my $sType (keys %{$args->{$key}})
|
|
1
|
|
|
|
|
3
|
|
104
|
|
|
|
|
|
|
{
|
105
|
1
|
|
|
|
|
6
|
$self->add_irregular_inflection({w=>$sW, 'pos'=>$key, type=>$sType, inflection=>$args->{$key}{$sType}});
|
106
|
|
|
|
|
|
|
}
|
107
|
|
|
|
|
|
|
}
|
108
|
9
|
|
|
|
|
9
|
elsif (grep { grep{$_ eq $key} @{$sTypes{$_}} } keys %sTypes)
|
|
9
|
|
|
|
|
15
|
|
109
|
|
|
|
|
|
|
{
|
110
|
1
|
|
|
|
|
7
|
$self->add_irregular_inflection({w=>$sW, type=>$key, inflection=>$args->{$key}});
|
111
|
|
|
|
|
|
|
}
|
112
|
|
|
|
|
|
|
}
|
113
|
2
|
|
|
|
|
7
|
return all_irregular_inflections({w=>$sW});
|
114
|
|
|
|
|
|
|
}
|
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub add_irregular_inflection
|
117
|
|
|
|
|
|
|
{
|
118
|
2
|
|
|
2
|
0
|
3
|
my ($self, $args) = @_;
|
119
|
2
|
|
|
|
|
3
|
my $sW = $args->{'w'};
|
120
|
2
|
|
|
|
|
4
|
my $sPos = $args->{'pos'};
|
121
|
2
|
|
|
|
|
4
|
my $sType = $args->{'type'};
|
122
|
2
|
|
|
|
|
26
|
my $sInf = $args->{'inflection'};
|
123
|
2
|
|
66
|
|
|
8
|
$sPos ||= $self->pos_from_type($sType);
|
124
|
2
|
50
|
|
|
|
5
|
if (!$sPos)
|
125
|
|
|
|
|
|
|
{
|
126
|
0
|
|
|
|
|
0
|
warn "Pos required! ($sW, ?, $sType)";
|
127
|
0
|
|
|
|
|
0
|
return undef;
|
128
|
|
|
|
|
|
|
}
|
129
|
2
|
|
|
|
|
3
|
push (@{$self->{irregular}{$sW}{$sPos}{$sType}}, $sInf);
|
|
2
|
|
|
|
|
11
|
|
130
|
|
|
|
|
|
|
}
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub regular_inflection
|
133
|
|
|
|
|
|
|
{
|
134
|
50
|
|
|
50
|
0
|
9404
|
my ($self, $args) = @_;
|
135
|
50
|
|
|
|
|
62
|
my $sW = $args->{'w'};
|
136
|
50
|
|
|
|
|
62
|
my $sPos = $args->{'pos'};
|
137
|
50
|
|
|
|
|
47
|
my $sType = $args->{'type'};
|
138
|
50
|
|
|
|
|
58
|
my $sInf = $sW;
|
139
|
50
|
|
|
|
|
320
|
my $three_syllables = qr/[$VOWELS]+[^$VOWELS]+[$VOWELS]+[^$VOWELS]+(?:y|[$VOWELS]+[^$VOWELS]+|[$VOWELS])$/;
|
140
|
50
|
100
|
100
|
|
|
566
|
if (
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
141
|
|
|
|
|
|
|
($sPos eq 'n' and $sType eq 'plural')
|
142
|
|
|
|
|
|
|
or
|
143
|
|
|
|
|
|
|
($sPos eq 'v' and $sType eq 'present_3rd_person')
|
144
|
|
|
|
|
|
|
)
|
145
|
|
|
|
|
|
|
{
|
146
|
14
|
|
|
|
|
18
|
$sInf = $sW.'s';
|
147
|
14
|
50
|
|
|
|
23
|
$sInf =~ s/siss$/eses/ if $self->is_special_cased('sis'); #thesis => theses
|
148
|
14
|
50
|
|
|
|
24
|
$sInf =~ s/(s|x|sh|ch|z)s$/$1es/ if $self->is_special_cased('s');
|
149
|
14
|
50
|
|
|
|
28
|
$sInf =~ s/([^$VOWELS])ys$/$1ies/ if $self->is_special_cased('y');
|
150
|
14
|
50
|
33
|
|
|
54
|
$sInf =~ s/womans$/women/ if $self->is_special_cased('woman') and $sInf !~ /^[[:upper:]]/;
|
151
|
14
|
50
|
|
|
|
25
|
$sInf =~ s/Womans$/Women/ if $self->is_special_cased('woman');
|
152
|
14
|
50
|
33
|
|
|
27
|
$sInf =~ s/mans$/men/ if $self->is_special_cased('man') and $sInf !~ /^[[:upper:]]/; # German Germans
|
153
|
14
|
50
|
|
|
|
26
|
$sInf =~ s/Mans$/Men/ if $self->is_special_cased('man'); # Man O'War
|
154
|
14
|
50
|
33
|
|
|
26
|
$sInf =~ s/persons$/people/ if $self->is_special_cased('person') and $sInf !~ /^[[:upper:]]/;
|
155
|
14
|
50
|
|
|
|
25
|
$sInf =~ s/^Persons$/People/ if $self->is_special_cased('person'); # Person of Colour
|
156
|
14
|
50
|
33
|
|
|
26
|
$sInf =~ s/childs$/children/ if $self->is_special_cased('child') and $sInf !~ /^[[:upper:]]/; # Rothschild Rothschilds
|
157
|
14
|
50
|
|
|
|
25
|
$sInf =~ s/^Childs$/Children/ if $self->is_special_cased('child'); # Child of the 60s
|
158
|
|
|
|
|
|
|
}
|
159
|
|
|
|
|
|
|
elsif (
|
160
|
|
|
|
|
|
|
($sPos eq 'v' and $sType eq 'past_tense')
|
161
|
|
|
|
|
|
|
or
|
162
|
|
|
|
|
|
|
($sPos eq 'v' and $sType eq 'past_participle')
|
163
|
|
|
|
|
|
|
)
|
164
|
|
|
|
|
|
|
{
|
165
|
10
|
|
|
|
|
16
|
$sInf = $sW.'ed';
|
166
|
10
|
50
|
|
|
|
17
|
$sInf =~ s/eed$/ed/ if $self->is_special_cased('e');
|
167
|
10
|
50
|
|
|
|
22
|
$sInf =~ s/([^$VOWELS])yed$/$1ied/ if $self->is_special_cased('y');
|
168
|
|
|
|
|
|
|
}
|
169
|
|
|
|
|
|
|
elsif (
|
170
|
|
|
|
|
|
|
($sPos eq 'v' and $sType eq 'present_participle')
|
171
|
|
|
|
|
|
|
)
|
172
|
|
|
|
|
|
|
{
|
173
|
11
|
|
|
|
|
15
|
$sInf = $sW.'ing';
|
174
|
|
|
|
|
|
|
#$sInf =~ s/([^$VOWELS])eing$/$1ing/ if $self->is_special_cased('e');
|
175
|
11
|
50
|
|
|
|
22
|
$sInf =~ s/([^aeio])eing$/$1ing/ if $self->is_special_cased('e');
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
}
|
178
|
|
|
|
|
|
|
elsif (
|
179
|
|
|
|
|
|
|
($sPos eq 'adj' and $sType eq 'comparative')
|
180
|
|
|
|
|
|
|
)
|
181
|
|
|
|
|
|
|
{
|
182
|
|
|
|
|
|
|
|
183
|
3
|
50
|
|
|
|
13
|
if ($sW =~ /$three_syllables/)
|
184
|
|
|
|
|
|
|
{
|
185
|
0
|
|
|
|
|
0
|
return "more $sW";
|
186
|
|
|
|
|
|
|
}
|
187
|
|
|
|
|
|
|
else
|
188
|
|
|
|
|
|
|
{
|
189
|
3
|
|
|
|
|
5
|
$sInf = $sW.'er';
|
190
|
3
|
50
|
|
|
|
9
|
$sInf =~ s/eer$/er/ if $self->is_special_cased('e');
|
191
|
3
|
50
|
|
|
|
8
|
$sInf =~ s/([^$VOWELS])yer$/$1ier/ if $self->is_special_cased('y');
|
192
|
|
|
|
|
|
|
}
|
193
|
|
|
|
|
|
|
}
|
194
|
|
|
|
|
|
|
elsif (
|
195
|
|
|
|
|
|
|
($sPos eq 'adj' and $sType eq 'superlative')
|
196
|
|
|
|
|
|
|
)
|
197
|
|
|
|
|
|
|
{
|
198
|
4
|
100
|
|
|
|
35
|
if ($sW =~ /$three_syllables/)
|
199
|
|
|
|
|
|
|
{
|
200
|
1
|
|
|
|
|
8
|
return "most $sW";
|
201
|
|
|
|
|
|
|
}
|
202
|
|
|
|
|
|
|
else
|
203
|
|
|
|
|
|
|
{
|
204
|
3
|
|
|
|
|
6
|
$sInf = $sW.'est';
|
205
|
3
|
50
|
|
|
|
6
|
$sInf =~ s/([^$VOWELS])yest$/$1iest/ if $self->is_special_cased('y');
|
206
|
3
|
50
|
|
|
|
8
|
$sInf =~ s/eest$/est/ if $self->is_special_cased('e');
|
207
|
|
|
|
|
|
|
}
|
208
|
|
|
|
|
|
|
}
|
209
|
49
|
|
|
|
|
299
|
return $sInf;
|
210
|
|
|
|
|
|
|
}
|
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub get_irregular_inflections
|
213
|
|
|
|
|
|
|
{
|
214
|
22
|
|
|
22
|
0
|
22
|
my ($self, $args) = @_;
|
215
|
22
|
|
|
|
|
26
|
my $sW = $args->{'w'};
|
216
|
22
|
|
|
|
|
26
|
my $sPos = $args->{'pos'};
|
217
|
22
|
|
|
|
|
24
|
my $sType = $args->{'type'};
|
218
|
22
|
|
|
|
|
24
|
my $sInf = $args->{'inflection'};
|
219
|
22
|
|
33
|
|
|
35
|
$sPos ||= pos_from_type($sType);
|
220
|
22
|
50
|
|
|
|
33
|
if (!$sPos)
|
221
|
|
|
|
|
|
|
{
|
222
|
0
|
|
|
|
|
0
|
warn "Pos required! ($sW, ?, $sType)";
|
223
|
0
|
|
|
|
|
0
|
return undef;
|
224
|
|
|
|
|
|
|
}
|
225
|
22
|
100
|
|
|
|
62
|
return @{$self->{'irregular'}{$sW}{$sPos}{$sType}} if defined $self->{'irregular'}{$sW}{$sPos}{$sType};
|
|
2
|
|
|
|
|
8
|
|
226
|
20
|
|
|
|
|
48
|
return undef;
|
227
|
|
|
|
|
|
|
}
|
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub irregular_inflection
|
230
|
|
|
|
|
|
|
{
|
231
|
22
|
|
|
22
|
0
|
24
|
my ($self, $args) = @_;
|
232
|
22
|
|
|
|
|
18
|
my $sInf = ${[$self->get_irregular_inflections($args)]}[0];
|
|
22
|
|
|
|
|
36
|
|
233
|
22
|
100
|
|
|
|
46
|
unless (defined $sInf)
|
234
|
|
|
|
|
|
|
{
|
235
|
20
|
|
|
|
|
34
|
$sInf = $self->regular_inflection($args);
|
236
|
|
|
|
|
|
|
}
|
237
|
22
|
|
|
|
|
46
|
return $sInf;
|
238
|
|
|
|
|
|
|
}
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub phrase_inflection
|
241
|
|
|
|
|
|
|
{
|
242
|
24
|
|
|
24
|
0
|
2498
|
my ($self, $args) = @_;
|
243
|
24
|
|
|
|
|
29
|
my $sPhrase = $args->{'w'};
|
244
|
24
|
|
|
|
|
28
|
my $sPos = $args->{'pos'};
|
245
|
|
|
|
|
|
|
|
246
|
24
|
|
100
|
|
|
71
|
$args->{'inflect'} ||= \&WordLists::Inflect::Simple::irregular_inflection;
|
247
|
|
|
|
|
|
|
|
248
|
24
|
|
|
|
|
71
|
my @sTokens = split(/\s/, $sPhrase); # Even in "top-up card" and "Man O'War" we never want to split by /-/ or /'/. This is only an issue in irregulars anyway.
|
249
|
|
|
|
|
|
|
|
250
|
24
|
100
|
66
|
|
|
328
|
if ($sPos eq 'v')
|
|
|
50
|
33
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
|
50
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
251
|
|
|
|
|
|
|
{
|
252
|
14
|
|
|
|
|
19
|
$args->{'w'} = $sTokens[0];
|
253
|
14
|
|
|
|
|
14
|
$sTokens[0] = &{$args->{'inflect'}}($self, $args);
|
|
14
|
|
|
|
|
23
|
|
254
|
|
|
|
|
|
|
}
|
255
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and $sTokens[-1]=~/^O['’]/ and defined $sTokens[-2] and $self->is_special_cased("O'"))
|
256
|
|
|
|
|
|
|
{
|
257
|
0
|
|
|
|
|
0
|
$args->{'w'} = $sTokens[-2];
|
258
|
0
|
|
|
|
|
0
|
$sTokens[-2] = &{$args->{'inflect'}}($self, $args);
|
|
0
|
|
|
|
|
0
|
|
259
|
|
|
|
|
|
|
}
|
260
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and $sTokens[-1]=~/^[Gg]eneral/ and defined $sTokens[-2] and $self->is_special_cased('general'))
|
261
|
|
|
|
|
|
|
{
|
262
|
0
|
|
|
|
|
0
|
$args->{'w'} = $sTokens[-2];
|
263
|
0
|
|
|
|
|
0
|
$sTokens[-2] = &{$args->{'inflect'}}($self, $args);
|
|
0
|
|
|
|
|
0
|
|
264
|
|
|
|
|
|
|
}
|
265
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and $sTokens[-1]=~/(.*)-in-law/ and $self->is_special_cased('-in-law'))
|
266
|
|
|
|
|
|
|
{
|
267
|
0
|
|
|
|
|
0
|
$args->{'w'} = $1;
|
268
|
0
|
|
|
|
|
0
|
$sTokens[-1] = &{$args->{'inflect'}}($self, $args).'-in-law';
|
|
0
|
|
|
|
|
0
|
|
269
|
|
|
|
|
|
|
}
|
270
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and $sTokens[-1] eq 'law' and defined $sTokens[-2] and $sTokens[-2] eq 'in' and defined $sTokens[-3] and $self->is_special_cased('-in-law'))
|
271
|
|
|
|
|
|
|
{
|
272
|
0
|
|
|
|
|
0
|
$args->{'w'} = $sTokens[-3];
|
273
|
0
|
|
|
|
|
0
|
$sTokens[-3] = &{$args->{'inflect'}}($self, $args);
|
|
0
|
|
|
|
|
0
|
|
274
|
|
|
|
|
|
|
}
|
275
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and defined $sTokens[-3] and $sTokens[-2]=~/\bof\b/ and $self->is_special_cased('of'))
|
276
|
|
|
|
|
|
|
{
|
277
|
1
|
|
|
|
|
2
|
$args->{'w'} = $sTokens[-3];
|
278
|
1
|
|
|
|
|
2
|
$sTokens[-3] = &{$args->{'inflect'}}($self, $args);
|
|
1
|
|
|
|
|
2
|
|
279
|
|
|
|
|
|
|
}
|
280
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and defined $sTokens[-4] and $sTokens[-3]=~/\bof\b/ and $sTokens[-2]=~/\bthe\b/ and $self->is_special_cased('of'))
|
281
|
|
|
|
|
|
|
{
|
282
|
0
|
|
|
|
|
0
|
$args->{'w'} = $sTokens[-4];
|
283
|
0
|
|
|
|
|
0
|
$sTokens[-4] = &{$args->{'inflect'}}($self, $args);
|
|
0
|
|
|
|
|
0
|
|
284
|
|
|
|
|
|
|
}
|
285
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and defined $sTokens[-3] and $sTokens[-2]=~/\bin\b/ and $self->is_special_cased('in'))
|
286
|
|
|
|
|
|
|
{
|
287
|
0
|
|
|
|
|
0
|
$args->{'w'} = $sTokens[-3];
|
288
|
0
|
|
|
|
|
0
|
$sTokens[-3] = &{$args->{'inflect'}}($self, $args);
|
|
0
|
|
|
|
|
0
|
|
289
|
|
|
|
|
|
|
}
|
290
|
|
|
|
|
|
|
elsif ($sPos eq 'n' and defined $sTokens[-4] and $sTokens[-3]=~/\bin\b/ and $sTokens[-2]=~/\bthe\b/ and $self->is_special_cased('in'))
|
291
|
|
|
|
|
|
|
{
|
292
|
0
|
|
|
|
|
0
|
$args->{'w'} = $sTokens[-4];
|
293
|
0
|
|
|
|
|
0
|
$sTokens[-4] = &{$args->{'inflect'}}($self, $args);
|
|
0
|
|
|
|
|
0
|
|
294
|
|
|
|
|
|
|
}
|
295
|
|
|
|
|
|
|
elsif ($sPos eq 'n')
|
296
|
|
|
|
|
|
|
{
|
297
|
5
|
|
|
|
|
7
|
$args->{'w'} = $sTokens[-1];
|
298
|
5
|
|
|
|
|
7
|
$sTokens[-1] = &{$args->{'inflect'}}($self, $args);
|
|
5
|
|
|
|
|
12
|
|
299
|
|
|
|
|
|
|
}
|
300
|
|
|
|
|
|
|
elsif ($sPos eq 'adj' and $#sTokens==0)
|
301
|
|
|
|
|
|
|
{
|
302
|
2
|
|
|
|
|
4
|
$args->{'w'} = $sTokens[0];
|
303
|
2
|
|
|
|
|
3
|
$sTokens[0] = &{$args->{'inflect'}}($self, $args);
|
|
2
|
|
|
|
|
4
|
|
304
|
|
|
|
|
|
|
}
|
305
|
|
|
|
|
|
|
elsif ($sPos eq 'adj' and $args->{'type'} eq 'comparative')
|
306
|
|
|
|
|
|
|
{
|
307
|
1
|
|
|
|
|
3
|
unshift @sTokens, 'more';
|
308
|
1
|
|
|
|
|
9
|
return join (' ', @sTokens);
|
309
|
|
|
|
|
|
|
}
|
310
|
|
|
|
|
|
|
elsif ($sPos eq 'adj' and $args->{'type'} eq 'superlative')
|
311
|
|
|
|
|
|
|
{
|
312
|
1
|
|
|
|
|
3
|
unshift @sTokens, 'most';
|
313
|
1
|
|
|
|
|
7
|
return join (' ', @sTokens);
|
314
|
|
|
|
|
|
|
}
|
315
|
22
|
|
|
|
|
34
|
$args->{'w'} = $sPhrase;
|
316
|
22
|
|
|
|
|
115
|
return join (' ', @sTokens);
|
317
|
|
|
|
|
|
|
}
|
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
sub all_inflections
|
320
|
|
|
|
|
|
|
{
|
321
|
4
|
|
|
4
|
0
|
11
|
my ($self, $args) = @_;
|
322
|
4
|
|
|
|
|
5
|
my $result ={};
|
323
|
4
|
|
|
|
|
10
|
my $sPos = $args->{'pos'};
|
324
|
4
|
100
|
|
|
|
7
|
if (defined $sPos)
|
325
|
|
|
|
|
|
|
{
|
326
|
|
|
|
|
|
|
|
327
|
3
|
|
|
|
|
3
|
foreach my $sType (@{$sTypes{$sPos}})
|
|
3
|
|
|
|
|
8
|
|
328
|
|
|
|
|
|
|
{
|
329
|
14
|
|
|
|
|
20
|
$args->{'type'} = $sType;
|
330
|
14
|
|
|
|
|
25
|
$result->{$sPos}{$sType} = $self->phrase_inflection($args);
|
331
|
|
|
|
|
|
|
}
|
332
|
|
|
|
|
|
|
}
|
333
|
|
|
|
|
|
|
else
|
334
|
|
|
|
|
|
|
{
|
335
|
1
|
|
|
|
|
5
|
foreach (keys %sTypes)
|
336
|
|
|
|
|
|
|
{
|
337
|
3
|
|
|
|
|
8
|
$args->{'pos'} = $_;
|
338
|
3
|
|
|
|
|
4
|
$result->{$_}=${$self->all_inflections($args)}{$_};
|
|
3
|
|
|
|
|
9
|
|
339
|
|
|
|
|
|
|
}
|
340
|
|
|
|
|
|
|
}
|
341
|
4
|
|
|
|
|
28
|
return $result;
|
342
|
|
|
|
|
|
|
}
|
343
|
|
|
|
|
|
|
sub all_irregular_inflections
|
344
|
|
|
|
|
|
|
{
|
345
|
2
|
|
|
2
|
0
|
3
|
my ($self, $args) = @_;
|
346
|
2
|
|
|
|
|
2
|
my $result ={};
|
347
|
2
|
50
|
|
|
|
8
|
return $result unless defined $args->{'w'};
|
348
|
0
|
|
|
|
|
|
return $result = $self->{'irregular'}{$args->{'w'}};
|
349
|
|
|
|
|
|
|
}
|
350
|
|
|
|
|
|
|
1;
|
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=pod
|
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head1 NAME
|
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
WordLists::Inflect::Simple
|
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
$inflector = WordLists::Inflect::Simple->new;
|
361
|
|
|
|
|
|
|
$sPlural = $inflector->regular_inflection({w=>'sky', pos=>'n', type=>'plural'});
|
362
|
|
|
|
|
|
|
$inflector->add_special_case('general');
|
363
|
|
|
|
|
|
|
$sPlural = $inflector->phrase_inflection({w=>'Director General', pos=>'n', type=>'plural'});
|
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
This module provides an object which can be used to generate regular and semi-regular English inflections.
|
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
By default, it comes with several defaults for semi-regular special cases - dealing with word-final 'e', 'y', and sibilants, dealing with words ending 'man'. This behaviour can be turned on and off.
|
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
It deliberately does not deal with irregular forms, even important ones like the verb 'be'. However, it does provide an interface for user-specified irregular inflections, and it is trivial to write a wrapper module (subclass) which loads a pre-written set of inflections.
|
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
It does not deal with semi-regular patterns which require knowledge of the behaviour of individual words - for example, there is no reliable way of inspecting 'abet' and discerning that the 't' must be doubled in the present participle. Similarly, there is no attempt made to identify Latin '-us/i' plurals, as this would require making exceptions for words like 'minibus', 'omnibus', and 'octopus'. These must be entered as irregular inflections.
|
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=head2 Special Cases
|
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head3 e
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Words ending in e, when given inflections like 'ed', 'er', 'est' do not get a second 'e', e.g. blue => bluer, not blue => blueer. Verbs ending in e also lose the e in the present participle, unless the e is preceded by [aeio] (e.g. argue => arguing but see => seeing).
|
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head3 y
|
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Words ending in y, when given 's' inflections or 'e' inflections are subject to a conversion of the 'y' to 'i'/'ie', unless the 'y' is preceded by a vowel, e.g. sky => skies, but day => days.
|
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head3 s
|
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
Words ending in sibilants (s, x, sh, ch, z), when given 's' inflections gain an 'e', e.g. 'sash'=>'sashes'.
|
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=head3 of
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
When a phrase of the form X of Y is inflected as a noun, it is X rather than Y which is inflected. This also applies to the pattern X of the Y.
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head3 in
|
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
When a phrase of the form X in Y is inflected as a noun, it is X rather than Y which is inflected. This also applies to the pattern X in the Y.
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head3 general
|
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
When a phrase of the form X General is inflected as a noun, it is X rather than General which is inflected.
|
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=head3 O'
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
When a phrase of the form X O'Y is inflected as a noun, it is X rather than Y which is inflected.
|
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head3 man
|
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Nouns ending in 'man' which do not begin with a capital are pluralised 'men' (postman => postmen but German => Germans).
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head3 woman
|
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Nouns ending in 'woman' which do not begin with a capital are pluralised 'women'.
|
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=head3 person
|
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Nouns ending in 'person' which do not begin with a capital are pluralised 'people'.
|
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head3 child
|
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Nouns ending in 'child' which do not begin with a capital are pluralised 'children'.
|
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head3 -in-law
|
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
When a phrase of the form X-in-law is inflected as a noun, it is X rather than -in-law which is inflected.
|
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 Miscellaneous notes
|
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
There are three parts of speech which can be inflected with this, C, C, and C.
|
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head1 TODO
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Improve the accessors for the special cases so a user can query the object for useful special cases to add, specify a fixed list of special cases so new cases don't affect functionality, etc.
|
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Add pos normalisation and an interface for customising the pos normalisation routine.
|
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Document all methods.
|
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=head1 BUGS
|
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
English is buggy. Newspeak is doubleplusgoodlier; consider upgrading.
|
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
Some potentially unexpected results may arise, e.g. with 'man' special cased, human is incorrectly pluralised as humen, not the 'more regular' (and correct) humans.
|
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
Please use the Github issues tracker for other bugs.
|
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=head1 LICENSE
|
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
Copyright 2011-2012 © Cambridge University Press. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=cut
|