line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::EN::Grammarian; |
2
|
|
|
|
|
|
|
our $VERSION = '0.000005'; |
3
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
31522
|
use 5.010; use warnings; |
|
1
|
|
|
1
|
|
5
|
|
|
1
|
|
|
|
|
38
|
|
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
5
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
286
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# Standard config files... |
8
|
|
|
|
|
|
|
my $CAUTIONS_FILE = 'grammarian_cautions'; |
9
|
|
|
|
|
|
|
my $ERRORS_FILE = 'grammarian_errors'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Standard config file search path... |
12
|
|
|
|
|
|
|
my @CONFIG_PATH = ( '/usr/local/share/grammarian/', "$ENV{HOME}/", "$ENV{PWD}/" ); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Export interface... |
16
|
|
|
|
|
|
|
my @DEF_EXPORTS = qw< |
17
|
|
|
|
|
|
|
extract_cautions_from |
18
|
|
|
|
|
|
|
extract_errors_from |
19
|
|
|
|
|
|
|
>; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my @ALL_EXPORTS = ( |
22
|
|
|
|
|
|
|
@DEF_EXPORTS, |
23
|
|
|
|
|
|
|
qw< |
24
|
|
|
|
|
|
|
get_coverage_stats |
25
|
|
|
|
|
|
|
get_error_at |
26
|
|
|
|
|
|
|
get_next_error_at |
27
|
|
|
|
|
|
|
get_caution_at |
28
|
|
|
|
|
|
|
get_next_caution_at |
29
|
|
|
|
|
|
|
get_vim_error_regexes |
30
|
|
|
|
|
|
|
get_vim_caution_regexes |
31
|
|
|
|
|
|
|
> |
32
|
|
|
|
|
|
|
); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub import { |
35
|
1
|
|
|
1
|
|
18
|
my $self = shift; |
36
|
1
|
50
|
|
|
|
9
|
my @exports = @_ ? @_ : @DEF_EXPORTS; |
37
|
1
|
|
|
|
|
4
|
my $caller = caller; |
38
|
1
|
50
|
|
|
|
3
|
for my $exported_sub (map { /^:all$/i ? @ALL_EXPORTS : $_ } @exports) { |
|
2
|
|
|
|
|
17
|
|
39
|
1
|
|
|
1
|
|
5
|
no strict 'refs'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1270
|
|
40
|
2
|
|
|
|
|
3
|
my $impl = *{$exported_sub}{CODE}; |
|
2
|
|
|
|
|
9
|
|
41
|
2
|
50
|
33
|
|
|
21
|
croak "$self does not provide $exported_sub()" |
42
|
|
|
|
|
|
|
if $exported_sub !~ /^(?:get_|extract_)/ || !$impl; |
43
|
2
|
|
|
|
|
2
|
*{$caller.'::'.$exported_sub} = $impl; |
|
2
|
|
|
|
|
50
|
|
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# The data extracted from those files... |
48
|
|
|
|
|
|
|
my %CAUTIONS_FOR; |
49
|
|
|
|
|
|
|
my $CAUTIONS_REGEX; |
50
|
|
|
|
|
|
|
my @VIM_CAUTION_REGEX_COMPONENTS; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my %CORRECTIONS_FOR; |
53
|
|
|
|
|
|
|
my %EXPLANATION_FOR; |
54
|
|
|
|
|
|
|
my $ERRORS_REGEX; |
55
|
|
|
|
|
|
|
my @VIM_ERROR_REGEX_COMPONENTS; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $VIM_REGEX_MAX_LEN = 20000; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# Improved \b... |
60
|
|
|
|
|
|
|
my $SPACE_TRANSITION = qr{ |
61
|
|
|
|
|
|
|
# Preceded by... And followed by... |
62
|
|
|
|
|
|
|
(?<=[[:space:][:punct:]]) (?=[^[:space:][:punct:]]) |
63
|
|
|
|
|
|
|
| \A (?=[^[:space:][:punct:]]) |
64
|
|
|
|
|
|
|
| (?<=[^[:space:][:punct:]]) (?=[[:space:][:punct:]]|\z) |
65
|
|
|
|
|
|
|
}xms; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Extract that data... |
68
|
|
|
|
|
|
|
if (! _load_cautions()) { |
69
|
|
|
|
|
|
|
warn qq{No "grammarian_cautions" file found in config search path:\n} |
70
|
|
|
|
|
|
|
. qq{\n} |
71
|
|
|
|
|
|
|
. join(q{}, map { qq{ $_\n} } @CONFIG_PATH) |
72
|
|
|
|
|
|
|
. qq{\n} |
73
|
|
|
|
|
|
|
. qq{(Did you forget to install it from the distribution?)\n}; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
if (! _load_errors()) { |
77
|
|
|
|
|
|
|
warn qq{No "grammarian_errors" file found in config search path:\n} |
78
|
|
|
|
|
|
|
. qq{\n} |
79
|
|
|
|
|
|
|
. join(q{}, map { qq{ $_\n} } @CONFIG_PATH) |
80
|
|
|
|
|
|
|
. qq{\n} |
81
|
|
|
|
|
|
|
. qq{(Did you forget to install it from the distribution?)\n}; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub _rewrite (&$) { |
85
|
50106
|
|
|
50106
|
|
127925
|
my ($transform, $text) = @_; |
86
|
50106
|
|
|
|
|
121596
|
$transform->() for $text; |
87
|
50106
|
|
|
|
|
244389
|
return $text; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub _inflect_term { |
91
|
10612
|
|
|
10612
|
|
24315
|
my ($term) = @_; |
92
|
|
|
|
|
|
|
|
93
|
10612
|
|
|
|
|
44691
|
my $PRONOUN_MARKER = qr{ |
94
|
|
|
|
|
|
|
< (?: I | s?he | we | me | him | us | my | his | our | mine | hers | ours) > |
95
|
|
|
|
|
|
|
}xms; |
96
|
|
|
|
|
|
|
|
97
|
10612
|
|
|
|
|
104904
|
my %PRONOUN_EXPANSION_FOR = ( |
98
|
|
|
|
|
|
|
'' => q{I,you,she,he,it,we,they}, |
99
|
|
|
|
|
|
|
'' => q{she,he}, |
100
|
|
|
|
|
|
|
'' => q{he,she}, |
101
|
|
|
|
|
|
|
'' => q{we,you,they}, |
102
|
|
|
|
|
|
|
'' => q{me,you,her,him,it,us,them}, |
103
|
|
|
|
|
|
|
'' => q{her,him}, |
104
|
|
|
|
|
|
|
'' => q{him,her}, |
105
|
|
|
|
|
|
|
'' => q{us,you,them}, |
106
|
|
|
|
|
|
|
'' => q{my,your,hers,his,its,our,their}, |
107
|
|
|
|
|
|
|
'' => q{her,his}, |
108
|
|
|
|
|
|
|
'' => q{our,your,their}, |
109
|
|
|
|
|
|
|
'' => q{mine,yours,hers,his,its,ours,theirs}, |
110
|
|
|
|
|
|
|
'' => q{hers,his}, |
111
|
|
|
|
|
|
|
'' => q{ours,yours,theirs}, |
112
|
|
|
|
|
|
|
); |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# Preprocess expansions... |
116
|
10612
|
|
|
|
|
58380
|
my @components = split /($PRONOUN_MARKER)/, $term; |
117
|
10612
|
|
|
|
|
15611
|
$term = q{}; |
118
|
10612
|
|
|
|
|
14714
|
my $in_parens = 0; |
119
|
10612
|
|
|
|
|
28486
|
while (@components) { |
120
|
10714
|
|
|
|
|
31348
|
my ($prefix, $pronoun) = splice(@components, 0, 2); |
121
|
10714
|
|
|
|
|
25680
|
$in_parens += ($prefix=~tr/(//) - ($prefix=~tr/)//); |
122
|
10714
|
|
|
|
|
23116
|
$term .= $prefix; |
123
|
10714
|
100
|
|
|
|
44286
|
if ($pronoun) { |
124
|
170
|
100
|
33
|
|
|
994
|
$term .= ($in_parens ? '' : '(') |
|
|
100
|
|
|
|
|
|
125
|
|
|
|
|
|
|
. ($PRONOUN_EXPANSION_FOR{$pronoun} // $pronoun) |
126
|
|
|
|
|
|
|
. ($in_parens ? '' : ')') |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# Convert any parenthesized or starred set of alternatives... |
131
|
10612
|
|
|
|
|
14273
|
my @inflexions; |
132
|
10612
|
|
|
|
|
424855
|
$term =~ s{ (? \S*? (? \S? ) ) |
133
|
|
|
|
|
|
|
(?: |
134
|
|
|
|
|
|
|
(? e [*] ) |
135
|
|
|
|
|
|
|
| (? ch [*] ) |
136
|
|
|
|
|
|
|
| (? (?<= [^aeiou] ) y [*] ) |
137
|
|
|
|
|
|
|
| (? (?<= [^aeiou] ) y [(] s [)] ) |
138
|
|
|
|
|
|
|
| (? [*][*] ) |
139
|
|
|
|
|
|
|
| (? [*] ) |
140
|
|
|
|
|
|
|
| [(] (? [^)]+ ) [)] |
141
|
|
|
|
|
|
|
) |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
{ |
144
|
1
|
|
|
1
|
|
1070
|
my $ll = $+{last_letter}; |
|
1
|
|
|
|
|
591
|
|
|
1
|
|
|
|
|
830
|
|
|
6665
|
|
|
|
|
44447
|
|
145
|
6665
|
100
|
|
|
|
251889
|
@inflexions = $+{e_star} ? ( 'e', 'es', 'ed', 'ing') |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
146
|
|
|
|
|
|
|
: $+{ch_star} ? ( 'ch', 'ches', 'ched', 'ching') |
147
|
|
|
|
|
|
|
: $+{y_star} ? ( 'y', 'ies', 'ied', 'ying') |
148
|
|
|
|
|
|
|
: $+{y_s} ? ( 'y', 'ies', ) |
149
|
|
|
|
|
|
|
: $+{double_star} ? ( '', 's', $ll.'ed', $ll.'ing') |
150
|
|
|
|
|
|
|
: $+{star} ? ( '', 's', 'ed', 'ing') |
151
|
|
|
|
|
|
|
: $+{alts} ? ( ($+{root} ? '' : ()), split(',', $+{alts}) ) |
152
|
|
|
|
|
|
|
: (); |
153
|
|
|
|
|
|
|
|
154
|
6665
|
|
|
|
|
83706
|
qq{$+{root}*}; |
155
|
|
|
|
|
|
|
}xmse; |
156
|
|
|
|
|
|
|
|
157
|
10612
|
100
|
|
|
|
85767
|
return @inflexions ? map { my $infl = $term; $infl =~ s{[*]}{$_}; $infl} @inflexions |
|
27030
|
|
|
|
|
42548
|
|
|
27030
|
|
|
|
|
74617
|
|
|
27030
|
|
|
|
|
133599
|
|
158
|
|
|
|
|
|
|
: $term; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Parse cautions file and convert to internal data structures... |
162
|
|
|
|
|
|
|
sub _load_cautions { |
163
|
|
|
|
|
|
|
# Gather config from current directory and home directory... |
164
|
6
|
|
|
|
|
142
|
local @ARGV = grep { -e } |
|
3
|
|
|
|
|
13
|
|
165
|
1
|
|
|
1
|
|
3
|
map { ("$_.$CAUTIONS_FILE", "$_$CAUTIONS_FILE") } |
166
|
|
|
|
|
|
|
@CONFIG_PATH; |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# If no config, we're done... |
169
|
1
|
50
|
|
|
|
9
|
return if !@ARGV; |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Store sets of terms together... |
172
|
1
|
|
|
|
|
6
|
my @term_sets = { terms => [], defns => [], inflexions => [] }; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Parse configuration file... |
175
|
|
|
|
|
|
|
LINE: |
176
|
1
|
|
|
|
|
79
|
while (my $next_line = readline) { |
177
|
|
|
|
|
|
|
# Ignore comments... |
178
|
646
|
100
|
|
|
|
1594
|
next LINE if $next_line =~ m{ \A \h* [#] }xms; |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Blank lines delimit new term sets... |
181
|
637
|
100
|
|
|
|
2024
|
if ($next_line =~ m{\A \h* \Z}xms) { |
182
|
208
|
|
|
|
|
808
|
push @term_sets, { terms => [], defns => [], inflexions => [] }; |
183
|
208
|
|
|
|
|
899
|
next LINE; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# Parse config line... |
187
|
429
|
|
|
|
|
4714
|
$next_line =~ m{ |
188
|
|
|
|
|
|
|
\A |
189
|
|
|
|
|
|
|
(? -? ) |
190
|
|
|
|
|
|
|
\h* (? [^:]*? ) |
191
|
|
|
|
|
|
|
(?: |
192
|
|
|
|
|
|
|
\h* : |
193
|
|
|
|
|
|
|
\h* (? .*? ) |
194
|
|
|
|
|
|
|
)? |
195
|
|
|
|
|
|
|
\h* |
196
|
|
|
|
|
|
|
\Z |
197
|
|
|
|
|
|
|
}xms; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Unpack components... |
200
|
429
|
|
|
|
|
2267
|
my $term = $+{term}; |
201
|
429
|
|
100
|
|
|
2675
|
my $defn = $+{defn} // q{}; |
202
|
429
|
|
|
|
|
2034
|
my $is_silent = length($+{is_silent}); |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Warn of bad config... |
205
|
429
|
50
|
|
|
|
1193
|
if (!defined $term) { |
206
|
0
|
|
|
|
|
0
|
warn "Invalid entry in grammarian_cautions: $next_line"; |
207
|
0
|
|
|
|
|
0
|
next LINE; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# Unpack any inflexions... |
211
|
429
|
|
|
|
|
804
|
my @inflexions = _inflect_term($term); |
212
|
|
|
|
|
|
|
|
213
|
429
|
|
|
|
|
834
|
my $original = shift @inflexions; |
214
|
429
|
100
|
|
|
|
1429
|
if ($defn =~ /\S/) { |
215
|
422
|
|
|
|
|
441
|
push @{$term_sets[-1]{terms}}, $original; |
|
422
|
|
|
|
|
1072
|
|
216
|
422
|
|
|
|
|
612
|
push @{$term_sets[-1]{defns}}, $defn; |
|
422
|
|
|
|
|
831
|
|
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# Store patterns to be matched... |
220
|
429
|
|
|
|
|
533
|
my $order = 0; |
221
|
429
|
|
|
|
|
703
|
for my $next_inflexion ($original, @inflexions) { |
222
|
877
|
|
|
|
|
918
|
push @{ $term_sets[-1]{inflexions}[$order++] }, {silent => $is_silent, term => $next_inflexion}; |
|
877
|
|
|
|
|
5398
|
|
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Compile list of cautions and the matching regex... |
228
|
1
|
|
|
|
|
2
|
my @regex_components; |
229
|
|
|
|
|
|
|
TERM_SET: |
230
|
1
|
|
|
|
|
3
|
for my $term_set (@term_sets) { |
231
|
209
|
100
|
|
|
|
210
|
next TERM_SET if !@{ $term_set->{terms} }; |
|
209
|
|
|
|
|
860
|
|
232
|
|
|
|
|
|
|
|
233
|
1
|
|
|
1
|
|
7
|
use List::Util 'max'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
3932
|
|
234
|
204
|
|
|
|
|
211
|
my $term_width = max map { length } @{ $term_set->{terms} }; |
|
422
|
|
|
|
|
903
|
|
|
204
|
|
|
|
|
356
|
|
235
|
|
|
|
|
|
|
|
236
|
422
|
|
|
|
|
2470
|
my $caution |
237
|
|
|
|
|
|
|
= join q{}, |
238
|
204
|
|
|
|
|
395
|
map { sprintf("%-*s : %s\n", $term_width, $term_set->{terms}[$_], $term_set->{defns}[$_]) } |
239
|
204
|
|
|
|
|
380
|
0..$#{ $term_set->{terms} }; |
240
|
|
|
|
|
|
|
|
241
|
204
|
|
|
|
|
354
|
for my $inflexion_set (@{ $term_set->{inflexions} }) { |
|
204
|
|
|
|
|
732
|
|
242
|
419
|
|
|
|
|
473
|
my $inflexions = [ map { $_->{term} } @{ $inflexion_set } ]; |
|
877
|
|
|
|
|
2327
|
|
|
419
|
|
|
|
|
650
|
|
243
|
419
|
|
|
|
|
829
|
for my $term_data (@{ $inflexion_set }) { |
|
419
|
|
|
|
|
594
|
|
244
|
877
|
|
|
|
|
1239
|
my $term = $term_data->{term}; |
245
|
877
|
|
|
|
|
1453
|
my $silent = $term_data->{silent}; |
246
|
877
|
|
|
|
|
5774
|
$CAUTIONS_FOR{lc $term} = { |
247
|
|
|
|
|
|
|
display => $silent, |
248
|
|
|
|
|
|
|
explanation => $caution, |
249
|
|
|
|
|
|
|
inflexions => $inflexions |
250
|
|
|
|
|
|
|
}; |
251
|
877
|
100
|
|
|
|
2194
|
if (!$silent) { |
252
|
752
|
|
|
752
|
|
18186
|
push @regex_components, _rewrite { s{\h+}{\\s+}g } $term; |
|
752
|
|
|
|
|
7544
|
|
253
|
752
|
|
|
752
|
|
14848
|
push @VIM_CAUTION_REGEX_COMPONENTS, _rewrite { s{\h+}{\\_s\\+}g } $term; |
|
752
|
|
|
|
|
1788
|
|
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
1
|
|
|
|
|
471
|
my $cautions_regex = '\b(?' . join('|', reverse sort @regex_components) . ')\b'; |
261
|
1
|
|
|
|
|
2163
|
$CAUTIONS_REGEX = qr{$cautions_regex}i; |
262
|
|
|
|
|
|
|
|
263
|
1
|
|
|
|
|
816
|
return 1; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub _gen_pres_participle_for { |
267
|
250
|
|
|
250
|
|
1730
|
my ($verb) = @_; |
268
|
|
|
|
|
|
|
|
269
|
250
|
100
|
100
|
|
|
10763
|
$verb =~ s/ie$/y/ |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
270
|
|
|
|
|
|
|
or $verb =~ s/ue$/u/ |
271
|
|
|
|
|
|
|
or $verb =~ s/([auy])e$/$1/ |
272
|
|
|
|
|
|
|
or $verb =~ s/ski$/ski/ |
273
|
|
|
|
|
|
|
or $verb =~ s/[^b]i$// |
274
|
|
|
|
|
|
|
or $verb =~ s/^(are|were)$/be/ |
275
|
|
|
|
|
|
|
or $verb =~ s/^(had)$/hav/ |
276
|
|
|
|
|
|
|
or $verb =~ s/(hoe)$/$1/ |
277
|
|
|
|
|
|
|
or $verb =~ s/([^e])e$/$1/ |
278
|
|
|
|
|
|
|
or $verb =~ m/er$/ |
279
|
|
|
|
|
|
|
or $verb =~ m/open$/ |
280
|
|
|
|
|
|
|
or $verb =~ s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/; |
281
|
|
|
|
|
|
|
|
282
|
250
|
|
|
|
|
1198
|
return "${verb}ing"; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub _gen_verb_errors { |
286
|
250
|
|
|
250
|
|
3914
|
my ($pres, $third, $past, $pastp, $presp) = @_; |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
return ( |
289
|
250
|
100
|
|
|
|
9755
|
($pres ne $third ? ( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
290
|
|
|
|
|
|
|
"====[ Incorrect inflexion of verb for the specified pronoun ]=================", |
291
|
|
|
|
|
|
|
" (he,she,it) $pres --> (he,she,it) $third ", |
292
|
|
|
|
|
|
|
" (I,you,we,they) $third --> (I,you,we,they) $pres ", |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
"====[ Incorrect inflexion of verb after a negated auxiliary ]=================", |
295
|
|
|
|
|
|
|
"(did,would,should,could,must,might)n't $third " |
296
|
|
|
|
|
|
|
." --> (did,would,should,could,must,might)n't $pres", |
297
|
|
|
|
|
|
|
):()), |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
"====[ Incorrect inflexion of verb after a negated auxiliary ]=================", |
300
|
|
|
|
|
|
|
"(did,would,should,could,must,might)n't $past " |
301
|
|
|
|
|
|
|
." --> (did,would,should,could,must,might)n't $pres", |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
($past ne $pastp ? ( |
304
|
|
|
|
|
|
|
"====[ Incorrect use of participle instead of simple past or past perfect ]====", |
305
|
|
|
|
|
|
|
" (I,you,we,they) $pastp --> (I,you,we,they) $past " |
306
|
|
|
|
|
|
|
." --> (I,you,we,they) have $pastp " |
307
|
|
|
|
|
|
|
." --> (I,you,we,they) had $pastp ", |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
" (he,she,it) $pastp --> (he,she,it) $past " |
310
|
|
|
|
|
|
|
." --> (he,she,it) has $pastp " |
311
|
|
|
|
|
|
|
." --> (he,she,it) had $pastp ", |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
"====[ Incorrect use of simple past instead of past participle ]================", |
314
|
|
|
|
|
|
|
" (be,being,been,was,were) $past --> (be,being,been,was,were) $pastp ", |
315
|
|
|
|
|
|
|
" (has,had,have,having) $past --> (has,had,have,having) $pastp ", |
316
|
|
|
|
|
|
|
" (be,being,been,was,were) $past --> (be,being,been,was,were) $pastp ", |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
"====[ Incorrect inflexion of verb after a negated auxiliary ]=================", |
319
|
|
|
|
|
|
|
"(did,would,should,could,must,might)n't $pastp " |
320
|
|
|
|
|
|
|
." --> (did,would,should,could,must,might)n't $pres", |
321
|
|
|
|
|
|
|
):()), |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
"====[ Incorrect use of infinitive instead of past participle ]=================", |
324
|
|
|
|
|
|
|
" (be,being,been,was,were) $pres --> (be,being,been,was,were) $pastp ", |
325
|
|
|
|
|
|
|
" (has,had,have,having) $pres --> (has,had,have,having) $pastp ", |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
"====[ Incorrect use of participle instead of infinitive ]=================", |
328
|
|
|
|
|
|
|
" to ($pastp,$presp) --> to $pres ", |
329
|
|
|
|
|
|
|
($third ne $pres ? |
330
|
|
|
|
|
|
|
" to $third --> to $pres " |
331
|
|
|
|
|
|
|
:()), |
332
|
|
|
|
|
|
|
($past ne $pastp ? |
333
|
|
|
|
|
|
|
" to $past --> to $pres " |
334
|
|
|
|
|
|
|
:()), |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
"====[ Incorrect use of present participle instead of past participle ]=========", |
337
|
|
|
|
|
|
|
" being $presp --> being $pastp ", |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
"====[ Incorrect use of \"try and\" instead of \"try to\" ]=====================", |
340
|
|
|
|
|
|
|
"try and ($pres,$past,$pastp,$presp) --> try to $pres ", |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
"====[ Incorrect inflexion of verb after \"try to\" ]===========================", |
343
|
|
|
|
|
|
|
" try to ($past,$pastp,$presp) --> try to $pres ", |
344
|
|
|
|
|
|
|
" tried to ($past,$pastp,$presp) --> tried to $pres ", |
345
|
|
|
|
|
|
|
" trying to ($past,$pastp,$presp) --> trying to $pres ", |
346
|
|
|
|
|
|
|
); |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
sub _gen_absolute_adjective_errors { |
350
|
19
|
|
|
19
|
|
163
|
my ($adj, $modifier) = @_; |
351
|
19
|
|
100
|
|
|
78
|
$modifier //= ''; |
352
|
|
|
|
|
|
|
|
353
|
19
|
|
|
|
|
55
|
my @QUALIFIERS = qw< |
354
|
|
|
|
|
|
|
somewhat highly extremely totally completely absolutely utterly |
355
|
|
|
|
|
|
|
>; |
356
|
19
|
|
|
|
|
69
|
my $QUALIFIERS = '(' . join(',', @QUALIFIERS) . ')'; |
357
|
|
|
|
|
|
|
|
358
|
19
|
|
|
|
|
146
|
my @errors = ( |
359
|
|
|
|
|
|
|
"====[ Incorrect use of modifier with ungradeable adjective ]===================", |
360
|
|
|
|
|
|
|
" more $adj --> $adj ", |
361
|
|
|
|
|
|
|
" most $adj --> $adj ", |
362
|
|
|
|
|
|
|
" quite $adj --> $adj ", |
363
|
|
|
|
|
|
|
" rather $adj --> $adj ", |
364
|
|
|
|
|
|
|
" very $adj --> $adj ", |
365
|
|
|
|
|
|
|
" $QUALIFIERS $adj --> $adj ", |
366
|
|
|
|
|
|
|
); |
367
|
|
|
|
|
|
|
|
368
|
19
|
100
|
|
|
|
39
|
if ($modifier) { |
369
|
5
|
|
|
|
|
21
|
$modifier =~ s{ \A [(] | [)] \z}{}xgms; |
370
|
5
|
|
|
|
|
15
|
for my $mod (split(',', $modifier)) { |
371
|
6
|
|
|
|
|
13
|
$errors[1] .= " --> more $mod $adj"; |
372
|
6
|
|
|
|
|
14
|
$errors[2] .= " --> most $mod $adj"; |
373
|
6
|
|
|
|
|
14
|
$errors[3] .= " --> quite $mod $adj"; |
374
|
6
|
|
|
|
|
23
|
$errors[4] .= " --> rather $mod $adj"; |
375
|
6
|
|
|
|
|
18
|
$errors[5] .= " --> very $mod $adj"; |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
19
|
|
|
|
|
85
|
return @errors; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
sub _load_errors { |
383
|
|
|
|
|
|
|
# Gather config from search path |
384
|
6
|
|
|
|
|
157
|
local @ARGV = grep { -e } |
|
3
|
|
|
|
|
13
|
|
385
|
1
|
|
|
1
|
|
4
|
map { ("$_.$ERRORS_FILE", "$_$ERRORS_FILE") } |
386
|
|
|
|
|
|
|
@CONFIG_PATH; |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# If no config, we're done... |
389
|
1
|
50
|
|
|
|
7
|
return if !@ARGV; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Extract corrections... |
392
|
1
|
|
|
|
|
1
|
my @regex_components; |
393
|
1
|
|
|
|
|
3
|
my $explanation = '????'; |
394
|
1
|
|
|
|
|
3
|
my $last_was_explanation = 1; |
395
|
1
|
|
|
|
|
3
|
my @insertions; |
396
|
|
|
|
|
|
|
LINE: |
397
|
1
|
|
100
|
|
|
267
|
while (my $next_line = shift(@insertions) // readline) { |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# Ignore comment and empty lines... |
400
|
7666
|
100
|
|
|
|
46512
|
next LINE if $next_line =~ m{\A \h* (?: [#] | \Z )}xms; |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# Handle explanation lines... |
403
|
7398
|
100
|
|
|
|
81944
|
if ($next_line =~ m{\A \h* ===\S* \h* (.*?) \h* \S*===.* \Z }xms) { |
404
|
2316
|
100
|
|
|
|
11121
|
$explanation = $last_was_explanation ? "$explanation\n$1" : $1; |
405
|
2316
|
|
|
|
|
3116
|
$last_was_explanation = 1; |
406
|
2316
|
|
|
|
|
11556
|
next LINE; |
407
|
|
|
|
|
|
|
} |
408
|
5082
|
|
|
|
|
6830
|
$last_was_explanation = 0; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# Generate errors from a specification... |
411
|
5082
|
100
|
|
|
|
13548
|
if ($next_line =~ m{\A\h* \h* (?\S+) \h* (?\S+) \h* (?\S+) \h* (?\S+)}xms) { |
412
|
250
|
|
|
|
|
2789
|
push @insertions, _gen_verb_errors(@+{qw}, _gen_pres_participle_for($+{pres})); |
413
|
250
|
|
|
|
|
2120
|
next LINE; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
# Generate errors from an specification... |
417
|
4832
|
100
|
|
|
|
11787
|
if ($next_line =~ m{\A\h* \S+) \h*)?> \h* (?\S+) }xms) { |
418
|
19
|
|
|
|
|
445
|
push @insertions, _gen_absolute_adjective_errors( @+{qw< adjective modifier >} ); |
419
|
19
|
|
|
|
|
106
|
next LINE; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# Extract error --> correction pair... |
423
|
4813
|
|
|
|
|
68665
|
$next_line =~ m{ |
424
|
|
|
|
|
|
|
\A \h* |
425
|
|
|
|
|
|
|
(? .*? ) |
426
|
|
|
|
|
|
|
\h* --> \h* |
427
|
|
|
|
|
|
|
(? .*? ) |
428
|
|
|
|
|
|
|
\h* \Z |
429
|
|
|
|
|
|
|
}xms; |
430
|
4813
|
|
|
|
|
75361
|
my ($error, $correction) = @+{'error', 'correction'}; |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# Ignore invalid lines... |
433
|
4813
|
50
|
|
|
|
29962
|
next LINE if !defined $error; |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# Expand inflected forms... |
436
|
4813
|
|
|
|
|
12941
|
my @error_inflexions = _inflect_term($error); |
437
|
|
|
|
|
|
|
my @corrections_inflections |
438
|
4813
|
|
|
|
|
19187
|
= map {[_inflect_term($_)]} |
|
5370
|
|
|
|
|
11205
|
|
439
|
|
|
|
|
|
|
split /\h+-->\h+/, |
440
|
|
|
|
|
|
|
$correction; |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
# Iterated inflections in parallel... |
443
|
4813
|
|
|
|
|
15051
|
for my $next (0..$#error_inflexions) { |
444
|
15639
|
|
|
|
|
66596
|
my $error = $error_inflexions[$next]; |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# Build normalized transform from error to each correction... |
447
|
15639
|
|
|
|
|
26267
|
for my $correction (@corrections_inflections) { |
448
|
17324
|
|
|
17324
|
|
86622
|
my $normalized_error = _rewrite { s{\h+}{ }gxms } lc $error; |
|
17324
|
|
|
|
|
132697
|
|
449
|
17324
|
|
66
|
|
|
62389
|
push @{$CORRECTIONS_FOR{$normalized_error}}, |
|
17324
|
|
|
|
|
120321
|
|
450
|
|
|
|
|
|
|
$correction->[$next] // $correction->[-1]; |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# Record explanation... |
453
|
17324
|
|
|
|
|
92679
|
$EXPLANATION_FOR{$normalized_error} = $explanation; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
# Remember error for eventual regexes (with generalized whitespace)... |
457
|
15639
|
|
|
15639
|
|
74232
|
push @regex_components, _rewrite { s{\h+}{\\s+}g } $error; |
|
15639
|
|
|
|
|
105595
|
|
458
|
15639
|
|
|
15639
|
|
87011
|
push @VIM_ERROR_REGEX_COMPONENTS, _rewrite { s{\h+}{\\_s\\+}g } $error; |
|
15639
|
|
|
|
|
112133
|
|
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# Build error-detecting regex... |
463
|
1
|
|
|
|
|
32206
|
my $ERRORONEOUS_TERM = join('|', reverse sort @regex_components); |
464
|
1
|
|
|
|
|
133706
|
$ERRORS_REGEX = qr{ |
465
|
|
|
|
|
|
|
$SPACE_TRANSITION |
466
|
|
|
|
|
|
|
( $ERRORONEOUS_TERM | (?&REPEATED_WORD) ) |
467
|
|
|
|
|
|
|
$SPACE_TRANSITION |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
(?(DEFINE) |
470
|
|
|
|
|
|
|
(? (? \S++) \s++ \k ) |
471
|
|
|
|
|
|
|
) |
472
|
|
|
|
|
|
|
}ixms; |
473
|
|
|
|
|
|
|
|
474
|
1
|
|
|
|
|
13371
|
return 1; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Apply regexes to detect offending terms... |
478
|
|
|
|
|
|
|
sub extract_cautions_from { |
479
|
0
|
|
|
0
|
1
|
|
my ($text) = @_; |
480
|
|
|
|
|
|
|
|
481
|
0
|
|
|
|
|
|
state %cautions_cache; |
482
|
0
|
0
|
|
|
|
|
if (!exists $cautions_cache{$text}) { |
483
|
0
|
|
|
|
|
|
my $cache = $cautions_cache{$text} = []; |
484
|
0
|
|
|
|
|
|
while ($text =~ m{\G .*? $CAUTIONS_REGEX}gcxms) { |
485
|
0
|
|
|
|
|
|
push @{$cache}, Lingua::EN::Grammarian::Caution->new($1,\$text); |
|
0
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
|
489
|
0
|
|
|
|
|
|
return @{ $cautions_cache{$text} }; |
|
0
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub extract_errors_from { |
493
|
0
|
|
|
0
|
1
|
|
my ($text) = @_; |
494
|
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
|
state %errors_cache; |
496
|
0
|
0
|
|
|
|
|
if (!exists $errors_cache{$text}) { |
497
|
0
|
|
|
|
|
|
my $cache = $errors_cache{$text} = []; |
498
|
0
|
|
|
|
|
|
while ($text =~ m{\G .*? $ERRORS_REGEX}gcxms) { |
499
|
0
|
|
|
|
|
|
push @{$cache}, Lingua::EN::Grammarian::Error->new($1,\$text); |
|
0
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
0
|
|
|
|
|
|
return @{ $errors_cache{$text} }; |
|
0
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# Report coverage... |
507
|
|
|
|
|
|
|
sub get_coverage_stats { |
508
|
|
|
|
|
|
|
return { |
509
|
0
|
|
|
0
|
1
|
|
cautions => scalar keys %CAUTIONS_FOR, |
510
|
|
|
|
|
|
|
errors => scalar keys %CORRECTIONS_FOR, |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
# Identify offences (if any) at a particular location... |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
sub get_error_at { |
517
|
0
|
|
|
0
|
1
|
|
my ($text, $index_or_line, $col) = @_; |
518
|
0
|
|
|
|
|
|
return _problem_in($text, [extract_errors_from($text)], $index_or_line, $col,\do{my $no_next}); |
|
0
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
sub get_next_error_at { |
522
|
0
|
|
|
0
|
0
|
|
my ($text, $index_or_line, $col) = @_; |
523
|
0
|
|
|
|
|
|
state $prev_error_index = -1; |
524
|
0
|
|
|
|
|
|
return _problem_in($text, [extract_errors_from($text)], $index_or_line, $col,\$prev_error_index); |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
sub get_caution_at { |
528
|
0
|
|
|
0
|
1
|
|
my ($text, $index_or_line, $col) = @_; |
529
|
0
|
|
|
|
|
|
return _problem_in($text, [extract_cautions_from($text)], $index_or_line, $col,\do{my $no_next}); |
|
0
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
sub get_next_caution_at { |
533
|
0
|
|
|
0
|
0
|
|
my ($text, $index_or_line, $col) = @_; |
534
|
0
|
|
|
|
|
|
state $prev_caution_index = -1; |
535
|
0
|
|
|
|
|
|
return _problem_in($text, [extract_cautions_from($text)], $index_or_line, $col,\$prev_caution_index); |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
sub _problem_in { |
539
|
0
|
|
|
0
|
|
|
my ($text, $problems_ref, $index_or_line, $col, $prev_findex_ref) = @_; |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
# Convert line/col to index... |
542
|
0
|
0
|
|
|
|
|
if (defined $col) { |
543
|
0
|
|
|
|
|
|
$index_or_line -= 1; |
544
|
0
|
0
|
|
|
|
|
$text =~ m{( \A (?: [^\n]* \n){$index_or_line} [^\n]{$col} )}xms |
545
|
|
|
|
|
|
|
or return; |
546
|
0
|
|
|
|
|
|
$index_or_line = length($1); |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# Look for a hit... |
550
|
0
|
|
|
|
|
|
for my $problem (@{$problems_ref}) { |
|
0
|
|
|
|
|
|
|
551
|
0
|
|
|
|
|
|
my $findex = $problem->from->{index}; |
552
|
0
|
|
|
|
|
|
my $tindex = $problem->to->{index}; |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
# Cursor is "in" a problem... |
555
|
0
|
0
|
0
|
|
|
|
if ($findex <= $index_or_line && $index_or_line <= $tindex && $findex != (${$prev_findex_ref} // -1)) { |
|
0
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
556
|
0
|
|
|
|
|
|
${$prev_findex_ref} = $findex; |
|
0
|
|
|
|
|
|
|
557
|
0
|
0
|
|
|
|
|
return wantarray ? ($problem, 1) # There's a problem and the cursor *is* over it |
558
|
|
|
|
|
|
|
: $problem; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# Cursor not in a problem, so return next problem... |
562
|
|
|
|
|
|
|
elsif ($findex > $index_or_line) { |
563
|
0
|
|
|
|
|
|
${$prev_findex_ref} = $findex; |
|
0
|
|
|
|
|
|
|
564
|
0
|
0
|
|
|
|
|
return wantarray ? ($problem, 0) # There's a problem and the cursor *isn't* over it |
565
|
|
|
|
|
|
|
: undef; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# Otherwise, it's a miss... |
570
|
0
|
|
|
|
|
|
return; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# Provide regexes for matching grammar problems in Vim... |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
sub get_vim_error_regexes { |
577
|
0
|
|
|
0
|
1
|
|
_build_vim_regex_from(@VIM_ERROR_REGEX_COMPONENTS); |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
sub get_vim_caution_regexes { |
581
|
0
|
|
|
0
|
1
|
|
_build_vim_regex_from(@VIM_CAUTION_REGEX_COMPONENTS); |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
sub _build_vim_regex_from { |
585
|
0
|
|
|
0
|
|
|
my @regex_components = reverse sort @_; |
586
|
|
|
|
|
|
|
|
587
|
0
|
|
|
|
|
|
my @regexes; |
588
|
0
|
|
|
|
|
|
for my $alternative (@regex_components) { |
589
|
0
|
|
|
|
|
|
$alternative =~ s/'/''/g; |
590
|
0
|
0
|
0
|
|
|
|
if (@regexes && length($regexes[-1]) + length($alternative) + 10 < $VIM_REGEX_MAX_LEN) { |
591
|
0
|
|
|
|
|
|
$regexes[-1] .= '\\|' . $alternative; |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
else { |
594
|
0
|
|
|
|
|
|
push @regexes, '\\c' . $alternative; |
595
|
|
|
|
|
|
|
} |
596
|
|
|
|
|
|
|
} |
597
|
0
|
|
|
|
|
|
return map { '\<\%('.$_.'\)\>' } @regexes; |
|
0
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
my $UPPER_CASE_PAT = qr{\A [[:upper:]]* \Z}xms; |
602
|
|
|
|
|
|
|
my $LOWER_CASE_PAT = qr{\A [[:lower:]]* \Z}xms; |
603
|
|
|
|
|
|
|
my $TITLE_CASE_PAT = qr{\A [[:upper:]][[:lower:]]* \Z}xms; |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
# Convert a term to have the same capitalization as an original paradigm... |
606
|
|
|
|
|
|
|
my $_recase_like = sub { |
607
|
|
|
|
|
|
|
my ($paradigm, $target) = @_; |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# Process two strings word-by-word... |
610
|
|
|
|
|
|
|
my @paradigm_words = split($SPACE_TRANSITION, $paradigm); |
611
|
|
|
|
|
|
|
my @target_words = split($SPACE_TRANSITION, $target ); |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
while (@paradigm_words < @target_words) { |
614
|
|
|
|
|
|
|
push @paradigm_words, $paradigm_words[-1]; |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
# Accumulate modified target by transforming each word... |
618
|
|
|
|
|
|
|
my $modified_target = ""; |
619
|
|
|
|
|
|
|
for my $next_paradigm (@paradigm_words) { |
620
|
|
|
|
|
|
|
# If target completely processed, we're done... |
621
|
|
|
|
|
|
|
last if !@target_words; |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
# Otherwise, convert target according to pattern of paradigm... |
624
|
|
|
|
|
|
|
$modified_target .= $next_paradigm =~ $UPPER_CASE_PAT ? uc(shift @target_words) |
625
|
|
|
|
|
|
|
: $next_paradigm =~ $LOWER_CASE_PAT ? lc(shift @target_words) |
626
|
|
|
|
|
|
|
: $next_paradigm =~ $TITLE_CASE_PAT ? ucfirst(lc(shift @target_words)) |
627
|
|
|
|
|
|
|
: shift @target_words |
628
|
|
|
|
|
|
|
; |
629
|
|
|
|
|
|
|
} |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
return $modified_target; |
632
|
|
|
|
|
|
|
}; |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
package Lingua::EN::Grammarian::Error; { |
635
|
1
|
|
|
1
|
|
1849
|
use Hash::Util::FieldHash 'fieldhash'; |
|
1
|
|
|
|
|
1227
|
|
|
1
|
|
|
|
|
556
|
|
636
|
|
|
|
|
|
|
*_rewrite = *Lingua::EN::Grammarian::_rewrite; |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
fieldhash my %match_for; |
639
|
|
|
|
|
|
|
fieldhash my %startpos_for; |
640
|
|
|
|
|
|
|
fieldhash my %endpos_for; |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
sub new { |
643
|
0
|
|
|
0
|
|
|
my ($class, $term, $source_ref) = @_; |
644
|
|
|
|
|
|
|
|
645
|
0
|
|
|
|
|
|
my $newobj = bless \do{my $scalar}, $class; |
|
0
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
|
647
|
0
|
|
|
|
|
|
my $endindex = pos(${$source_ref}) - 1; |
|
0
|
|
|
|
|
|
|
648
|
0
|
|
|
|
|
|
my $startindex = pos(${$source_ref}) - length($term); |
|
0
|
|
|
|
|
|
|
649
|
0
|
|
|
|
|
|
my $startline = 1 + substr(${$source_ref},0,$startindex) =~ tr/\n//; |
|
0
|
|
|
|
|
|
|
650
|
0
|
|
|
|
|
|
my $endline = 1 + substr(${$source_ref},0,$endindex) =~ tr/\n//; |
|
0
|
|
|
|
|
|
|
651
|
0
|
|
|
0
|
|
|
my $startcol = 1 + length(Lingua::EN::Grammarian::_rewrite {s{\A.*\n}{}xms} substr(${$source_ref},0,$startindex)); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
652
|
0
|
|
|
0
|
|
|
my $endcol = 1 + length(Lingua::EN::Grammarian::_rewrite {s{\A.*\n}{}xms} substr(${$source_ref},0,$endindex)); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
|
654
|
0
|
|
|
|
|
|
$match_for{$newobj} = $term; |
655
|
0
|
|
|
|
|
|
$startpos_for{$newobj} = { index => $startindex, line => $startline, column => $startcol }; |
656
|
0
|
|
|
|
|
|
$endpos_for{$newobj} = { index => $endindex, line => $endline, column => $endcol }; |
657
|
|
|
|
|
|
|
|
658
|
0
|
|
|
|
|
|
return $newobj; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
0
|
|
|
0
|
|
|
sub match { my $self = shift; return $match_for{$self} } |
|
0
|
|
|
|
|
|
|
662
|
1
|
|
|
1
|
|
2015
|
use overload q{""} => sub { my $self = shift; return $match_for{$self} }; |
|
1
|
|
|
0
|
|
9433
|
|
|
1
|
|
|
|
|
15
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
663
|
|
|
|
|
|
|
|
664
|
0
|
|
|
0
|
|
|
sub from { my $self = shift; return $startpos_for{$self} } |
|
0
|
|
|
|
|
|
|
665
|
0
|
|
|
0
|
|
|
sub to { my $self = shift; return $endpos_for{$self} } |
|
0
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub explanation { |
668
|
0
|
|
|
0
|
|
|
my $self = shift; |
669
|
0
|
|
0
|
0
|
|
|
return $EXPLANATION_FOR{lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self}} |
|
0
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
// "Repeated word"; |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
sub explanation_hash { |
674
|
0
|
|
|
0
|
|
|
return {}; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
sub suggestions { |
678
|
0
|
|
|
0
|
|
|
my $self = shift; |
679
|
0
|
|
|
|
|
|
my $term = $match_for{$self}; |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
# Locate suggestions... |
682
|
0
|
|
|
0
|
|
|
my $corrections_ref |
683
|
0
|
0
|
0
|
|
|
|
= $CORRECTIONS_FOR{lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $term} |
684
|
|
|
|
|
|
|
// [$term =~ m{\A (\S+) \s+ \1 \z}ixms ? $1 : () ]; |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
# Adjust their casings... |
687
|
0
|
|
|
|
|
|
return map { $_recase_like->($term, $_) } @{$corrections_ref}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
package Lingua::EN::Grammarian::Caution; { |
692
|
1
|
|
|
1
|
|
393
|
use Hash::Util::FieldHash 'fieldhash'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
555
|
|
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
fieldhash my %match_for; |
695
|
|
|
|
|
|
|
fieldhash my %startpos_for; |
696
|
|
|
|
|
|
|
fieldhash my %endpos_for; |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
sub new { |
699
|
0
|
|
|
0
|
|
|
my ($class, $term, $source_ref) = @_; |
700
|
|
|
|
|
|
|
|
701
|
0
|
|
|
|
|
|
my $newobj = bless \do{my $scalar}, $class; |
|
0
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
|
703
|
0
|
|
|
|
|
|
my $endindex = pos(${$source_ref}) - 1; |
|
0
|
|
|
|
|
|
|
704
|
0
|
|
|
|
|
|
my $startindex = pos(${$source_ref}) - length($term); |
|
0
|
|
|
|
|
|
|
705
|
0
|
|
|
|
|
|
my $startline = 1 + substr(${$source_ref},0,$startindex) =~ tr/\n//; |
|
0
|
|
|
|
|
|
|
706
|
0
|
|
|
|
|
|
my $endline = 1 + substr(${$source_ref},0,$endindex) =~ tr/\n//; |
|
0
|
|
|
|
|
|
|
707
|
0
|
|
|
0
|
|
|
my $startcol = 1 + length(Lingua::EN::Grammarian::_rewrite { s{\A.*\n}{}xms } substr(${$source_ref},0,$startindex)); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
708
|
0
|
|
|
0
|
|
|
my $endcol = 1 + length(Lingua::EN::Grammarian::_rewrite { s{\A.*\n}{}xms } substr(${$source_ref},0,$endindex)); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
0
|
|
|
|
|
|
$match_for{$newobj} = $term; |
711
|
0
|
|
|
|
|
|
$startpos_for{$newobj} = { index => $startindex, line => $startline, column => $startcol }; |
712
|
0
|
|
|
|
|
|
$endpos_for{$newobj} = { index => $endindex, line => $endline, column => $endcol }; |
713
|
|
|
|
|
|
|
|
714
|
0
|
|
|
|
|
|
return $newobj; |
715
|
|
|
|
|
|
|
} |
716
|
|
|
|
|
|
|
|
717
|
0
|
|
|
0
|
|
|
sub match { my $self = shift; return $match_for{$self} } |
|
0
|
|
|
|
|
|
|
718
|
1
|
|
|
1
|
|
7
|
use overload q{""} => sub { my $self = shift; return $match_for{$self} }; |
|
1
|
|
|
0
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
719
|
|
|
|
|
|
|
|
720
|
0
|
|
|
0
|
|
|
sub from { my $self = shift; return $startpos_for{$self} } |
|
0
|
|
|
|
|
|
|
721
|
0
|
|
|
0
|
|
|
sub to { my $self = shift; return $endpos_for{$self} } |
|
0
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
sub explanation { |
724
|
0
|
|
|
0
|
|
|
my $self = shift; |
725
|
0
|
|
|
0
|
|
|
my $target = lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self}; |
|
0
|
|
|
|
|
|
|
726
|
0
|
|
|
|
|
|
my $suggested = $CAUTIONS_FOR{$target}; |
727
|
0
|
0
|
|
|
|
|
return if !defined $suggested; |
728
|
0
|
|
|
|
|
|
return $suggested->{explanation}; |
729
|
|
|
|
|
|
|
} |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
sub explanation_hash { |
732
|
0
|
|
|
0
|
|
|
my $self = shift; |
733
|
0
|
|
|
0
|
|
|
my $target = lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self}; |
|
0
|
|
|
|
|
|
|
734
|
0
|
|
|
|
|
|
my $suggested = $CAUTIONS_FOR{$target}; |
735
|
0
|
0
|
|
|
|
|
return if !defined $suggested; |
736
|
0
|
|
|
|
|
|
return { split /\s+:\s+|\s*\n/, $suggested->{explanation} }; |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
sub suggestions { |
740
|
0
|
|
|
0
|
|
|
my $self = shift; |
741
|
0
|
|
|
0
|
|
|
my $target = lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self}; |
|
0
|
|
|
|
|
|
|
742
|
0
|
|
|
|
|
|
my $suggested = $CAUTIONS_FOR{$target}; |
743
|
0
|
0
|
|
|
|
|
return if !defined $suggested; |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
# Reorder suggestions by relevance to term... |
746
|
0
|
0
|
|
|
|
|
return map { $_recase_like->($match_for{$self}, $_) } |
|
0
|
0
|
|
|
|
|
|
747
|
|
|
|
|
|
|
sort { |
748
|
0
|
|
|
|
|
|
$a eq $target ? -1 |
749
|
|
|
|
|
|
|
: $b eq $target ? +1 |
750
|
|
|
|
|
|
|
: $a cmp $b |
751
|
|
|
|
|
|
|
} |
752
|
0
|
|
|
|
|
|
@{ $suggested->{inflexions} } |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
} |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
758
|
|
|
|
|
|
|
__END__ |