File Coverage

blib/lib/Lingua/EN/Grammarian.pm

Criterion	Covered	Total	%
statement	179	331	54.0
branch	62	100	62.0
condition	32	61	52.4
subroutine	23	59	38.9
pod	7	9	77.7
total	303	560	54.1

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::EN::Grammarian;
2							our $VERSION = '0.000005';
3
4	1			1		31522	use 5.010; use warnings;
	1			1		5
	1					38
	1					7
	1					2
	1					30
5	1			1		5	use Carp;
	1					6
	1					286
6
7							# Standard config files...
8							my $CAUTIONS_FILE = 'grammarian_cautions';
9							my $ERRORS_FILE = 'grammarian_errors';
10
11							# Standard config file search path...
12							my @CONFIG_PATH = ( '/usr/local/share/grammarian/', "$ENV{HOME}/", "$ENV{PWD}/" );
13
14
15							# Export interface...
16							my @DEF_EXPORTS = qw<
17							extract_cautions_from
18							extract_errors_from
19							>;
20
21							my @ALL_EXPORTS = (
22							@DEF_EXPORTS,
23							qw<
24							get_coverage_stats
25							get_error_at
26							get_next_error_at
27							get_caution_at
28							get_next_caution_at
29							get_vim_error_regexes
30							get_vim_caution_regexes
31							>
32							);
33
34							sub import {
35	1			1		18	my $self = shift;
36	1	50				9	my @exports = @_ ? @_ : @DEF_EXPORTS;
37	1					4	my $caller = caller;
38	1	50				3	for my $exported_sub (map { /^:all$/i ? @ALL_EXPORTS : $_ } @exports) {
	2					17
39	1			1		5	no strict 'refs';
	1					1
	1					1270
40	2					3	my $impl = *{$exported_sub}{CODE};
	2					9
41	2	50	33			21	croak "$self does not provide $exported_sub()"
42							if $exported_sub !~ /^(?:get_\|extract_)/ \|\| !$impl;
43	2					2	*{$caller.'::'.$exported_sub} = $impl;
	2					50
44							}
45							}
46
47							# The data extracted from those files...
48							my %CAUTIONS_FOR;
49							my $CAUTIONS_REGEX;
50							my @VIM_CAUTION_REGEX_COMPONENTS;
51
52							my %CORRECTIONS_FOR;
53							my %EXPLANATION_FOR;
54							my $ERRORS_REGEX;
55							my @VIM_ERROR_REGEX_COMPONENTS;
56
57							my $VIM_REGEX_MAX_LEN = 20000;
58
59							# Improved \b...
60							my $SPACE_TRANSITION = qr{
61							# Preceded by... And followed by...
62							(?<=[[:space:][:punct:]]) (?=[^[:space:][:punct:]])
63							\| \A (?=[^[:space:][:punct:]])
64							\| (?<=[^[:space:][:punct:]]) (?=[[:space:][:punct:]]\|\z)
65							}xms;
66
67							# Extract that data...
68							if (! _load_cautions()) {
69							warn qq{No "grammarian_cautions" file found in config search path:\n}
70							. qq{\n}
71							. join(q{}, map { qq{ $_\n} } @CONFIG_PATH)
72							. qq{\n}
73							. qq{(Did you forget to install it from the distribution?)\n};
74							}
75
76							if (! _load_errors()) {
77							warn qq{No "grammarian_errors" file found in config search path:\n}
78							. qq{\n}
79							. join(q{}, map { qq{ $_\n} } @CONFIG_PATH)
80							. qq{\n}
81							. qq{(Did you forget to install it from the distribution?)\n};
82							}
83
84							sub _rewrite (&$) {
85	50106			50106		127925	my ($transform, $text) = @_;
86	50106					121596	$transform->() for $text;
87	50106					244389	return $text;
88							}
89
90							sub _inflect_term {
91	10612			10612		24315	my ($term) = @_;
92
93	10612					44691	my $PRONOUN_MARKER = qr{
94							< (?: I \| s?he \| we \| me \| him \| us \| my \| his \| our \| mine \| hers \| ours) >
95							}xms;
96
97	10612					104904	my %PRONOUN_EXPANSION_FOR = (
98							'' => q{I,you,she,he,it,we,they},
99							'' => q{she,he},
100							'' => q{he,she},
101							'' => q{we,you,they},
102							'' => q{me,you,her,him,it,us,them},
103							'' => q{her,him},
104							'' => q{him,her},
105							'' => q{us,you,them},
106							'' => q{my,your,hers,his,its,our,their},
107							'' => q{her,his},
108							'' => q{our,your,their},
109							'' => q{mine,yours,hers,his,its,ours,theirs},
110							'' => q{hers,his},
111							'' => q{ours,yours,theirs},
112							);
113
114
115							# Preprocess expansions...
116	10612					58380	my @components = split /($PRONOUN_MARKER)/, $term;
117	10612					15611	$term = q{};
118	10612					14714	my $in_parens = 0;
119	10612					28486	while (@components) {
120	10714					31348	my ($prefix, $pronoun) = splice(@components, 0, 2);
121	10714					25680	$in_parens += ($prefix=~tr/(//) - ($prefix=~tr/)//);
122	10714					23116	$term .= $prefix;
123	10714	100				44286	if ($pronoun) {
124	170	100	33			994	$term .= ($in_parens ? '' : '(')
		100
125							. ($PRONOUN_EXPANSION_FOR{$pronoun} // $pronoun)
126							. ($in_parens ? '' : ')')
127							}
128							}
129
130							# Convert any parenthesized or starred set of alternatives...
131	10612					14273	my @inflexions;
132	10612					424855	$term =~ s{ (? \S*? (? \S? ) )
133							(?:
134							(? e [*] )
135							\| (? ch [*] )
136							\| (? (?<= [^aeiou] ) y [*] )
137							\| (? (?<= [^aeiou] ) y [(] s [)] )
138							\| (? [][] )
139							\| (? [*] )
140							\| [(] (? [^)]+ ) [)]
141							)
142							}
143							{
144	1			1		1070	my $ll = $+{last_letter};
	1					591
	1					830
	6665					44447
145	6665	100				251889	@inflexions = $+{e_star} ? ( 'e', 'es', 'ed', 'ing')
		50
		100
		100
		100
		100
		100
		100
146							: $+{ch_star} ? ( 'ch', 'ches', 'ched', 'ching')
147							: $+{y_star} ? ( 'y', 'ies', 'ied', 'ying')
148							: $+{y_s} ? ( 'y', 'ies', )
149							: $+{double_star} ? ( '', 's', $ll.'ed', $ll.'ing')
150							: $+{star} ? ( '', 's', 'ed', 'ing')
151							: $+{alts} ? ( ($+{root} ? '' : ()), split(',', $+{alts}) )
152							: ();
153
154	6665					83706	qq{$+{root}*};
155							}xmse;
156
157	10612	100				85767	return @inflexions ? map { my $infl = $term; $infl =~ s{[*]}{$_}; $infl} @inflexions
	27030					42548
	27030					74617
	27030					133599
158							: $term;
159							}
160
161							# Parse cautions file and convert to internal data structures...
162							sub _load_cautions {
163							# Gather config from current directory and home directory...
164	6					142	local @ARGV = grep { -e }
	3					13
165	1			1		3	map { ("$_.$CAUTIONS_FILE", "$_$CAUTIONS_FILE") }
166							@CONFIG_PATH;
167
168							# If no config, we're done...
169	1	50				9	return if !@ARGV;
170
171							# Store sets of terms together...
172	1					6	my @term_sets = { terms => [], defns => [], inflexions => [] };
173
174							# Parse configuration file...
175							LINE:
176	1					79	while (my $next_line = readline) {
177							# Ignore comments...
178	646	100				1594	next LINE if $next_line =~ m{ \A \h* [#] }xms;
179
180							# Blank lines delimit new term sets...
181	637	100				2024	if ($next_line =~ m{\A \h* \Z}xms) {
182	208					808	push @term_sets, { terms => [], defns => [], inflexions => [] };
183	208					899	next LINE;
184							}
185
186							# Parse config line...
187	429					4714	$next_line =~ m{
188							\A
189							(? -? )
190							\h* (? [^:]*? )
191							(?:
192							\h* :
193							\h* (? .*? )
194							)?
195							\h*
196							\Z
197							}xms;
198
199							# Unpack components...
200	429					2267	my $term = $+{term};
201	429		100			2675	my $defn = $+{defn} // q{};
202	429					2034	my $is_silent = length($+{is_silent});
203
204							# Warn of bad config...
205	429	50				1193	if (!defined $term) {
206	0					0	warn "Invalid entry in grammarian_cautions: $next_line";
207	0					0	next LINE;
208							}
209
210							# Unpack any inflexions...
211	429					804	my @inflexions = _inflect_term($term);
212
213	429					834	my $original = shift @inflexions;
214	429	100				1429	if ($defn =~ /\S/) {
215	422					441	push @{$term_sets[-1]{terms}}, $original;
	422					1072
216	422					612	push @{$term_sets[-1]{defns}}, $defn;
	422					831
217							}
218
219							# Store patterns to be matched...
220	429					533	my $order = 0;
221	429					703	for my $next_inflexion ($original, @inflexions) {
222	877					918	push @{ $term_sets[-1]{inflexions}[$order++] }, {silent => $is_silent, term => $next_inflexion};
	877					5398
223							}
224							}
225
226
227							# Compile list of cautions and the matching regex...
228	1					2	my @regex_components;
229							TERM_SET:
230	1					3	for my $term_set (@term_sets) {
231	209	100				210	next TERM_SET if !@{ $term_set->{terms} };
	209					860
232
233	1			1		7	use List::Util 'max';
	1					2
	1					3932
234	204					211	my $term_width = max map { length } @{ $term_set->{terms} };
	422					903
	204					356
235
236	422					2470	my $caution
237							= join q{},
238	204					395	map { sprintf("%-*s : %s\n", $term_width, $term_set->{terms}[$_], $term_set->{defns}[$_]) }
239	204					380	0..$#{ $term_set->{terms} };
240
241	204					354	for my $inflexion_set (@{ $term_set->{inflexions} }) {
	204					732
242	419					473	my $inflexions = [ map { $_->{term} } @{ $inflexion_set } ];
	877					2327
	419					650
243	419					829	for my $term_data (@{ $inflexion_set }) {
	419					594
244	877					1239	my $term = $term_data->{term};
245	877					1453	my $silent = $term_data->{silent};
246	877					5774	$CAUTIONS_FOR{lc $term} = {
247							display => $silent,
248							explanation => $caution,
249							inflexions => $inflexions
250							};
251	877	100				2194	if (!$silent) {
252	752			752		18186	push @regex_components, _rewrite { s{\h+}{\\s+}g } $term;
	752					7544
253	752			752		14848	push @VIM_CAUTION_REGEX_COMPONENTS, _rewrite { s{\h+}{\\_s\\+}g } $term;
	752					1788
254							}
255							}
256							}
257
258							}
259
260	1					471	my $cautions_regex = '\b(?' . join('\|', reverse sort @regex_components) . ')\b';
261	1					2163	$CAUTIONS_REGEX = qr{$cautions_regex}i;
262
263	1					816	return 1;
264							}
265
266							sub _gen_pres_participle_for {
267	250			250		1730	my ($verb) = @_;
268
269	250	100	100			10763	$verb =~ s/ie$/y/
			66
			66
			33
			33
			33
			66
			100
			100
			100
270							or $verb =~ s/ue$/u/
271							or $verb =~ s/([auy])e$/$1/
272							or $verb =~ s/ski$/ski/
273							or $verb =~ s/[^b]i$//
274							or $verb =~ s/^(are\|were)$/be/
275							or $verb =~ s/^(had)$/hav/
276							or $verb =~ s/(hoe)$/$1/
277							or $verb =~ s/([^e])e$/$1/
278							or $verb =~ m/er$/
279							or $verb =~ m/open$/
280							or $verb =~ s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/;
281
282	250					1198	return "${verb}ing";
283							}
284
285							sub _gen_verb_errors {
286	250			250		3914	my ($pres, $third, $past, $pastp, $presp) = @_;
287
288							return (
289	250	100				9755	($pres ne $third ? (
		100
		100
		100
290							"====[ Incorrect inflexion of verb for the specified pronoun ]=================",
291							" (he,she,it) $pres --> (he,she,it) $third ",
292							" (I,you,we,they) $third --> (I,you,we,they) $pres ",
293
294							"====[ Incorrect inflexion of verb after a negated auxiliary ]=================",
295							"(did,would,should,could,must,might)n't $third "
296							." --> (did,would,should,could,must,might)n't $pres",
297							):()),
298
299							"====[ Incorrect inflexion of verb after a negated auxiliary ]=================",
300							"(did,would,should,could,must,might)n't $past "
301							." --> (did,would,should,could,must,might)n't $pres",
302
303							($past ne $pastp ? (
304							"====[ Incorrect use of participle instead of simple past or past perfect ]====",
305							" (I,you,we,they) $pastp --> (I,you,we,they) $past "
306							." --> (I,you,we,they) have $pastp "
307							." --> (I,you,we,they) had $pastp ",
308
309							" (he,she,it) $pastp --> (he,she,it) $past "
310							." --> (he,she,it) has $pastp "
311							." --> (he,she,it) had $pastp ",
312
313							"====[ Incorrect use of simple past instead of past participle ]================",
314							" (be,being,been,was,were) $past --> (be,being,been,was,were) $pastp ",
315							" (has,had,have,having) $past --> (has,had,have,having) $pastp ",
316							" (be,being,been,was,were) $past --> (be,being,been,was,were) $pastp ",
317
318							"====[ Incorrect inflexion of verb after a negated auxiliary ]=================",
319							"(did,would,should,could,must,might)n't $pastp "
320							." --> (did,would,should,could,must,might)n't $pres",
321							):()),
322
323							"====[ Incorrect use of infinitive instead of past participle ]=================",
324							" (be,being,been,was,were) $pres --> (be,being,been,was,were) $pastp ",
325							" (has,had,have,having) $pres --> (has,had,have,having) $pastp ",
326
327							"====[ Incorrect use of participle instead of infinitive ]=================",
328							" to ($pastp,$presp) --> to $pres ",
329							($third ne $pres ?
330							" to $third --> to $pres "
331							:()),
332							($past ne $pastp ?
333							" to $past --> to $pres "
334							:()),
335
336							"====[ Incorrect use of present participle instead of past participle ]=========",
337							" being $presp --> being $pastp ",
338
339							"====[ Incorrect use of \"try and\" instead of \"try to\" ]=====================",
340							"try and ($pres,$past,$pastp,$presp) --> try to $pres ",
341
342							"====[ Incorrect inflexion of verb after \"try to\" ]===========================",
343							" try to ($past,$pastp,$presp) --> try to $pres ",
344							" tried to ($past,$pastp,$presp) --> tried to $pres ",
345							" trying to ($past,$pastp,$presp) --> trying to $pres ",
346							);
347							}
348
349							sub _gen_absolute_adjective_errors {
350	19			19		163	my ($adj, $modifier) = @_;
351	19		100			78	$modifier //= '';
352
353	19					55	my @QUALIFIERS = qw<
354							somewhat highly extremely totally completely absolutely utterly
355							>;
356	19					69	my $QUALIFIERS = '(' . join(',', @QUALIFIERS) . ')';
357
358	19					146	my @errors = (
359							"====[ Incorrect use of modifier with ungradeable adjective ]===================",
360							" more $adj --> $adj ",
361							" most $adj --> $adj ",
362							" quite $adj --> $adj ",
363							" rather $adj --> $adj ",
364							" very $adj --> $adj ",
365							" $QUALIFIERS $adj --> $adj ",
366							);
367
368	19	100				39	if ($modifier) {
369	5					21	$modifier =~ s{ \A [(] \| [)] \z}{}xgms;
370	5					15	for my $mod (split(',', $modifier)) {
371	6					13	$errors[1] .= " --> more $mod $adj";
372	6					14	$errors[2] .= " --> most $mod $adj";
373	6					14	$errors[3] .= " --> quite $mod $adj";
374	6					23	$errors[4] .= " --> rather $mod $adj";
375	6					18	$errors[5] .= " --> very $mod $adj";
376							}
377							}
378
379	19					85	return @errors;
380							}
381
382							sub _load_errors {
383							# Gather config from search path
384	6					157	local @ARGV = grep { -e }
	3					13
385	1			1		4	map { ("$_.$ERRORS_FILE", "$_$ERRORS_FILE") }
386							@CONFIG_PATH;
387
388							# If no config, we're done...
389	1	50				7	return if !@ARGV;
390
391							# Extract corrections...
392	1					1	my @regex_components;
393	1					3	my $explanation = '????';
394	1					3	my $last_was_explanation = 1;
395	1					3	my @insertions;
396							LINE:
397	1		100			267	while (my $next_line = shift(@insertions) // readline) {
398
399							# Ignore comment and empty lines...
400	7666	100				46512	next LINE if $next_line =~ m{\A \h* (?: [#] \| \Z )}xms;
401
402							# Handle explanation lines...
403	7398	100				81944	if ($next_line =~ m{\A \h* ===\S* \h* (.?) \h \S===. \Z }xms) {
404	2316	100				11121	$explanation = $last_was_explanation ? "$explanation\n$1" : $1;
405	2316					3116	$last_was_explanation = 1;
406	2316					11556	next LINE;
407							}
408	5082					6830	$last_was_explanation = 0;
409
410							# Generate errors from a specification...
411	5082	100				13548	if ($next_line =~ m{\A\h* \h* (?\S+) \h* (?\S+) \h* (?\S+) \h* (?\S+)}xms) {
412	250					2789	push @insertions, _gen_verb_errors(@+{qw}, _gen_pres_participle_for($+{pres}));
413	250					2120	next LINE;
414							}
415
416							# Generate errors from an specification...
417	4832	100				11787	if ($next_line =~ m{\A\h* \S+) \h)?> \h (?\S+) }xms) {
418	19					445	push @insertions, _gen_absolute_adjective_errors( @+{qw< adjective modifier >} );
419	19					106	next LINE;
420							}
421
422							# Extract error --> correction pair...
423	4813					68665	$next_line =~ m{
424							\A \h*
425							(? .*? )
426							\h* --> \h*
427							(? .*? )
428							\h* \Z
429							}xms;
430	4813					75361	my ($error, $correction) = @+{'error', 'correction'};
431
432							# Ignore invalid lines...
433	4813	50				29962	next LINE if !defined $error;
434
435							# Expand inflected forms...
436	4813					12941	my @error_inflexions = _inflect_term($error);
437							my @corrections_inflections
438	4813					19187	= map {[_inflect_term($_)]}
	5370					11205
439							split /\h+-->\h+/,
440							$correction;
441
442							# Iterated inflections in parallel...
443	4813					15051	for my $next (0..$#error_inflexions) {
444	15639					66596	my $error = $error_inflexions[$next];
445
446							# Build normalized transform from error to each correction...
447	15639					26267	for my $correction (@corrections_inflections) {
448	17324			17324		86622	my $normalized_error = _rewrite { s{\h+}{ }gxms } lc $error;
	17324					132697
449	17324		66			62389	push @{$CORRECTIONS_FOR{$normalized_error}},
	17324					120321
450							$correction->[$next] // $correction->[-1];
451
452							# Record explanation...
453	17324					92679	$EXPLANATION_FOR{$normalized_error} = $explanation;
454							}
455
456							# Remember error for eventual regexes (with generalized whitespace)...
457	15639			15639		74232	push @regex_components, _rewrite { s{\h+}{\\s+}g } $error;
	15639					105595
458	15639			15639		87011	push @VIM_ERROR_REGEX_COMPONENTS, _rewrite { s{\h+}{\\_s\\+}g } $error;
	15639					112133
459							}
460							}
461
462							# Build error-detecting regex...
463	1					32206	my $ERRORONEOUS_TERM = join('\|', reverse sort @regex_components);
464	1					133706	$ERRORS_REGEX = qr{
465							$SPACE_TRANSITION
466							( $ERRORONEOUS_TERM \| (?&REPEATED_WORD) )
467							$SPACE_TRANSITION
468
469							(?(DEFINE)
470							(? (? \S++) \s++ \k )
471							)
472							}ixms;
473
474	1					13371	return 1;
475							}
476
477							# Apply regexes to detect offending terms...
478							sub extract_cautions_from {
479	0			0	1		my ($text) = @_;
480
481	0						state %cautions_cache;
482	0	0					if (!exists $cautions_cache{$text}) {
483	0						my $cache = $cautions_cache{$text} = [];
484	0						while ($text =~ m{\G .*? $CAUTIONS_REGEX}gcxms) {
485	0						push @{$cache}, Lingua::EN::Grammarian::Caution->new($1,\$text);
	0
486							}
487							}
488
489	0						return @{ $cautions_cache{$text} };
	0
490							}
491
492							sub extract_errors_from {
493	0			0	1		my ($text) = @_;
494
495	0						state %errors_cache;
496	0	0					if (!exists $errors_cache{$text}) {
497	0						my $cache = $errors_cache{$text} = [];
498	0						while ($text =~ m{\G .*? $ERRORS_REGEX}gcxms) {
499	0						push @{$cache}, Lingua::EN::Grammarian::Error->new($1,\$text);
	0
500							}
501							}
502
503	0						return @{ $errors_cache{$text} };
	0
504							}
505
506							# Report coverage...
507							sub get_coverage_stats {
508							return {
509	0			0	1		cautions => scalar keys %CAUTIONS_FOR,
510							errors => scalar keys %CORRECTIONS_FOR,
511							}
512							}
513
514							# Identify offences (if any) at a particular location...
515
516							sub get_error_at {
517	0			0	1		my ($text, $index_or_line, $col) = @_;
518	0						return _problem_in($text, [extract_errors_from($text)], $index_or_line, $col,\do{my $no_next});
	0
519							}
520
521							sub get_next_error_at {
522	0			0	0		my ($text, $index_or_line, $col) = @_;
523	0						state $prev_error_index = -1;
524	0						return _problem_in($text, [extract_errors_from($text)], $index_or_line, $col,\$prev_error_index);
525							}
526
527							sub get_caution_at {
528	0			0	1		my ($text, $index_or_line, $col) = @_;
529	0						return _problem_in($text, [extract_cautions_from($text)], $index_or_line, $col,\do{my $no_next});
	0
530							}
531
532							sub get_next_caution_at {
533	0			0	0		my ($text, $index_or_line, $col) = @_;
534	0						state $prev_caution_index = -1;
535	0						return _problem_in($text, [extract_cautions_from($text)], $index_or_line, $col,\$prev_caution_index);
536							}
537
538							sub _problem_in {
539	0			0			my ($text, $problems_ref, $index_or_line, $col, $prev_findex_ref) = @_;
540
541							# Convert line/col to index...
542	0	0					if (defined $col) {
543	0						$index_or_line -= 1;
544	0	0					$text =~ m{( \A (?: [^\n]* \n){$index_or_line} [^\n]{$col} )}xms
545							or return;
546	0						$index_or_line = length($1);
547							}
548
549							# Look for a hit...
550	0						for my $problem (@{$problems_ref}) {
	0
551	0						my $findex = $problem->from->{index};
552	0						my $tindex = $problem->to->{index};
553
554							# Cursor is "in" a problem...
555	0	0	0				if ($findex <= $index_or_line && $index_or_line <= $tindex && $findex != (${$prev_findex_ref} // -1)) {
	0	0	0
			0
556	0						${$prev_findex_ref} = $findex;
	0
557	0	0					return wantarray ? ($problem, 1) # There's a problem and the cursor is over it
558							: $problem;
559							}
560
561							# Cursor not in a problem, so return next problem...
562							elsif ($findex > $index_or_line) {
563	0						${$prev_findex_ref} = $findex;
	0
564	0	0					return wantarray ? ($problem, 0) # There's a problem and the cursor isn't over it
565							: undef;
566							}
567							}
568
569							# Otherwise, it's a miss...
570	0						return;
571							}
572
573
574							# Provide regexes for matching grammar problems in Vim...
575
576							sub get_vim_error_regexes {
577	0			0	1		_build_vim_regex_from(@VIM_ERROR_REGEX_COMPONENTS);
578							}
579
580							sub get_vim_caution_regexes {
581	0			0	1		_build_vim_regex_from(@VIM_CAUTION_REGEX_COMPONENTS);
582							}
583
584							sub _build_vim_regex_from {
585	0			0			my @regex_components = reverse sort @_;
586
587	0						my @regexes;
588	0						for my $alternative (@regex_components) {
589	0						$alternative =~ s/'/''/g;
590	0	0	0				if (@regexes && length($regexes[-1]) + length($alternative) + 10 < $VIM_REGEX_MAX_LEN) {
591	0						$regexes[-1] .= '\\\|' . $alternative;
592							}
593							else {
594	0						push @regexes, '\\c' . $alternative;
595							}
596							}
597	0						return map { '\<\%('.$_.'\)\>' } @regexes;
	0
598							}
599
600
601							my $UPPER_CASE_PAT = qr{\A [[:upper:]]* \Z}xms;
602							my $LOWER_CASE_PAT = qr{\A [[:lower:]]* \Z}xms;
603							my $TITLE_CASE_PAT = qr{\A [[:upper:]][[:lower:]]* \Z}xms;
604
605							# Convert a term to have the same capitalization as an original paradigm...
606							my $_recase_like = sub {
607							my ($paradigm, $target) = @_;
608
609							# Process two strings word-by-word...
610							my @paradigm_words = split($SPACE_TRANSITION, $paradigm);
611							my @target_words = split($SPACE_TRANSITION, $target );
612
613							while (@paradigm_words < @target_words) {
614							push @paradigm_words, $paradigm_words[-1];
615							}
616
617							# Accumulate modified target by transforming each word...
618							my $modified_target = "";
619							for my $next_paradigm (@paradigm_words) {
620							# If target completely processed, we're done...
621							last if !@target_words;
622
623							# Otherwise, convert target according to pattern of paradigm...
624							$modified_target .= $next_paradigm =~ $UPPER_CASE_PAT ? uc(shift @target_words)
625							: $next_paradigm =~ $LOWER_CASE_PAT ? lc(shift @target_words)
626							: $next_paradigm =~ $TITLE_CASE_PAT ? ucfirst(lc(shift @target_words))
627							: shift @target_words
628							;
629							}
630
631							return $modified_target;
632							};
633
634							package Lingua::EN::Grammarian::Error; {
635	1			1		1849	use Hash::Util::FieldHash 'fieldhash';
	1					1227
	1					556
636							_rewrite = Lingua::EN::Grammarian::_rewrite;
637
638							fieldhash my %match_for;
639							fieldhash my %startpos_for;
640							fieldhash my %endpos_for;
641
642							sub new {
643	0			0			my ($class, $term, $source_ref) = @_;
644
645	0						my $newobj = bless \do{my $scalar}, $class;
	0
646
647	0						my $endindex = pos(${$source_ref}) - 1;
	0
648	0						my $startindex = pos(${$source_ref}) - length($term);
	0
649	0						my $startline = 1 + substr(${$source_ref},0,$startindex) =~ tr/\n//;
	0
650	0						my $endline = 1 + substr(${$source_ref},0,$endindex) =~ tr/\n//;
	0
651	0			0			my $startcol = 1 + length(Lingua::EN::Grammarian::_rewrite {s{\A.*\n}{}xms} substr(${$source_ref},0,$startindex));
	0
	0
652	0			0			my $endcol = 1 + length(Lingua::EN::Grammarian::_rewrite {s{\A.*\n}{}xms} substr(${$source_ref},0,$endindex));
	0
	0
653
654	0						$match_for{$newobj} = $term;
655	0						$startpos_for{$newobj} = { index => $startindex, line => $startline, column => $startcol };
656	0						$endpos_for{$newobj} = { index => $endindex, line => $endline, column => $endcol };
657
658	0						return $newobj;
659							}
660
661	0			0			sub match { my $self = shift; return $match_for{$self} }
	0
662	1			1		2015	use overload q{""} => sub { my $self = shift; return $match_for{$self} };
	1			0		9433
	1					15
	0					0
	0					0
663
664	0			0			sub from { my $self = shift; return $startpos_for{$self} }
	0
665	0			0			sub to { my $self = shift; return $endpos_for{$self} }
	0
666
667							sub explanation {
668	0			0			my $self = shift;
669	0		0	0			return $EXPLANATION_FOR{lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self}}
	0
670							// "Repeated word";
671							}
672
673							sub explanation_hash {
674	0			0			return {};
675							}
676
677							sub suggestions {
678	0			0			my $self = shift;
679	0						my $term = $match_for{$self};
680
681							# Locate suggestions...
682	0			0			my $corrections_ref
683	0	0	0				= $CORRECTIONS_FOR{lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $term}
684							// [$term =~ m{\A (\S+) \s+ \1 \z}ixms ? $1 : () ];
685
686							# Adjust their casings...
687	0						return map { $_recase_like->($term, $_) } @{$corrections_ref};
	0
	0
688							}
689							}
690
691							package Lingua::EN::Grammarian::Caution; {
692	1			1		393	use Hash::Util::FieldHash 'fieldhash';
	1					1
	1					555
693
694							fieldhash my %match_for;
695							fieldhash my %startpos_for;
696							fieldhash my %endpos_for;
697
698							sub new {
699	0			0			my ($class, $term, $source_ref) = @_;
700
701	0						my $newobj = bless \do{my $scalar}, $class;
	0
702
703	0						my $endindex = pos(${$source_ref}) - 1;
	0
704	0						my $startindex = pos(${$source_ref}) - length($term);
	0
705	0						my $startline = 1 + substr(${$source_ref},0,$startindex) =~ tr/\n//;
	0
706	0						my $endline = 1 + substr(${$source_ref},0,$endindex) =~ tr/\n//;
	0
707	0			0			my $startcol = 1 + length(Lingua::EN::Grammarian::_rewrite { s{\A.*\n}{}xms } substr(${$source_ref},0,$startindex));
	0
	0
708	0			0			my $endcol = 1 + length(Lingua::EN::Grammarian::_rewrite { s{\A.*\n}{}xms } substr(${$source_ref},0,$endindex));
	0
	0
709
710	0						$match_for{$newobj} = $term;
711	0						$startpos_for{$newobj} = { index => $startindex, line => $startline, column => $startcol };
712	0						$endpos_for{$newobj} = { index => $endindex, line => $endline, column => $endcol };
713
714	0						return $newobj;
715							}
716
717	0			0			sub match { my $self = shift; return $match_for{$self} }
	0
718	1			1		7	use overload q{""} => sub { my $self = shift; return $match_for{$self} };
	1			0		2
	1					8
	0					0
	0					0
719
720	0			0			sub from { my $self = shift; return $startpos_for{$self} }
	0
721	0			0			sub to { my $self = shift; return $endpos_for{$self} }
	0
722
723							sub explanation {
724	0			0			my $self = shift;
725	0			0			my $target = lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self};
	0
726	0						my $suggested = $CAUTIONS_FOR{$target};
727	0	0					return if !defined $suggested;
728	0						return $suggested->{explanation};
729							}
730
731							sub explanation_hash {
732	0			0			my $self = shift;
733	0			0			my $target = lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self};
	0
734	0						my $suggested = $CAUTIONS_FOR{$target};
735	0	0					return if !defined $suggested;
736	0						return { split /\s+:\s+\|\s*\n/, $suggested->{explanation} };
737							}
738
739							sub suggestions {
740	0			0			my $self = shift;
741	0			0			my $target = lc Lingua::EN::Grammarian::_rewrite {s{\s+}{ }g} $match_for{$self};
	0
742	0						my $suggested = $CAUTIONS_FOR{$target};
743	0	0					return if !defined $suggested;
744
745							# Reorder suggestions by relevance to term...
746	0	0					return map { $_recase_like->($match_for{$self}, $_) }
	0	0
747							sort {
748	0						$a eq $target ? -1
749							: $b eq $target ? +1
750							: $a cmp $b
751							}
752	0						@{ $suggested->{inflexions} }
753							}
754							}
755
756
757							1; # Magic true value required at end of module
758							__END__