File Coverage

blib/lib/Text/Extract/MaketextCallPhrases.pm

Criterion	Covered	Total	%
statement	187	201	93.0
branch	99	126	78.5
condition	80	107	74.7
subroutine	9	9	100.0
pod	2	2	100.0
total	377	445	84.7

line	stmt	bran	cond	sub	pod	time	code
1							package Text::Extract::MaketextCallPhrases;
2
3	6			6		271941	use strict;
	6					14
	6					229
4	6			6		34	use warnings;
	6					12
	6					240
5
6							$Text::Extract::MaketextCallPhrases::VERSION = '0.92';
7
8	6			6		7550	use Text::Balanced ();
	6					188296
	6					372
9	6			6		6136	use String::Unquotemeta ();
	6					1855
	6					172
10
11							# So we don't have to maintain an identical regex
12	6			6		5802	use Module::Want 0.3 ();
	6					8660
	6					427
13							my $ns_regexp = Module::Want::get_ns_regexp();
14
15							sub import {
16	6			6		42	no strict 'refs'; ## no critic
	6					14
	6					22656
17	6			6		84	*{ caller() . '::get_phrases_in_text' } = \&get_phrases_in_text;
	6					63
18	6					13	*{ caller() . '::get_phrases_in_file' } = \&get_phrases_in_file;
	6					12456
19							}
20
21							my $default_regexp_conf_item = [
22							qr/(?:(?:^\|\:\|\s\|=\|\()translatable\|(?:make\|lex)text(?:_[a-zA-Z0-9_]+_context)?)\s*\(?/,
23							sub { return substr( $_[0], -1, 1 ) eq '(' ? qr/\s\)/ : qr/\s\;/ },
24							];
25
26							sub get_phrases_in_text {
27
28							# 3rd arg is used internally to get the line number in the 'debug_ignored_matches' results when called via get_phrases_in_file(). Don't rely on this as it may change.
29	48			48	1	238846	my ( $text, $conf_hr, $linenum ) = @_; # 3rd arg is used internally to get the line number in the 'debug_ignored_matches' results when called via get_phrases_in_file(). Don't rely on this as it may change.
30
31	48		100			256	$conf_hr \|\|= {};
32
33	48	50				158	if ( $conf_hr->{'encode_unicode_slash_x'} ) {
34	0	0				0	Module::Want::have_mod('Encode') \|\| die $@;
35							}
36
37	48	100	66			184	if ( $conf_hr->{'cpanel_mode'} && $conf_hr->{'cpanel_mode'} != 0 ) {
38	1					3	$conf_hr->{'cpanel_mode'} = '0E0';
39	1					2	push @{ $conf_hr->{'regexp_conf'} }, [ qr/\/ ], [ qr/(?:^\|[^<])cptext\s$/, qr/\s$/ ], [ qr/Cpanel::Exception->new$/, qr/\s*$/ ];
	1					14
40							}
41
42	48					68	my @results;
43							my %offset_seen;
44
45							# I like this alignment better than what tidy does, seems clearer to me even if a bit overkill perhaps
46							#tidyoff
47	48	100				176	for my $regexp (
	2	100				8
48							$conf_hr->{'regexp_conf'} ? (
49	3					12	$conf_hr->{'no_default_regex'} ? @{ $conf_hr->{'regexp_conf'} }
50							: ( $default_regexp_conf_item, @{ $conf_hr->{'regexp_conf'} } )
51							)
52							: ($default_regexp_conf_item)
53							) {
54							#tidyon
55	53					117	my $text_working_copy = $text;
56	53					78	my $original_len = length($text_working_copy);
57
58	53		66			2014	while ( defined $text_working_copy && $text_working_copy =~ m/($regexp->[0]\|## no extract maketext)/ ) {
59	452					937	my $matched = $1;
60
61							# we have a (possibly multiline) chunk w/ notation-not-preceeded-by-token that we should ignore
62	452	100				914	if ( $matched eq '## no extract maketext' ) {
63	3					19	$text_working_copy =~ s/.## no extract maketext[^\n]//;
64	3					31	next;
65							}
66
67	449					469	my $pre;
68
69							# TODO: incorporate the \s* into results: 'post_token_ws' => $1 \|\| '' ?
70	449					5694	( $pre, $text_working_copy ) = split( m/$regexp->[0]\s/, $text_working_copy, 2 ); # the \s takes into account trailing WS that Text::Balanced ignores which then can throw off the offset
71
72							# we have a token line that we should ignore
73	449	100				2170	next if $text_working_copy =~ s/^[^\n]## no extract maketext[^\n]//;
74
75	443					669	my $offset = $original_len - length($text_working_copy);
76
77	443					432	my $phrase;
78	443					1915	my $result_hr = { 'is_error' => 0, 'is_warning' => 0, 'offset' => $offset, 'regexp' => $regexp, 'matched' => $matched };
79
80	443	100				1079	if ( $conf_hr->{'ignore_perlish_comments'} ) {
81
82							# ignore matches in a comment
83	6	100	100			31	if ( $pre =~ m/\#/ && $pre !~ m/[\n\r]$/ ) {
84	2					12	my @lines = split( /[\n\r]+/, $pre );
85
86	2	100				10	if ( $lines[-1] =~ m/\#/ ) {
87	1					3	$result_hr->{'type'} = 'comment';
88	1	50				4	$result_hr->{'line'} = $linenum if defined $linenum;
89	1					2	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	1					3
90	1					25	next;
91							}
92							}
93							}
94
95							# ignore functions named *$1
96	442	100				1490	if ( $text_working_copy =~ m/^\s*\{/ ) {
97	4					11	$result_hr->{'type'} = 'function';
98	4	50				12	$result_hr->{'line'} = $linenum if defined $linenum;
99	4					7	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	4					12
100	4					66	next;
101							}
102
103							# ignore assignments to things named *maketext
104	438	100				1243	if ( $text_working_copy =~ m/^\s*=/ ) {
105	2					5	$result_hr->{'type'} = 'assignment';
106	2	50				6	$result_hr->{'line'} = $linenum if defined $linenum;
107	2					3	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	2					13
108	2					36	next;
109							}
110
111	436	100				850	if ( $conf_hr->{'ignore_perlish_statement'} ) {
112
113							# ignore a statement named *maketext (e.g. goto &XYZ::maketext;)
114	3	100				9	if ( $text_working_copy =~ m/^\s*;/ ) {
115	1					4	$result_hr->{'type'} = 'statement';
116	1	50				9	$result_hr->{'line'} = $linenum if defined $linenum;
117	1					2	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	1					3
118	1					17	next;
119							}
120							}
121
122	435					1172	( $phrase, $text_working_copy ) = Text::Balanced::extract_variable($text_working_copy);
123
124	435	100				23075	if ( !$phrase ) {
125
126							# undef $@;
127	405					463	my ( $type, $inside, $opener, $closer );
128	405					991	( $phrase, $text_working_copy, undef, $type, $opener, $inside, $closer ) = Text::Balanced::extract_quotelike($text_working_copy);
129	405	50				48782	$text_working_copy = '' if !defined $text_working_copy;
130
131	405	100	100			3041	$result_hr->{'quotetype'} = 'single' if ( defined $opener && $opener eq "'" ) \|\| ( defined $type && ( $type eq 'q' \|\| $type eq 'qw' ) );
			100
			66
			66
132	405	100	100			2601	$result_hr->{'quotetype'} = 'double' if ( defined $opener && $opener eq '"' ) \|\| ( defined $type && $type eq 'qq' );
			100
			66
133	405	100				892	if ( $result_hr->{'quotetype'} ) {
134	252					577	$result_hr->{'quote_before'} = $type . $opener;
135	252					698	$result_hr->{'quote_after'} = $closer;
136							}
137
138	405	100	100			1466	if ( defined $type && $type eq '<<' ) {
139	22					55	$result_hr->{'quote_before'} = $type . $opener;
140	22					45	$result_hr->{'quote_after'} = $closer;
141
142	22					57	$result_hr->{'heredoc'} = $opener;
143	22	100				55	if ( substr( $opener, 0, 1 ) eq "'" ) {
144	19					36	$result_hr->{'quotetype'} = 'single';
145							}
146							else {
147	3					8	$result_hr->{'quotetype'} = 'double';
148							}
149							}
150
151	405	100	100			3330	if ( defined $inside && ( exists $result_hr->{'quotetype'} ) && $inside eq '' ) {
		100	100
		50	66
			33
			33
			0
			0
152	30					47	$result_hr->{'is_error'} = 1;
153	30					50	$result_hr->{'type'} = 'empty';
154	30					52	$phrase = $inside;
155							}
156							elsif ( defined $inside && $inside ) {
157	286					388	$phrase = $inside;
158
159	286	100	100			3279	if ( $type eq 'qw' ) {
		100	100
		100	100
			100
			100
160	21	100				78	if ( $phrase =~ m/\A(\s+)/ ) {
161	2					4	$result_hr->{'quote_before'} .= $1;
162	2					9	$phrase =~ s/\A(\s+)//;
163							}
164	21	100				82	if ( $phrase =~ m/(\s+)\z/ ) {
165	2					8	$result_hr->{'quote_after'} = $1 . $result_hr->{'quote_after'};
166	2					7	$phrase =~ s/(\s+)\z//;
167							}
168
169	21	100				78	if ( $phrase =~ m/(\s+)/ ) {
170	19					48	$result_hr->{'quote_after'} = $1;
171							}
172
173							# otherwise leave quote_after asis for cases like this: qw(foo)
174
175	21					72	($phrase) = split( /\s+/, $phrase, 2 );
176							}
177							elsif ( $type eq 'qx' \|\| $opener eq '`' ) {
178	12					18	$result_hr->{'is_warning'} = 1;
179	12					35	$result_hr->{'type'} = 'command';
180							}
181							elsif ( $type eq 'm' \|\| $type eq 'qr' \|\| $type eq 's' \|\| $type eq 'tr' \|\| $opener eq '/' ) {
182	30					44	$result_hr->{'is_warning'} = 1;
183	30					67	$result_hr->{'type'} = 'pattern';
184							}
185							}
186							elsif ( defined $opener && defined $inside && defined $closer && defined $phrase && $phrase eq "$opener$inside$closer" ) {
187	0					0	$result_hr->{'is_error'} = 1;
188	0					0	$result_hr->{'type'} = 'empty';
189	0					0	$phrase = $inside;
190							}
191							else {
192	89					113	my $is_no_arg = 0;
193	89	50				203	if ( defined $regexp->[1] ) {
194	89	50				220	if ( ref( $regexp->[1] ) eq 'CODE' ) {
		0
195	89					189	my $rgx = $regexp->[1]->($matched);
196	89	100				1334	if ( $text_working_copy =~ m/^$rgx/ ) {
197	37					108	$is_no_arg = 1;
198							}
199							}
200							elsif ( ref( $regexp->[1] ) eq 'Regexp' ) {
201	0					0	my $rgx = qr/^$regexp->[1]/;
202	0	0				0	if ( $text_working_copy =~ $rgx ) {
203	0					0	$is_no_arg = 1;
204							}
205							}
206							}
207
208	89	100				786	if ($is_no_arg) {
		100
209	37					62	$result_hr->{'is_error'} = 1;
210	37					82	$result_hr->{'type'} = 'no_arg';
211							}
212							elsif ( $text_working_copy =~ m/^\s(((?:\&\|\\\)?)$ns_regexp(?:\-\>$ns_regexp)?((?:\s*\()?))/o ) {
213	40					102	$phrase = $1;
214	40	100	66			211	my $perlish = $2 \|\| $3 ? 1 : 0;
215
216	40					465	$text_working_copy =~ s/\s(?:\&\|\\\)?$ns_regexp(?:\-\>$ns_regexp)?(?:\s\()?\s//o;
217
218	40					81	$result_hr->{'is_warning'} = 1;
219	40	100				136	$result_hr->{'type'} = $perlish ? 'perlish' : 'bareword';
220							}
221							}
222							}
223							else {
224	30					56	$result_hr->{'is_warning'} = 1;
225	30					76	$result_hr->{'type'} = 'perlish';
226							}
227
228	435	100				958	if ( !defined $phrase ) {
229	49					58	my $is_no_arg = 0;
230	49	50				115	if ( defined $regexp->[1] ) {
231	49	50				106	if ( ref( $regexp->[1] ) eq 'CODE' ) {
		0
232	49					99	my $rgx = $regexp->[1]->($matched);
233	49	100				779	if ( $text_working_copy =~ m/^$rgx/ ) {
234	37					96	$is_no_arg = 1;
235							}
236							}
237							elsif ( ref( $regexp->[1] ) eq 'Regexp' ) {
238	0					0	my $rgx = qr/^$regexp->[1]/;
239	0	0				0	if ( $text_working_copy =~ $rgx ) {
240	0					0	$is_no_arg = 1;
241							}
242							}
243							}
244
245	49	100				121	if ($is_no_arg) {
246	37					54	$result_hr->{'is_error'} = 1;
247	37					65	$result_hr->{'type'} = 'no_arg';
248							}
249							else {
250	12					24	$result_hr->{'is_warning'} = 1;
251	12					27	$result_hr->{'type'} = 'multiline';
252							}
253							}
254							else {
255	386					734	$result_hr->{'original_text'} = $phrase;
256
257							# make sure its wasn't a tricky variable in quotes like maketext("$foo->{zip}")
258							# '$foo->{zip}' ' $foo->{zip} ' " $foo->{zip} " to but that seems like a good idea to flag as wonky and in need of human follow up
259	386					1265	my ( $var, $for, $aft ) = Text::Balanced::extract_variable($phrase);
260	386	100	66			23785	if ( $var && defined $for && defined $aft && $for =~ m/\A\s\z/ && $aft =~ m/\A\s\z/ ) {
			66
			33
			33
261	34					64	$result_hr->{'is_warning'} = 1;
262	34					63	$result_hr->{'type'} = 'perlish';
263							}
264							else {
265	352	100				893	if ( exists $result_hr->{'quotetype'} ) {
266	270	100				663	if ( $result_hr->{'quotetype'} eq 'single' ) {
		50
267
268							# escape \n\t etc to preserver them during unquotemeta()
269	186					415	$phrase =~ s{(\\(?:n\|t\|f\|r\|a\|b))}{\\$1}g;
270							}
271							elsif ( $result_hr->{'quotetype'} eq 'double' ) {
272
273							# interpolate \n\t etc
274	84					180	$phrase =~ s{(\\(?:n\|t\|f\|r\|a\|b))}{eval qq{"$1"}}eg;
	6					388
275							}
276							}
277
278	352	50				652	if ( $conf_hr->{'encode_unicode_slash_x'} ) {
279
280							# Turn Unicode string \x{} into bytes strings
281	0					0	$phrase =~ s{(\\x\{[0-9a-fA-F]+\})}{Encode::encode_utf8( eval qq{"$1"} )}eg;
	0					0
282							}
283							else {
284
285							# Preserve Unicode string \x{} for unquotemeta()
286	352					585	$phrase =~ s{(\\)(x\{[0-9a-fA-F]+\})}{$1$1$2}g;
287							}
288
289							# Turn graphemes into characters to avoid quotemeta() problems
290	352					443	$phrase =~ s{((:?\\x[0-9a-fA-F]{2})+)}{eval qq{"$1"}}eg;
	2					133
291	352	100	100			1703	$phrase = String::Unquotemeta::unquotemeta($phrase) unless exists $result_hr->{'type'} && $result_hr->{'type'} eq 'perlish';
292							}
293							}
294
295	435					3369	$result_hr->{'phrase'} = $phrase;
296
297	435	50				8665	push @results, $result_hr if ++$offset_seen{ $result_hr->{'offset'} } == 1;
298							}
299							}
300
301	48					274	return [ sort { $a->{'offset'} <=> $b->{'offset'} } @results ];
	451					939
302							}
303
304							sub get_phrases_in_file {
305	1			1	1	3344	my ( $file, $regex_conf ) = @_;
306
307	1	50				54	open my $fh, '<', $file or return;
308
309	1					3	my @results;
310	1					3	my $prepend = '';
311	1					2	my $linenum = 0;
312	1					3	my $in_multi_line = 0;
313	1					2	my $line; # buffer
314
315	1					17	while ( $line = readline($fh) ) {
316	12					14	$linenum++;
317
318	12					17	my $initial_result_count = @results;
319	12	100				19	push @results, map { $_->{'file'} = $file; $_->{'line'} = $in_multi_line ? $in_multi_line : $linenum; $_ } @{ get_phrases_in_text( $prepend . $line, $regex_conf, $linenum ) };
	9					23
	9					24
	9					22
	12					40
320	12					24	my $updated_result_count = @results;
321
322	12	50	66			137	if ( $in_multi_line && $updated_result_count == $initial_result_count ) {
		100	66
		100	100
			66
			100
			66
323	0					0	$prepend = $prepend . $line;
324	0					0	next;
325							}
326							elsif ( $in_multi_line && $updated_result_count > $initial_result_count && $results[-1]->{'type'} ) {
327	4					8	$prepend = $prepend . $line;
328	4					6	pop @results;
329	4					22	next;
330							}
331							elsif ( !$in_multi_line && @results && defined $results[-1]->{'type'} && $results[-1]->{'type'} eq 'multiline' ) {
332	2					3	$in_multi_line = $linenum;
333	2					6	my $trailing_partial = pop @results;
334
335	2					12	require bytes;
336	2	50				11	my $offset = $trailing_partial->{'offset'} > bytes::length( $prepend . $line ) ? bytes::length( $prepend . $line ) : $trailing_partial->{'offset'};
337	2					14	$prepend = $trailing_partial->{'matched'} . substr( "$prepend$line", $offset );
338	2					14	next;
339							}
340							else {
341	6					8	$in_multi_line = 0;
342	6					32	$prepend = '';
343							}
344							}
345
346	1					25	close $fh;
347
348	1					7	return \@results;
349							}
350
351							1;
352
353							__END__