File Coverage

blib/lib/Text/FindIndent.pm
Criterion Covered Total %
statement 185 224 82.5
branch 113 162 69.7
condition 39 61 63.9
subroutine 12 14 85.7
pod 2 2 100.0
total 351 463 75.8


line stmt bran cond sub pod time code
1             package Text::FindIndent;
2             # -*- mode: Perl -*-
3             # Emacs mode is necessary for https://github.com/github/linguist/issues/2458
4              
5 2     2   479842 use 5.00503;
  2         7  
6 2     2   12 use strict;
  2         10  
  2         80  
7              
8 2     2   11 use vars qw{$VERSION};
  2         5  
  2         127  
9             BEGIN {
10 2     2   66 $VERSION = '0.12';
11             }
12              
13 2     2   13 use constant MAX_LINES => 500;
  2         3  
  2         9437  
14              
15             sub parse {
16 21     21 1 201535 my $class = shift;
17 21         43 my $text = shift;
18              
19 21         88 my %opts = @_;
20 21 50       69 my $textref = ref($text) ? $text : \$text; # accept references, too
21              
22 21         47 my $skip_pod = $opts{skip_pod};
23 21 50       55 my $first_level_indent_only = $opts{first_level_indent_only}?1:0;
24              
25 21         35 my %modeline_settings;
26              
27             my %indentdiffs;
28 21         68 my $lines = 0;
29 21         39 my $prev_indent = undef;
30 21         31 my $skip = 0;
31 21         53 my $in_pod = 0;
32              
33             # Do we have emacs smart comments?
34 21         81 $class->_check_emacs_local_variables_at_file_end($textref, \%modeline_settings);
35 21 50 33     678 if (exists $modeline_settings{softtabstop} and exists $modeline_settings{usetabs}) {
    50 66        
    100 66        
36             $modeline_settings{mixedmode} = $modeline_settings{usetabs}
37 0 0       0 if not defined $modeline_settings{mixedmode};
38             return(
39             ($modeline_settings{mixedmode} ? "m" : "s")
40             . $modeline_settings{softtabstop}
41 0 0       0 );
42             }
43             elsif (exists $modeline_settings{tabstop} and $modeline_settings{usetabs}) {
44 0 0       0 return( ($modeline_settings{mixedmode} ? "m" : "t") . $modeline_settings{tabstop} );
45             }
46             elsif (exists $modeline_settings{tabstop} and exists $modeline_settings{usetabs}) {
47 1         28 return( "s" . $modeline_settings{tabstop} );
48             }
49              
50 20         34 my $next_line_braces_pos_plus_1;
51 20         76 my $prev_indent_type = undef;
52 20         119 while ($$textref =~ /\G([ \t]*)([^\r\n]*)[\r\n]+/cgs) {
53 404         1002 my $ws = $1;
54 404         737 my $rest = $2;
55 404         775 my $fullline = "$ws$rest";
56 404         789 $lines++;
57            
58             # check emacs start line stuff with some slack (shebang)
59 404         587 my $changed_modelines;
60 404 100       883 if ($lines < 3) {
61 37         149 $changed_modelines = $class->_check_emacs_local_variables_first_line($fullline, \%modeline_settings);
62             }
63              
64             # Do we have emacs smart comments?
65             # ==> Done once at start
66             #$class->_check_emacs_local_variables($fullline, \%modeline_settings);
67              
68             # Do we have vim smart comments?
69 404 100 100     1237 if ($class->_check_vim_modeline($fullline, \%modeline_settings) || $changed_modelines) {
70 10 100 100     88 if (exists $modeline_settings{softtabstop} and exists $modeline_settings{usetabs}) {
    100 66        
    100 66        
71             $modeline_settings{mixedmode} = $modeline_settings{usetabs}
72 2 50       10 if not defined $modeline_settings{mixedmode};
73             return(
74             ($modeline_settings{mixedmode} ? "m" : "s")
75             . $modeline_settings{softtabstop}
76 2 100       22 );
77             }
78             elsif (exists $modeline_settings{tabstop} and $modeline_settings{usetabs}) {
79 2 100       24 return( ($modeline_settings{mixedmode} ? "m" : "t") . $modeline_settings{tabstop} );
80             }
81             elsif (exists $modeline_settings{tabstop} and exists $modeline_settings{usetabs}) {
82 3         28 return( "s" . $modeline_settings{tabstop} );
83             }
84             }
85              
86 397 50       988 if ($lines > MAX_LINES) {
87 0         0 next;
88             }
89              
90 397 50       809 if ($skip) {
91 0         0 $skip--;
92 0         0 next;
93             }
94              
95 397 100 100     1999 if ($skip_pod and $ws eq '' and substr($rest, 0, 1) eq '=') {
      100        
96 4 100 66     38 if (not $in_pod and $rest =~ /^=(?:head\d|over|item|back|pod|begin|for|end)/ ) {
    50 33        
97 2         6 $in_pod = 1;
98             }
99             elsif ($in_pod and $rest =~ /^=cut/) {
100 2         5 $in_pod = 0;
101             }
102              
103             }
104 397 100 100     1396 next if $in_pod or $rest eq '';
105              
106 352 100       700 if ($ws eq '') {
107 99         176 $prev_indent = $ws;
108 99         509 next;
109             }
110              
111             # skip next line if the last char is a backslash.
112             # Doesn't matter for Perl, but let's be generous!
113 253 50       658 $skip = 1 if $rest =~ /\\$/;
114            
115             # skip single-line comments
116 253 50       897 next if $rest =~ /^(?:#|\/\/|\/\*)/; # TODO: parse /* ... */!
117              
118 253 100       518 if ($next_line_braces_pos_plus_1) {
119 10 100       22 if ($next_line_braces_pos_plus_1==_length_with_tabs_converted($ws)) {
120 8         42 next;
121             }
122 2         4 $next_line_braces_pos_plus_1=0;
123             } else {
124 243 100       556 if ($rest =~ /=> \{$/) { #handle case where hash keys and values are indented by braces pos + 1
125 2         9 $next_line_braces_pos_plus_1=_length_with_tabs_converted($ws)+length($rest);
126             }
127             }
128              
129 245 50 33     522 if ($first_level_indent_only and $prev_indent ne '') {
130 0         0 next;
131             }
132              
133 245 100       507 if ($prev_indent eq $ws) {
134 70 50       141 if ($prev_indent_type) {
135 70         189 $indentdiffs{$prev_indent_type}+=0.01;
136             #coefficient is not based on data, so change if you think it should be different
137             }
138 70         343 next;
139             }
140              
141             # prefix-matching higher indentation level
142 175 100       3731 if ($ws =~ /^\Q$prev_indent\E(.+)$/) {
143 75         251 my $diff = $1;
144 75         207 my $indent_type=_analyse_indent_diff($diff);
145 75         220 $indentdiffs{$indent_type}++;
146 75         127 $prev_indent_type=$indent_type;
147 75         123 $prev_indent = $ws;
148 75         426 next;
149             }
150              
151             # prefix-matching lower indentation level
152 100 100       1439 if ($prev_indent =~ /^\Q$ws\E(.+)$/) {
153 42         134 my $diff = $1;
154             #_grok_indent_diff($diff, \%indentdiffs);
155 42         99 my $indent_type=_analyse_indent_diff($diff);
156 42         148 $indentdiffs{$indent_type}++;
157 42         63 $prev_indent_type=$indent_type;
158 42         72 $prev_indent = $ws;
159 42         2801 next;
160             }
161              
162             # at this point, we're desperate!
163 58         136 my $prev_spaces = $prev_indent;
164 58         408 $prev_spaces =~ s/[ ]{0,7}\t/ /g;
165 58         120 my $spaces = $ws;
166 58         268 $spaces =~ s/[ ]{0,7}\t/ /g;
167 58         133 my $len_diff = length($spaces) - length($prev_spaces);
168 58 50       140 if ($len_diff) {
169 58         100 $len_diff = abs($len_diff);
170 58         178 $indentdiffs{"m$len_diff"}++;
171             }
172 58         320 $prev_indent = $ws;
173            
174             } # end while lines
175              
176             # nothing found
177 13 100       39 return 'u' if not keys %indentdiffs;
178              
179 12         23 my $max = 0;
180 12         21 my $maxkey = undef;
181 12         52 while (my ($key, $value) = each %indentdiffs) {
182 27 100       126 $maxkey = $key, $max = $value if $value > $max;
183             }
184              
185 12 100       66 if ($maxkey =~ /^s(\d+)$/) {
186 8         50 my $mixedkey = "m" . $1;
187 8         18 my $mixed = $indentdiffs{$mixedkey};
188 8 100 66     48 if (defined($mixed) and $mixed >= $max * 0.2) {
189 3         9 $maxkey = $mixedkey;
190             }
191             }
192              
193             # fallback to emacs styles which are guesses only
194 12         33 foreach my $key (qw(softtabstop tabstop usetabs)) {
195 36 100 100     164 if (not exists $modeline_settings{$key}
196             and exists $modeline_settings{"style_$key"}) {
197 6         18 $modeline_settings{$key} = $modeline_settings{"style_$key"};
198             }
199             }
200              
201 12 100       44 if (exists $modeline_settings{softtabstop}) {
    100          
202 3         28 $maxkey =~ s/\d+/$modeline_settings{softtabstop}/;
203             }
204             elsif (exists $modeline_settings{tabstop}) {
205 1         10 $maxkey =~ s/\d+/$modeline_settings{tabstop}/;
206             }
207 12 100       61 if (exists $modeline_settings{usetabs}) {
208 2 50       11 if ($modeline_settings{usetabs}) {
209 2 100       11 $maxkey =~ s/^(.)(\d+)$/$1 eq 'u' ? "t8" : ($2 == 8 ? "t8" : "m$2")/e;
  2 50       18  
210             }
211             else {
212 0         0 $maxkey =~ s/^./m/;
213             }
214             }
215              
216             # mixedmode explicitly asked for
217 12 50       31 if ($modeline_settings{mixedmode}) {
218 0         0 $maxkey =~ s/^./m/;
219             }
220              
221 12         96 return $maxkey;
222             }
223              
224             sub _length_with_tabs_converted {
225 12     12   25 my $str=shift;
226 12   50     36 my $tablen=shift || 8;
227 12         60 $str =~ s/( +)$//;
228 12   50     48 my $trailing_spaces = $1||'';
229 12         22 $str =~ s/ +//g; # assume the spaces are all contained in tabs!
230 12         35 return length($str)*$tablen+length($trailing_spaces);
231             }
232              
233             sub _grok_indent_diff {
234 0     0   0 my $diff = shift;
235 0         0 my $indentdiffs = shift;
236              
237 0 0       0 if ($diff =~ /^ +$/) {
    0          
238 0         0 $indentdiffs->{"s" . length($diff)}++;
239             }
240             elsif ($diff =~ /^\t+$/) {
241 0         0 $indentdiffs->{"t8"}++; # we can't infer what a tab means. Or rather, we need smarter code to do it
242             }
243             else { # mixed!
244 0         0 $indentdiffs->{"m" . _length_with_tabs_converted($diff)}++;
245             }
246             }
247              
248             sub _analyse_indent_diff {
249 117     117   217 my $diff = shift;
250              
251 117 100       484 if ($diff =~ /^ +$/) {
    50          
252 102         425 return "s" . length($diff);
253             }
254             elsif ($diff =~ /^\t+$/) {
255 15         44 return "t8"; # we can't infer what a tab means. Or rather, we need smarter code to do it
256             }
257             else { # mixed!
258 0         0 return "m" . _length_with_tabs_converted($diff);
259             }
260             }
261              
262             {
263             # the vim modeline regexes
264             my $VimTag = qr/(?:ex|vim?(?:[<=>]\d+)?):/;
265             my $OptionArg = qr/[^\s\\]*(?:\\[\s\\][^\s\\]*)*/;
266             my $VimOption = qr/
267             \w+(?:=)?$OptionArg
268             /xo;
269              
270             my $VimModeLineStart = qr/(?:^|\s+)$VimTag/o;
271              
272             # while technically, we match against $VimModeLineStart before,
273             # IF there is a vim modeline, we don't need to optimize
274             my $VimModelineTypeOne = qr/
275             $VimModeLineStart
276             \s*
277             ($VimOption
278             (?:
279             (?:\s*:\s*|\s+)
280             $VimOption
281             )*
282             )
283             \s*$
284             /xo;
285            
286             my $VimModelineTypeTwo = qr/
287             $VimModeLineStart
288             \s*
289             set?\s+
290             ($VimOption
291             (?:\s+$VimOption)*
292             )
293             \s*
294             :
295             /xo;
296              
297             sub _check_vim_modeline {
298 404     404   685 my $class = shift;
299 404         877 my $line = shift;
300 404         583 my $settings = shift;
301              
302             # Quoting the vim docs:
303             # There are two forms of modelines. The first form:
304             # [text]{white}{vi:|vim:|ex:}[white]{options}
305             #
306             #[text] any text or empty
307             #{white} at least one blank character ( or )
308             #{vi:|vim:|ex:} the string "vi:", "vim:" or "ex:"
309             #[white] optional white space
310             #{options} a list of option settings, separated with white space or ':',
311             # where each part between ':' is the argument for a ":set"
312             # command (can be empty)
313             #
314             #Example:
315             # vi:noai:sw=3 ts=6 ~
316             # The second form (this is compatible with some versions of Vi):
317             #
318             # [text]{white}{vi:|vim:|ex:}[white]se[t] {options}:[text]
319             #
320             #[text] any text or empty
321             #{white} at least one blank character ( or )
322             #{vi:|vim:|ex:} the string "vi:", "vim:" or "ex:"
323             #[white] optional white space
324             #se[t] the string "set " or "se " (note the space)
325             #{options} a list of options, separated with white space, which is the
326             # argument for a ":set" command
327             #: a colon
328             #[text] any text or empty
329             #
330             #Example:
331             # /* vim: set ai tw=75: */ ~
332             #
333            
334 404         603 my @options;
335 404 100       4002 return if $line !~ $VimModeLineStart;
336              
337 6 100       563 if ($line =~ $VimModelineTypeOne) {
    50          
338 2         20 push @options, split /(?!<\\)[:\s]+/, $1;
339             }
340             elsif ($line =~ $VimModelineTypeTwo) {
341 4         41 push @options, split /(?!<\\)\s+/, $1;
342             }
343             else {
344 0         0 return;
345             }
346              
347 6 50       20 return if not @options;
348              
349 6         13 my $changed = 0;
350 6         18 foreach (@options) {
351 14 100       58 /s(?:ts|ofttabstop)=(\d+)/i and $settings->{softtabstop} = $1, $changed = 1, next;
352 11 100       63 /t(?:s|abstop)=(\d+)/i and $settings->{tabstop} = $1, $changed = 1, next;
353 7 100 66     75 /((?:no)?)(?:expandtab|et)/i and $settings->{usetabs} = (defined $1 and $1 =~ /no/i ? 1 : 0), $changed = 1, next;
354             }
355 6         32 return $changed;
356             }
357             }
358              
359              
360              
361              
362             {
363             # lookup for emacs tab modes
364             my %tabmodelookup = (
365             t => sub {
366             $_[0]->{mixedmode} = 1;
367             $_[0]->{usetabs} = 1;
368             },
369             nil => sub {
370             delete $_[0]->{mixedmode};
371             $_[0]->{usetabs} = 0;
372             },
373             );
374              
375             # lookup for emacs styles
376             my %stylelookup = (
377             kr => sub {
378             $_[0]->{style_softtabstop} = 4;
379             $_[0]->{style_tabstop} = 8;
380             $_[0]->{style_usetabs} = 1;
381             },
382             linux => sub {
383             $_[0]->{style_softtabstop} = 8;
384             $_[0]->{style_tabstop} = 8;
385             $_[0]->{style_usetabs} = 1;
386             },
387             'gnu' => sub {
388             $_[0]->{style_softtabstop} = 2;
389             $_[0]->{style_tabstop} = 8;
390             $_[0]->{style_usetabs} = 1;
391             },
392             'bsd' => sub {
393             $_[0]->{style_softtabstop} = 4;
394             $_[0]->{style_tabstop} = 8;
395             $_[0]->{style_usetabs} = 1;
396             },
397             'ellemtel' => sub {
398             $_[0]->{style_softtabstop} = 3;
399             $_[0]->{style_tabstop} = 3;
400             $_[0]->{style_usetabs} = 0;
401             },
402             'java' => sub {
403             $_[0]->{style_softtabstop} = 4;
404             $_[0]->{style_tabstop} = 8;
405             },
406             );
407             $stylelookup{'k&r'} = $stylelookup{kr};
408             $stylelookup{'bsd'} = $stylelookup{kr};
409             $stylelookup{'whitesmith'} = $stylelookup{kr};
410             $stylelookup{'stroustrup'} = $stylelookup{kr};
411              
412             my $FirstLineVar = qr/[^\s:]+/;
413             my $FirstLineValue = qr/[^;]+/; # dumb
414             my $FirstLinePair = qr/\s*$FirstLineVar\s*:\s*$FirstLineValue;/o;
415             my $FirstLineRegexp = qr/-\*-\s*mode:\s*[^\s;]+;\s*($FirstLinePair+)\s*-\*-/o;
416            
417            
418             sub _check_emacs_local_variables_first_line {
419 37     37   64 my $class = shift;
420 37         67 my $line = shift;
421 37         69 my $settings = shift;
422              
423             # on first line (second if there is a shebang):
424             # -*- mode: $MODENAME; $VARNAME: $VALUE; ... -*-
425             # ($FOO is not a literal)
426             # Example with a Lisp comment:
427             # ;; -*- mode: Lisp; fill-column: 75; comment-column: 50; -*-
428              
429              
430 37         63 my $changed = 0;
431 37 100       323 if ($line =~ $FirstLineRegexp) {
432 4         48 my @pairs = split /\s*;\s*/, $1;
433 4         14 foreach my $pair (@pairs) {
434 7         43 my ($key, $value) = split /\s*:\s*/, $pair, 2;
435 7 100       36 if ($key eq 'tab-width') {
    100          
    100          
    50          
436 1         4 $settings->{tabstop} = $value;# FIXME: check var
437 1         3 $changed = 1;
438             }
439             elsif ($key eq 'indent-tabs-mode') {
440 3 50       20 $tabmodelookup{$value}->($settings), $changed = 1 if defined $tabmodelookup{$value};
441             }
442             elsif ($key eq 'c-basic-offset') {
443 2   33     18 $settings->{tabstop} ||= $value; # tab-width takes precedence!?
444 2         6 $changed = 1;
445             }
446             elsif ($key eq 'style') { # this is quite questionable practice...
447 1 50       11 $stylelookup{$value}->($settings), $changed = 1 if defined $stylelookup{$value};
448             }
449             }
450             }
451              
452             # do this only as a LAST resort!
453             #$settings->{tabstop} = $settings->{style_tabstop} if not exists $settings->{tabstop};
454             #$settings->{softtabstop} = $settings->{style_softtabstop} if not exists $settings->{softtabstop};
455             #$settings->{usetabs} = $settings->{style_usetabs} if not exists $settings->{usetabs};
456              
457 37         99 return $changed;
458             }
459              
460             sub _check_emacs_local_variables {
461 513     513   864 my $class = shift;
462 513         1053 my $line = shift;
463 513         768 my $settings = shift;
464              
465             # A local variables list goes near the end of the file, in the last page.[...]
466             # The local variables list starts with a line containing the string `Local Variables:',
467             # and ends with a line containing the string `End:'. In between come the variable names
468             # and values, one set per line, as `variable: value'. The values are not evaluated;
469             # they are used literally. If a file has both a local variables list and a `-*-'
470             # line, Emacs processes everything in the `-*-' line first, and everything in the
471             # local variables list afterward.
472             #
473             # Here is an example of a local variables list:
474             #
475             # ;; Local Variables: **
476             # ;; mode:lisp **
477             # ;; comment-column:0 **
478             # ;; comment-start: ";; " **
479             # ;; comment-end:"**" **
480             # ;; End: **
481             #
482             # Each line starts with the prefix `;; ' and each line ends with the suffix ` **'.
483             # Emacs recognizes these as the prefix and suffix based on the first line of the
484             # list, by finding them surrounding the magic string `Local Variables:'; then it
485             # automatically discards them from the other lines of the list.
486             #
487             # The usual reason for using a prefix and/or suffix is to embed the local variables
488             # list in a comment, so it won't confuse other programs that the file is intended as
489             # input for. The example above is for a language where comment lines start with `;; '
490             # and end with `**'; the local values for comment-start and comment-end customize the
491             # rest of Emacs for this unusual syntax. Don't use a prefix (or a suffix) if you don't need one.
492             #
493             #
494             # Can it be any more annoying to parse? --Steffen
495              
496 513 100       2527 if ($settings->{in_local_variables_section}) {
    100          
497 5         11 my $prefix = $settings->{local_variable_prefix};
498 5 50       16 $prefix = '' if not defined $prefix;
499 5         10 $prefix = quotemeta($prefix);
500 5         11 my $suffix = $settings->{local_variable_suffix};
501 5 50       13 $suffix = '' if not defined $suffix;
502 5         11 $suffix = quotemeta($suffix);
503              
504 5 100       187 if ($line =~ /^\s*$prefix\s*([^\s:]+):\s*(.+)$suffix\s*$/) {
505 3         14 my $key = $1;
506 3         38 my $value = $2;
507 3         13 $value =~ s/\s+$//;
508 3 50       43 if ($key eq 'tab-width') {
    100          
    100          
    50          
509 0         0 $settings->{tabstop} = $value;
510             }
511             elsif ($key eq 'indent-tabs-mode') {
512 1 50       9 $tabmodelookup{$value}->($settings) if defined $tabmodelookup{$value};
513             }
514             elsif ($key eq 'c-basic-offset') {
515 1   33     18 $settings->{tabstop} ||= $value; # tab-width takes precedence!?
516             }
517             elsif ($key eq 'style') { # this is quite questionable practice...
518 1 50       7 $stylelookup{$value}->($settings) if defined $stylelookup{$value};
519             }
520             } # end if variable line
521             else {
522 2         6 delete $settings->{in_local_variables_section};
523 2         5 delete $settings->{local_variable_prefix};
524 2         9 delete $settings->{local_variable_suffix};
525             }
526             }
527             elsif ($line =~ /^\s*(\S*)\s*Local Variables:\s*(\S*)\s*$/) {
528 2         12 $settings->{local_variable_prefix} = $1;
529 2         6 $settings->{local_variable_suffix} = $2;
530 2         12 $settings->{in_local_variables_section} = 1;
531             }
532             }
533              
534             sub _check_emacs_local_variables_at_file_end {
535 21     21   39 my $class = shift;
536 21         30 my $textref = shift;
537 21         33 my $settings = shift;
538 21         35 my $len = length($$textref);
539 21         50 my $start = $len-3000;
540 21 50       65 $start = 0 if $start < 0;
541 21         95 my $text = substr($$textref, $start);
542              
543 21         174 while ($text =~ /\G[ \t]*([^\r\n]*)[\r\n]+/cgs) {
544 513         1103 $class->_check_emacs_local_variables($1, $settings);
545             }
546 21         64 return;
547             }
548             } # end lexical block for emacs lookups
549              
550              
551             sub to_vim_commands {
552 0     0 1   my $indent = shift;
553 0 0         $indent = shift if $indent eq __PACKAGE__;
554 0 0 0       $indent = __PACKAGE__->parse($indent) if ref($indent) or length($indent) > 5;
555              
556 0           my @cmd;
557 0 0         if ( $indent =~ /^t(\d+)/ ) {
    0          
    0          
558 0           my $chars = $1;
559 0           push @cmd, ":set shiftwidth=$chars";
560 0           push @cmd, ":set tabstop=$chars";
561 0           push @cmd, ":set softtabstop=0";
562 0           push @cmd, ":set noexpandtab";
563             } elsif ( $indent =~ /^s(\d+)/ ) {
564 0           my $spaces = $1;
565 0           push @cmd, ":set shiftwidth=$spaces";
566 0           push @cmd, ":set tabstop=8";
567 0           push @cmd, ":set softtabstop=$spaces";
568 0           push @cmd, ':set expandtab';
569             } elsif ( $indent =~ /^m(\d+)/ ) {
570 0           my $spaces = $1;
571 0           push @cmd, ":set shiftwidth=$spaces";
572 0           push @cmd, ":set tabstop=8";
573 0           push @cmd, ":set softtabstop=$spaces";
574 0           push @cmd, ':set noexpandtab';
575             }
576 0           return @cmd;
577             }
578              
579             1;
580              
581             __END__