File Coverage

blib/lib/Text/FindIndent.pm
Criterion Covered Total %
statement 186 231 80.5
branch 113 162 69.7
condition 40 65 61.5
subroutine 12 14 85.7
pod 2 2 100.0
total 353 474 74.4


line stmt bran cond sub pod time code
1             package Text::FindIndent;
2              
3 2     2   35437 use 5.00503;
  2         7  
  2         89  
4 2     2   12 use strict;
  2         4  
  2         152  
5              
6 2     2   11 use vars qw{$VERSION};
  2         13  
  2         143  
7             BEGIN {
8 2     2   38 $VERSION = '0.10';
9             }
10              
11 2     2   11 use constant MAX_LINES => 500;
  2         4  
  2         8302  
12              
13             sub parse {
14 21     21 1 22332 my $class = shift;
15 21         35 my $text = shift;
16              
17 21         54 my %opts = @_;
18 21 50       51 my $textref = ref($text) ? $text : \$text; # accept references, too
19              
20 21         39 my $skip_pod = $opts{skip_pod};
21 21 50       44 my $first_level_indent_only = $opts{first_level_indent_only}?1:0;
22              
23 21         28 my %modeline_settings;
24              
25             my %indentdiffs;
26 21         24 my $lines = 0;
27 21         23 my $prev_indent = undef;
28 21         25 my $skip = 0;
29 21         22 my $in_pod = 0;
30              
31             # Do we have emacs smart comments?
32 21         59 $class->_check_emacs_local_variables_at_file_end($textref, \%modeline_settings);
33 21 50 33     173 if (exists $modeline_settings{softtabstop} and exists $modeline_settings{usetabs}) {
    50 66        
    100 66        
34 0 0       0 $modeline_settings{mixedmode} = $modeline_settings{usetabs}
35             if not defined $modeline_settings{mixedmode};
36             return(
37 0 0       0 ($modeline_settings{mixedmode} ? "m" : "s")
38             . $modeline_settings{softtabstop}
39             );
40             }
41             elsif (exists $modeline_settings{tabstop} and $modeline_settings{usetabs}) {
42 0 0       0 return( ($modeline_settings{mixedmode} ? "m" : "t") . $modeline_settings{tabstop} );
43             }
44             elsif (exists $modeline_settings{tabstop} and exists $modeline_settings{usetabs}) {
45 1         6 return( "s" . $modeline_settings{tabstop} );
46             }
47              
48 20         25 my $next_line_braces_pos_plus_1;
49 20         23 my $prev_indent_type = undef;
50 20         101 while ($$textref =~ /\G([ \t]*)([^\r\n]*)[\r\n]+/cgs) {
51 404         733 my $ws = $1;
52 404         641 my $rest = $2;
53 404         583 my $fullline = "$ws$rest";
54 404         447 $lines++;
55            
56             # check emacs start line stuff with some slack (shebang)
57 404         401 my $changed_modelines;
58 404 100       792 if ($lines < 3) {
59 37         93 $changed_modelines = $class->_check_emacs_local_variables_first_line($fullline, \%modeline_settings);
60             }
61              
62             # Do we have emacs smart comments?
63             # ==> Done once at start
64             #$class->_check_emacs_local_variables($fullline, \%modeline_settings);
65              
66             # Do we have vim smart comments?
67 404 100 100     1022 if ($class->_check_vim_modeline($fullline, \%modeline_settings) || $changed_modelines) {
68 10 100 100     94 if (exists $modeline_settings{softtabstop} and exists $modeline_settings{usetabs}) {
    100 100        
    100 100        
69 2 50       8 $modeline_settings{mixedmode} = $modeline_settings{usetabs}
70             if not defined $modeline_settings{mixedmode};
71             return(
72 2 100       17 ($modeline_settings{mixedmode} ? "m" : "s")
73             . $modeline_settings{softtabstop}
74             );
75             }
76             elsif (exists $modeline_settings{tabstop} and $modeline_settings{usetabs}) {
77 2 100       18 return( ($modeline_settings{mixedmode} ? "m" : "t") . $modeline_settings{tabstop} );
78             }
79             elsif (exists $modeline_settings{tabstop} and exists $modeline_settings{usetabs}) {
80 3         21 return( "s" . $modeline_settings{tabstop} );
81             }
82             }
83              
84 397 50       750 if ($lines > MAX_LINES) {
85 0         0 next;
86             }
87              
88 397 50       636 if ($skip) {
89 0         0 $skip--;
90 0         0 next;
91             }
92              
93 397 100 100     875 if ($skip_pod and $ws eq '' and substr($rest, 0, 1) eq '=') {
      100        
94 4 100 66     40 if (not $in_pod and $rest =~ /^=(?:head\d|over|item|back|pod|begin|for|end)/ ) {
    50 33        
95 2         5 $in_pod = 1;
96             }
97             elsif ($in_pod and $rest =~ /^=cut/) {
98 2         3 $in_pod = 0;
99             }
100              
101             }
102 397 100 100     1586 next if $in_pod or $rest eq '';
103              
104 352 100       630 if ($ws eq '') {
105 99         107 $prev_indent = $ws;
106 99         398 next;
107             }
108              
109             # skip next line if the last char is a backslash.
110             # Doesn't matter for Perl, but let's be generous!
111 253 50       488 $skip = 1 if $rest =~ /\\$/;
112            
113             # skip single-line comments
114 253 50       583 next if $rest =~ /^(?:#|\/\/|\/\*)/; # TODO: parse /* ... */!
115              
116 253 100       378 if ($next_line_braces_pos_plus_1) {
117 10 100       19 if ($next_line_braces_pos_plus_1==_length_with_tabs_converted($ws)) {
118 8         40 next;
119             }
120 2         4 $next_line_braces_pos_plus_1=0;
121             } else {
122 243 100       469 if ($rest=~/=> {$/) { #handle case where hash keys and values are indented by braces pos + 1
123 2         6 $next_line_braces_pos_plus_1=_length_with_tabs_converted($ws)+length($rest);
124             }
125             }
126              
127 245 50 33     497 if ($first_level_indent_only and $prev_indent ne '') {
128 0         0 next;
129             }
130              
131 245 100       446 if ($prev_indent eq $ws) {
132 70 50       133 if ($prev_indent_type) {
133 70         116 $indentdiffs{$prev_indent_type}+=0.01;
134             #coefficient is not based on data, so change if you think it should be different
135             }
136 70         296 next;
137             }
138              
139             # prefix-matching higher indentation level
140 175 100       2317 if ($ws =~ /^\Q$prev_indent\E(.+)$/) {
141 75         131 my $diff = $1;
142 75         129 my $indent_type=_analyse_indent_diff($diff);
143 75         153 $indentdiffs{$indent_type}++;
144 75         91 $prev_indent_type=$indent_type;
145 75         98 $prev_indent = $ws;
146 75         361 next;
147             }
148              
149             # prefix-matching lower indentation level
150 100 100       1061 if ($prev_indent =~ /^\Q$ws\E(.+)$/) {
151 42         70 my $diff = $1;
152             #_grok_indent_diff($diff, \%indentdiffs);
153 42         79 my $indent_type=_analyse_indent_diff($diff);
154 42         75 $indentdiffs{$indent_type}++;
155 42         44 $prev_indent_type=$indent_type;
156 42         58 $prev_indent = $ws;
157 42         198 next;
158             }
159              
160             # at this point, we're desperate!
161 58         87 my $prev_spaces = $prev_indent;
162 58         259 $prev_spaces =~ s/[ ]{0,7}\t/ /g;
163 58         79 my $spaces = $ws;
164 58         223 $spaces =~ s/[ ]{0,7}\t/ /g;
165 58         97 my $len_diff = length($spaces) - length($prev_spaces);
166 58 50       105 if ($len_diff) {
167 58         59 $len_diff = abs($len_diff);
168 58         114 $indentdiffs{"m$len_diff"}++;
169             }
170 58         305 $prev_indent = $ws;
171            
172             } # end while lines
173              
174             # nothing found
175 13 100       39 return 'u' if not keys %indentdiffs;
176              
177 12         17 my $max = 0;
178 12         14 my $maxkey = undef;
179 12         47 while (my ($key, $value) = each %indentdiffs) {
180 27 100       135 $maxkey = $key, $max = $value if $value > $max;
181             }
182              
183 12 100       43 if ($maxkey =~ /^s(\d+)$/) {
184 8         20 my $mixedkey = "m" . $1;
185 8         14 my $mixed = $indentdiffs{$mixedkey};
186 8 100 66     36 if (defined($mixed) and $mixed >= $max * 0.2) {
187 3         7 $maxkey = $mixedkey;
188             }
189             }
190              
191             # fallback to emacs styles which are guesses only
192 12         22 foreach my $key (qw(softtabstop tabstop usetabs)) {
193 36 100 100     174 if (not exists $modeline_settings{$key}
194             and exists $modeline_settings{"style_$key"}) {
195 6         19 $modeline_settings{$key} = $modeline_settings{"style_$key"};
196             }
197             }
198              
199 12 100       49 if (exists $modeline_settings{softtabstop}) {
    100          
200 3         22 $maxkey =~ s/\d+/$modeline_settings{softtabstop}/;
201             }
202             elsif (exists $modeline_settings{tabstop}) {
203 1         8 $maxkey =~ s/\d+/$modeline_settings{tabstop}/;
204             }
205 12 100       32 if (exists $modeline_settings{usetabs}) {
206 2 50       7 if ($modeline_settings{usetabs}) {
207 2 100       10 $maxkey =~ s/^(.)(\d+)$/$1 eq 'u' ? "t8" : ($2 == 8 ? "t8" : "m$2")/e;
  2 50       15  
208             }
209             else {
210 0         0 $maxkey =~ s/^./m/;
211             }
212             }
213              
214             # mixedmode explicitly asked for
215 12 50       28 if ($modeline_settings{mixedmode}) {
216 0         0 $maxkey =~ s/^./m/;
217             }
218              
219 12         69 return $maxkey;
220             }
221              
222             sub _length_with_tabs_converted {
223 12     12   16 my $str=shift;
224 12   50     47 my $tablen=shift || 8;
225 12         46 $str =~ s/( +)$//;
226 12   50     38 my $trailing_spaces = $1||0;
227 12         16 $str =~ s/ +//g; # assume the spaces are all contained in tabs!
228 12         38 return length($str)*$tablen+length($trailing_spaces);
229             }
230              
231             sub _grok_indent_diff {
232 0     0   0 my $diff = shift;
233 0         0 my $indentdiffs = shift;
234              
235 0 0       0 if ($diff =~ /^ +$/) {
    0          
236 0         0 $indentdiffs->{"s" . length($diff)}++;
237             }
238             elsif ($diff =~ /^\t+$/) {
239 0         0 $indentdiffs->{"t8"}++; # we can't infer what a tab means. Or rather, we need smarter code to do it
240             }
241             else { # mixed!
242 0         0 $diff =~ s/( +)$//;
243 0   0     0 my $trailing_spaces = $1||0;
244 0         0 $diff =~ s/ +//g; # assume the spaces are all contained in tabs!
245 0         0 $indentdiffs->{"m" . (length($diff)*8+length($trailing_spaces))}++;
246             }
247             }
248              
249             sub _analyse_indent_diff {
250 117     117   160 my $diff = shift;
251              
252 117 100       367 if ($diff =~ /^ +$/) {
    50          
253 102         276 return "s" . length($diff);
254             }
255             elsif ($diff =~ /^\t+$/) {
256 15         29 return "t8"; # we can't infer what a tab means. Or rather, we need smarter code to do it
257             }
258             else { # mixed!
259 0         0 $diff =~ s/( +)$//;
260 0   0     0 my $trailing_spaces = $1||0;
261 0         0 $diff =~ s/ +//g; # assume the spaces are all contained in tabs!
262 0         0 return "m" . (length($diff)*8+length($trailing_spaces));
263             }
264             }
265              
266             {
267             # the vim modeline regexes
268             my $VimTag = qr/(?:ex|vim?(?:[<=>]\d+)?):/;
269             my $OptionArg = qr/[^\s\\]*(?:\\[\s\\][^\s\\]*)*/;
270             my $VimOption = qr/
271             \w+(?:=)?$OptionArg
272             /xo;
273              
274             my $VimModeLineStart = qr/(?:^|\s+)$VimTag/o;
275              
276             # while technically, we match against $VimModeLineStart before,
277             # IF there is a vim modeline, we don't need to optimize
278             my $VimModelineTypeOne = qr/
279             $VimModeLineStart
280             \s*
281             ($VimOption
282             (?:
283             (?:\s*:\s*|\s+)
284             $VimOption
285             )*
286             )
287             \s*$
288             /xo;
289            
290             my $VimModelineTypeTwo = qr/
291             $VimModeLineStart
292             \s*
293             set?\s+
294             ($VimOption
295             (?:\s+$VimOption)*
296             )
297             \s*
298             :
299             /xo;
300              
301             sub _check_vim_modeline {
302 404     404   507 my $class = shift;
303 404         464 my $line = shift;
304 404         422 my $settings = shift;
305              
306             # Quoting the vim docs:
307             # There are two forms of modelines. The first form:
308             # [text]{white}{vi:|vim:|ex:}[white]{options}
309             #
310             #[text] any text or empty
311             #{white} at least one blank character ( or )
312             #{vi:|vim:|ex:} the string "vi:", "vim:" or "ex:"
313             #[white] optional white space
314             #{options} a list of option settings, separated with white space or ':',
315             # where each part between ':' is the argument for a ":set"
316             # command (can be empty)
317             #
318             #Example:
319             # vi:noai:sw=3 ts=6 ~
320             # The second form (this is compatible with some versions of Vi):
321             #
322             # [text]{white}{vi:|vim:|ex:}[white]se[t] {options}:[text]
323             #
324             #[text] any text or empty
325             #{white} at least one blank character ( or )
326             #{vi:|vim:|ex:} the string "vi:", "vim:" or "ex:"
327             #[white] optional white space
328             #se[t] the string "set " or "se " (note the space)
329             #{options} a list of options, separated with white space, which is the
330             # argument for a ":set" command
331             #: a colon
332             #[text] any text or empty
333             #
334             #Example:
335             # /* vim: set ai tw=75: */ ~
336             #
337            
338 404         875 my @options;
339 404 100       3157 return if $line !~ $VimModeLineStart;
340              
341 6 100       601 if ($line =~ $VimModelineTypeOne) {
    50          
342 2         17 push @options, split /(?!<\\)[:\s]+/, $1;
343             }
344             elsif ($line =~ $VimModelineTypeTwo) {
345 4         31 push @options, split /(?!<\\)\s+/, $1;
346             }
347             else {
348 0         0 return;
349             }
350              
351 6 50       17 return if not @options;
352              
353 6         12 my $changed = 0;
354 6         13 foreach (@options) {
355 14 100       51 /s(?:ts|ofttabstop)=(\d+)/i and $settings->{softtabstop} = $1, $changed = 1, next;
356 11 100       53 /t(?:s|abstop)=(\d+)/i and $settings->{tabstop} = $1, $changed = 1, next;
357 7 100 33     65 /((?:no)?)(?:expandtab|et)/i and $settings->{usetabs} = (defined $1 and $1 =~ /no/i ? 1 : 0), $changed = 1, next;
358             }
359 6         26 return $changed;
360             }
361             }
362              
363              
364              
365              
366             {
367             # lookup for emacs tab modes
368             my %tabmodelookup = (
369             t => sub {
370             $_[0]->{mixedmode} = 1;
371             $_[0]->{usetabs} = 1;
372             },
373             nil => sub {
374             delete $_[0]->{mixedmode};
375             $_[0]->{usetabs} = 0;
376             },
377             );
378              
379             # lookup for emacs styles
380             my %stylelookup = (
381             kr => sub {
382             $_[0]->{style_softtabstop} = 4;
383             $_[0]->{style_tabstop} = 8;
384             $_[0]->{style_usetabs} = 1;
385             },
386             linux => sub {
387             $_[0]->{style_softtabstop} = 8;
388             $_[0]->{style_tabstop} = 8;
389             $_[0]->{style_usetabs} = 1;
390             },
391             'gnu' => sub {
392             $_[0]->{style_softtabstop} = 2;
393             $_[0]->{style_tabstop} = 8;
394             $_[0]->{style_usetabs} = 1;
395             },
396             'bsd' => sub {
397             $_[0]->{style_softtabstop} = 4;
398             $_[0]->{style_tabstop} = 8;
399             $_[0]->{style_usetabs} = 1;
400             },
401             'ellemtel' => sub {
402             $_[0]->{style_softtabstop} = 3;
403             $_[0]->{style_tabstop} = 3;
404             $_[0]->{style_usetabs} = 0;
405             },
406             'java' => sub {
407             $_[0]->{style_softtabstop} = 4;
408             $_[0]->{style_tabstop} = 8;
409             },
410             );
411             $stylelookup{'k&r'} = $stylelookup{kr};
412             $stylelookup{'bsd'} = $stylelookup{kr};
413             $stylelookup{'whitesmith'} = $stylelookup{kr};
414             $stylelookup{'stroustrup'} = $stylelookup{kr};
415              
416             my $FirstLineVar = qr/[^\s:]+/;
417             my $FirstLineValue = qr/[^;]+/; # dumb
418             my $FirstLinePair = qr/\s*$FirstLineVar\s*:\s*$FirstLineValue;/o;
419             my $FirstLineRegexp = qr/-\*-\s*mode:\s*[^\s;]+;\s*($FirstLinePair+)\s*-\*-/o;
420            
421            
422             sub _check_emacs_local_variables_first_line {
423 37     37   44 my $class = shift;
424 37         47 my $line = shift;
425 37         43 my $settings = shift;
426              
427             # on first line (second if there is a shebang):
428             # -*- mode: $MODENAME; $VARNAME: $VALUE; ... -*-
429             # ($FOO is not a literal)
430             # Example with a Lisp comment:
431             # ;; -*- mode: Lisp; fill-column: 75; comment-column: 50; -*-
432              
433              
434 37         43 my $changed = 0;
435 37 100       206 if ($line =~ $FirstLineRegexp) {
436 4         62 my @pairs = split /\s*;\s*/, $1;
437 4         8 foreach my $pair (@pairs) {
438 7         36 my ($key, $value) = split /\s*:\s*/, $pair, 2;
439 7 100       30 if ($key eq 'tab-width') {
    100          
    100          
    50          
440 1         4 $settings->{tabstop} = $value;# FIXME: check var
441 1         2 $changed = 1;
442             }
443             elsif ($key eq 'indent-tabs-mode') {
444 3 50       19 $tabmodelookup{$value}->($settings), $changed = 1 if defined $tabmodelookup{$value};
445             }
446             elsif ($key eq 'c-basic-offset') {
447 2   33     43 $settings->{tabstop} ||= $value; # tab-width takes precedence!?
448 2         5 $changed = 1;
449             }
450             elsif ($key eq 'style') { # this is quite questionable practice...
451 1 50       8 $stylelookup{$value}->($settings), $changed = 1 if defined $stylelookup{$value};
452             }
453             }
454             }
455              
456             # do this only as a LAST resort!
457             #$settings->{tabstop} = $settings->{style_tabstop} if not exists $settings->{tabstop};
458             #$settings->{softtabstop} = $settings->{style_softtabstop} if not exists $settings->{softtabstop};
459             #$settings->{usetabs} = $settings->{style_usetabs} if not exists $settings->{usetabs};
460              
461 37         79 return $changed;
462             }
463              
464             sub _check_emacs_local_variables {
465 513     513   551 my $class = shift;
466 513         762 my $line = shift;
467 513         575 my $settings = shift;
468              
469             # A local variables list goes near the end of the file, in the last page.[...]
470             # The local variables list starts with a line containing the string `Local Variables:',
471             # and ends with a line containing the string `End:'. In between come the variable names
472             # and values, one set per line, as `variable: value'. The values are not evaluated;
473             # they are used literally. If a file has both a local variables list and a `-*-'
474             # line, Emacs processes everything in the `-*-' line first, and everything in the
475             # local variables list afterward.
476             #
477             # Here is an example of a local variables list:
478             #
479             # ;; Local Variables: **
480             # ;; mode:lisp **
481             # ;; comment-column:0 **
482             # ;; comment-start: ";; " **
483             # ;; comment-end:"**" **
484             # ;; End: **
485             #
486             # Each line starts with the prefix `;; ' and each line ends with the suffix ` **'.
487             # Emacs recognizes these as the prefix and suffix based on the first line of the
488             # list, by finding them surrounding the magic string `Local Variables:'; then it
489             # automatically discards them from the other lines of the list.
490             #
491             # The usual reason for using a prefix and/or suffix is to embed the local variables
492             # list in a comment, so it won't confuse other programs that the file is intended as
493             # input for. The example above is for a language where comment lines start with `;; '
494             # and end with `**'; the local values for comment-start and comment-end customize the
495             # rest of Emacs for this unusual syntax. Don't use a prefix (or a suffix) if you don't need one.
496             #
497             #
498             # Can it be any more annoying to parse? --Steffen
499              
500 513 100       3007 if ($settings->{in_local_variables_section}) {
    100          
501 5         7 my $prefix = $settings->{local_variable_prefix};
502 5 50       13 $prefix = '' if not defined $prefix;
503 5         8 $prefix = quotemeta($prefix);
504 5         8 my $suffix = $settings->{local_variable_suffix};
505 5 50       10 $suffix = '' if not defined $suffix;
506 5         6 $suffix = quotemeta($suffix);
507              
508 5 100       76 if ($line =~ /^\s*$prefix\s*([^\s:]+):\s*(.+)$suffix\s*$/) {
509 3         7 my $key = $1;
510 3         6 my $value = $2;
511 3         7 $value =~ s/\s+$//;
512 3 50       18 if ($key eq 'tab-width') {
    100          
    100          
    50          
513 0         0 $settings->{tabstop} = $value;
514             }
515             elsif ($key eq 'indent-tabs-mode') {
516 1 50       7 $tabmodelookup{$value}->($settings) if defined $tabmodelookup{$value};
517             }
518             elsif ($key eq 'c-basic-offset') {
519 1   33     12 $settings->{tabstop} ||= $value; # tab-width takes precedence!?
520             }
521             elsif ($key eq 'style') { # this is quite questionable practice...
522 1 50       8 $stylelookup{$value}->($settings) if defined $stylelookup{$value};
523             }
524             } # end if variable line
525             else {
526 2         5 delete $settings->{in_local_variables_section};
527 2         3 delete $settings->{local_variable_prefix};
528 2         10 delete $settings->{local_variable_suffix};
529             }
530             }
531             elsif ($line =~ /^\s*(\S*)\s*Local Variables:\s*(\S*)\s*$/) {
532 2         8 $settings->{local_variable_prefix} = $1;
533 2         6 $settings->{local_variable_suffix} = $2;
534 2         27 $settings->{in_local_variables_section} = 1;
535             }
536             }
537              
538             sub _check_emacs_local_variables_at_file_end {
539 21     21   28 my $class = shift;
540 21         29 my $textref = shift;
541 21         24 my $settings = shift;
542 21         35 my $len = length($$textref);
543 21         34 my $start = $len-3000;
544 21 50       51 $start = 0 if $start < 0;
545 21         67 my $text = substr($$textref, $start);
546              
547 21         108 while ($text =~ /\G[ \t]*([^\r\n]*)[\r\n]+/cgs) {
548 513         1695 $class->_check_emacs_local_variables($1, $settings);
549             }
550 21         48 return;
551             }
552             } # end lexical block for emacs lookups
553              
554              
555             sub to_vim_commands {
556 0     0 1   my $indent = shift;
557 0 0         $indent = shift if $indent eq __PACKAGE__;
558 0 0 0       $indent = __PACKAGE__->parse($indent) if ref($indent) or length($indent) > 5;
559              
560 0           my @cmd;
561 0 0         if ( $indent =~ /^t(\d+)/ ) {
    0          
    0          
562 0           my $chars = $1;
563 0           push @cmd, ":set shiftwidth=$chars";
564 0           push @cmd, ":set tabstop=$chars";
565 0           push @cmd, ":set softtabstop=0";
566 0           push @cmd, ":set noexpandtab";
567             } elsif ( $indent =~ /^s(\d+)/ ) {
568 0           my $spaces = $1;
569 0           push @cmd, ":set shiftwidth=$spaces";
570 0           push @cmd, ":set tabstop=8";
571 0           push @cmd, ":set softtabstop=$spaces";
572 0           push @cmd, ':set expandtab';
573             } elsif ( $indent =~ /^m(\d+)/ ) {
574 0           my $spaces = $1;
575 0           push @cmd, ":set shiftwidth=$spaces";
576 0           push @cmd, ":set tabstop=8";
577 0           push @cmd, ":set softtabstop=$spaces";
578 0           push @cmd, ':set noexpandtab';
579             }
580 0           return @cmd;
581             }
582              
583             1;
584              
585             __END__