File Coverage

blib/lib/Text/Parser.pm
Criterion Covered Total %
statement 284 284 100.0
branch 125 126 99.2
condition 32 33 96.9
subroutine 78 78 100.0
pod 14 15 93.3
total 533 536 99.4


line stmt bran cond sub pod time code
1 42     42   1720841 use warnings;
  42         339  
  42         1475  
2 42     42   243 use strict;
  42         105  
  42         1041  
3 42     42   219 use feature ':5.14';
  42         88  
  42         7447  
4              
5             package Text::Parser 1.000;
6              
7             # ABSTRACT: Simplifies text parsing. Easily extensible to parse any text format.
8              
9              
10 42     42   14924 use Moose;
  42         12352250  
  42         326  
11 42     42   354246 use MooseX::CoverableModifiers;
  42         250627  
  42         323  
12 42     42   21346 use MooseX::StrictConstructor;
  42         1042105  
  42         229  
13 42     42   365513 use namespace::autoclean;
  42         107  
  42         278  
14 42     42   4228 use Moose::Util 'apply_all_roles', 'ensure_all_roles';
  42         114  
  42         457  
15 42     42   15937 use Moose::Util::TypeConstraints;
  42         100  
  42         400  
16 42     42   123719 use String::Util qw(trim ltrim rtrim eqq);
  42         120725  
  42         3740  
17 42     42   15481 use Text::Parser::Error;
  42         162  
  42         228  
18 42     42   46073 use Text::Parser::Rule;
  42         196  
  42         2499  
19 42     42   26544 use Text::Parser::RuleSpec;
  42         208  
  42         334  
20 42     42   141544 use List::MoreUtils qw(natatime first_index);
  42         104  
  42         395  
21              
22             enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)];
23             enum 'Text::Parser::Types::LineWrapStyle' =>
24                 [qw(trailing_backslash spice just_next_line slurp custom)];
25             enum 'Text::Parser::Types::TrimType' => [qw(l r b n)];
26              
27             subtype 'NonEmptyStr', as 'Str', where { length $_ > 0 },
28                 message {"$_ is an empty string"};
29              
30 42     42   41563 no Moose::Util::TypeConstraints;
  42         109  
  42         456  
31 42     42   41692 use FileHandle;
  42         428211  
  42         271  
32              
33             has _origclass => (
34                 is => 'ro',
35                 isa => 'Str',
36                 lazy => 1,
37                 default => '',
38             );
39              
40              
41             around BUILDARGS => sub {
42 55     55   1822     my ( $orig, $class ) = ( shift, shift );
43 55 100 100     613     return $class->$orig( @_, _origclass => $class ) if @_ > 1 or not @_;
44 2         6     my $ptr = shift;
45 2 100       13     parser_exception("Invalid parameters to Text::Parser constructor")
46                     if ref($ptr) ne 'HASH';
47 1         3     $class->$orig( %{$ptr}, _origclass => $class );
  1         7  
48             };
49              
50             sub BUILD {
51 64     64 0 154994     my $self = shift;
52 64 100       2266     ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split;
53 64 100       32915     return if not defined $self->multiline_type;
54 17         103     ensure_all_roles $self, 'Text::Parser::Multiline';
55             }
56              
57              
58             has auto_chomp => (
59                 is => 'rw',
60                 isa => 'Bool',
61                 lazy => 1,
62                 default => 0,
63             );
64              
65              
66             has auto_split => (
67                 is => 'rw',
68                 isa => 'Bool',
69                 lazy => 1,
70                 default => 0,
71                 trigger => \&__newval_auto_split,
72             );
73              
74             sub __newval_auto_split {
75 86     86   50070     my ( $self, $newval, $oldval ) = ( shift, shift, shift );
76 86 100       429     ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval;
77 86 100 100     355651     $self->_clear_all_fields if not $newval and $oldval;
78             }
79              
80              
81             has auto_trim => (
82                 is => 'rw',
83                 isa => 'Text::Parser::Types::TrimType',
84                 lazy => 1,
85                 default => 'n',
86             );
87              
88              
89             has custom_line_trimmer => (
90                 is => 'rw',
91                 isa => 'CodeRef|Undef',
92                 lazy => 1,
93                 default => undef,
94             );
95              
96              
97             has FS => (
98                 is => 'rw',
99                 isa => 'RegexpRef',
100                 lazy => 1,
101                 default => sub {qr/\s+/},
102             );
103              
104              
105             has indentation_str => (
106                 is => 'rw',
107                 isa => 'NonEmptyStr',
108                 lazy => 1,
109                 default => ' ',
110             );
111              
112              
113             has line_wrap_style => (
114                 is => 'rw',
115                 isa => 'Text::Parser::Types::LineWrapStyle|Undef',
116                 default => undef,
117                 lazy => 1,
118                 trigger => \&_on_line_unwrap,
119             );
120              
121             my %MULTILINE_VAL = (
122                 default => undef,
123                 spice => 'join_last',
124                 trailing_backslash => 'join_next',
125                 just_next_line => 'join_last',
126                 slurp => 'join_last',
127                 custom => undef,
128             );
129              
130             sub _on_line_unwrap {
131 16     16   44     my ( $self, $val, $oldval ) = (@_);
132 16 100 100     75     return if not defined $val and not defined $oldval;
133 15 100       46     $val = 'default' if not defined $val;
134 15 100 100     69     return if $val eq 'custom' and defined $self->multiline_type;
135 9         37     $self->multiline_type( $MULTILINE_VAL{$val} );
136             }
137              
138              
139             has multiline_type => (
140                 is => 'rw',
141                 isa => 'Text::Parser::Types::MultilineType|Undef',
142                 lazy => 1,
143                 default => undef,
144             );
145              
146             around multiline_type => sub {
147 1783     1783   84484     my ( $orig, $self ) = ( shift, shift );
148 1783         44222     my $oldval = $orig->($self);
149 1783 100 100     11104     return $oldval if not @_ or eqq( $_[0], $oldval );
150 17         224     return __newval_multi_line( $orig, $self, @_ );
151             };
152              
153             sub __newval_multi_line {
154 17     17   51     my ( $orig, $self, $newval ) = ( shift, shift, shift );
155 17         52     delete $self->{records}; # Bug W/A: role cannot apply if records exists
156 17 100       94     ensure_all_roles( $self, 'Text::Parser::Multiline' )
157                     if defined $newval;
158 17         88615     return $orig->( $self, $newval );
159             }
160              
161              
162             has track_indentation => (
163                 is => 'rw',
164                 isa => 'Bool',
165                 lazy => 1,
166                 default => 0,
167             );
168              
169              
170             has _obj_rules => (
171                 is => 'rw',
172                 isa => 'ArrayRef[Text::Parser::Rule]',
173                 lazy => 1,
174                 default => sub { [] },
175                 traits => ['Array'],
176                 handles => {
177                     _push_obj_rule => 'push',
178                     _has_no_obj_rules => 'is_empty',
179                     _get_obj_rules => 'elements',
180                 },
181             );
182              
183             sub add_rule {
184 26     26 1 83263     my $self = shift;
185 26 100       832     $self->auto_split(1) if not $self->auto_split;
186 26         847     my $rule = Text::Parser::Rule->new(@_);
187 26         1020     $self->_push_obj_rule($rule);
188             }
189              
190              
191             sub clear_rules {
192 2     2 1 6     my $self = shift;
193 2         77     $self->_obj_rules( [] );
194 2         77     $self->_clear_begin_rule;
195 2         72     $self->_clear_end_rule;
196             }
197              
198              
199             has _begin_rule => (
200                 is => 'rw',
201                 isa => 'Text::Parser::Rule',
202                 predicate => '_has_begin_rule',
203                 clearer => '_clear_begin_rule',
204             );
205              
206             sub BEGIN_rule {
207 5     5 1 416     my $self = shift;
208 5 100       147     $self->auto_split(1) if not $self->auto_split;
209 5         24     my (%opt) = _defaults_for_begin_end(@_);
210 5         33     $self->_modify_rule( '_begin_rule', %opt );
211             }
212              
213             sub _defaults_for_begin_end {
214 8     8   35     my (%opt) = @_;
215 8 100       34     $opt{dont_record} = 1 if not exists $opt{dont_record};
216 8 100       26     delete $opt{if} if exists $opt{if};
217 8 100       26     delete $opt{continue_to_next} if exists $opt{continue_to_next};
218 8         40     return (%opt);
219             }
220              
221             sub _modify_rule {
222 8     8   28     my ( $self, $func, %opt ) = @_;
223 8         26     my $pred = '_has' . $func;
224 8 100       284     $self->_append_rule_lines( $func, \%opt ) if $self->$pred();
225 8         255     my $rule = Text::Parser::Rule->new(%opt);
226 8         244     $self->$func($rule);
227             }
228              
229             sub _append_rule_lines {
230 3     3   19     my ( $self, $func, $opt ) = ( shift, shift, shift );
231 3         73     my $old = $self->$func();
232 3         90     $opt->{do} = $old->action . $opt->{do};
233             }
234              
235              
236             has _end_rule => (
237                 is => 'rw',
238                 isa => 'Text::Parser::Rule',
239                 predicate => '_has_end_rule',
240                 clearer => '_clear_end_rule',
241             );
242              
243             sub END_rule {
244 3     3 1 28     my $self = shift;
245 3 100       101     $self->auto_split(1) if not $self->auto_split;
246 3         14     my (%opt) = _defaults_for_begin_end(@_);
247 3         16     $self->_modify_rule( '_end_rule', %opt );
248             }
249              
250              
251             has _indent_level => (
252                 is => 'ro',
253                 isa => 'Int|Undef',
254                 lazy => 1,
255                 default => undef,
256                 writer => '_set_indent_level',
257                 reader => 'this_indent',
258             );
259              
260              
261             has _current_line => (
262                 is => 'ro',
263                 isa => 'Str|Undef',
264                 init_arg => undef,
265                 writer => '_set_this_line',
266                 reader => 'this_line',
267                 clearer => '_clear_this_line',
268                 default => undef,
269             );
270              
271              
272             has filename => (
273                 is => 'rw',
274                 isa => 'Str|Undef',
275                 lazy => 1,
276                 init_arg => undef,
277                 default => undef,
278                 predicate => '_has_filename',
279                 clearer => '_clear_filename',
280                 trigger => \&_set_filehandle,
281             );
282              
283             sub _set_filehandle {
284 93     93   22888     my $self = shift;
285 93 100       2680     return $self->_clear_filename if not defined $self->filename;
286 92         510     $self->_save_filehandle( $self->__get_valid_fh );
287             }
288              
289             sub __get_valid_fh {
290 92     92   217     my $self = shift;
291 92         385     my $fname = $self->_get_valid_text_filename;
292 92 100       1333     return FileHandle->new( $fname, 'r' ) if defined $fname;
293 4         128     $fname = $self->filename;
294 4         118     $self->_clear_filename;
295 4         11     $self->_throw_invalid_file_exception($fname);
296             }
297              
298             # Don't touch: Override this in Text::Parser::AutoUncompress
299             sub _get_valid_text_filename {
300 92     92   221     my $self = shift;
301 92         2290     my $fname = $self->filename;
302 92 100 66     30087     return $fname if -f $fname and -r $fname and -T $fname;
      100        
303 4         22     return;
304             }
305              
306             # Don't touch: Override this in Text::Parser::AutoUncompress
307             sub _throw_invalid_file_exception {
308 4     4   11     my ( $self, $fname ) = ( shift, shift );
309 4 100       67     parser_exception("Invalid filename $fname") if not -f $fname;
310 1 50       17     parser_exception("Cannot read $fname") if not -r $fname;
311 1         10     parser_exception("Not a plain text file $fname");
312             }
313              
314              
315             has filehandle => (
316                 is => 'rw',
317                 isa => 'FileHandle|Undef',
318                 lazy => 1,
319                 init_arg => undef,
320                 default => undef,
321                 predicate => '_has_filehandle',
322                 writer => '_save_filehandle',
323                 reader => '_get_filehandle',
324                 clearer => '_close_filehandles',
325             );
326              
327             sub filehandle {
328 105     105 1 1358     my $self = shift;
329 105 100 100     3744     return if not @_ and not $self->_has_filehandle;
330 102 100       787     $self->_save_filehandle(@_) if @_;
331 98 100       450     $self->_clear_filename if @_;
332 98         3056     return $self->_get_filehandle;
333             }
334              
335              
336             sub read {
337 95     95 1 95797     my $self = shift;
338 95 100       456     return if not defined $self->_handle_read_inp(@_);
339 89         589     $self->_run_begin_end_block('_begin_rule');
340 89         491     $self->__read_and_close_filehandle;
341 78         295     $self->_run_begin_end_block('_end_rule');
342             }
343              
344             sub _handle_read_inp {
345 95     95   249     my $self = shift;
346 95 100       318     return $self->filehandle if not @_;
347 93 100 100     555     return if not ref( $_[0] ) and not $_[0];
348 92 100       3063     return $self->filename(@_) if not ref( $_[0] );
349 6         21     return $self->filehandle(@_);
350             }
351              
352             sub _before_begin {
353 89     89   175     my $self = shift;
354 89         437     $self->forget;
355 89 100       3679     $self->_preset_vars( $self->_all_preset ) if not $self->_has_no_prestash;
356             }
357              
358             sub _after_end {
359 78     78   181     my $self = shift;
360 78         2199     my $h = $self->_hidden_stash;
361 78         165     $h->{$_} = $self->stashed($_) for ( keys %{$h} );
  78         530  
362             }
363              
364             sub _run_begin_end_block {
365 167     167   492     my ( $self, $func ) = ( shift, shift );
366 167 100       791     $self->_before_begin if $func eq '_begin_rule';
367 167         754     $self->_run_beg_end__($func);
368 167 100       923     $self->_after_end if $func eq '_end_rule';
369             }
370              
371             sub _run_beg_end__ {
372 167     167   415     my ( $self, $func ) = ( shift, shift );
373 167         481     my $pred = '_has' . $func;
374 167 100       5634     return if not $self->$pred();
375 6         170     my $rule = $self->$func();
376 6         33     $rule->_run( $self, 0 );
377             }
378              
379             sub __read_and_close_filehandle {
380 89     89   206     my $self = shift;
381 89         423     $self->_prep_to_read_file;
382 89         520     $self->__read_file_handle;
383 78         748     $self->_final_operations_after_read;
384             }
385              
386             sub _prep_to_read_file {
387 89     89   248     my $self = shift;
388 89         3581     $self->_reset_line_count;
389 89         3442     $self->_empty_records;
390 89         3297     $self->_clear_abort;
391             }
392              
393             sub __read_file_handle {
394 89     89   1221     my $self = shift;
395 89         504     my $fh = $self->filehandle();
396 89         2079     while (<$fh>) {
397 738 100       2068         last if not $self->__parse_line($_);
398                 }
399             }
400              
401             sub __parse_line {
402 738     738   1648     my ( $self, $line ) = ( shift, shift );
403 738         1692     $line = $self->_prep_line_for_parsing($line);
404 738         10687     $self->_set_this_line($line);
405 738         3943     $self->save_record($line);
406 730         19981     return not $self->has_aborted;
407             }
408              
409             sub _prep_line_for_parsing {
410 738     738   1266     my ( $self, $line ) = ( shift, shift );
411 738         24667     $self->_next_line_parsed();
412 738 100       18410     $self->_find_indent_level($line) if $self->track_indentation;
413 738         2046     $line = $self->_line_manip($line);
414             }
415              
416             sub _find_indent_level {
417 18     18   31     my ( $self, $line ) = ( shift, shift );
418 18         34     chomp $line;
419 18 100       439     length( $self->indentation_str ) >= 2
420                     ? $self->_find_long_indent_level($line)
421                     : $self->_singlechar_indent($line);
422             }
423              
424             sub _find_long_indent_level {
425 9     9   20     my ( $self, $line ) = ( shift, shift );
426 9         211     my $n = _num_matching( $self->indentation_str, $line );
427 9         284     $self->_set_indent_level($n);
428             }
429              
430             sub _num_matching {
431 9     9   21     my ( $ch, $line ) = ( shift, shift );
432 9         94     my $it = natatime( length($ch), ( split //, $line ) );
433 9         36     my ( $i, @x ) = ( 0, $it->() );
434 9         32     while ( $ch eq join( '', @x ) ) {
435 1         6         ( $i, @x ) = ( $i + 1, $it->() );
436                 }
437 9         40     return $i;
438             }
439              
440             sub _singlechar_indent {
441 9     9   20     my ( $self, $line ) = ( shift, shift );
442 9     10   94     my $n = first_index { $_ ne $self->indentation_str } ( split //, $line );
  10         244  
443 9         283     $self->_set_indent_level($n);
444             }
445              
446             sub _line_manip {
447 738     738   1576     my ( $self, $line ) = ( shift, shift );
448 738 100       18118     return $self->_def_line_manip($line)
449                     if not defined $self->custom_line_trimmer;
450 1         26     my $cust = $self->custom_line_trimmer;
451 1         6     return $cust->($line);
452             }
453              
454             sub _def_line_manip {
455 737     737   1562     my ( $self, $line ) = ( shift, shift );
456 737 100       15608     chomp $line if $self->auto_chomp;
457 737         1889     return $self->_trim_line($line);
458             }
459              
460             sub _trim_line {
461 737     737   1434     my ( $self, $line ) = ( shift, shift );
462 737 100       15659     return $line if $self->auto_trim eq 'n';
463 69 100       563     return trim($line) if $self->auto_trim eq 'b';
464 8 100       179     return ltrim($line) if $self->auto_trim eq 'l';
465 4         15     return rtrim($line);
466             }
467              
468             sub _final_operations_after_read {
469 78     78   199     my $self = shift;
470 78 100       2844     $self->_close_filehandles if $self->_has_filename;
471 78         1687     $self->_clear_this_line;
472 78 100       2144     $self->_set_indent_level(undef) if $self->track_indentation;
473             }
474              
475              
476             sub last_record {
477 16     16 1 712     my $self = shift;
478 16         653     my $count = $self->_num_records();
479 16 100       62     return if not $count;
480 15         540     return $self->_access_record( $count - 1 );
481             }
482              
483              
484             has records => (
485                 isa => 'ArrayRef[Any]',
486                 is => 'ro',
487                 lazy => 1,
488                 default => sub { return []; },
489                 auto_deref => 1,
490                 init_arg => undef,
491                 traits => ['Array'],
492                 predicate => '_has_records_attrib',
493                 handles => {
494                     get_records => 'elements',
495                     push_records => 'push',
496                     pop_record => 'pop',
497                     _empty_records => 'clear',
498                     _num_records => 'count',
499                     _access_record => 'accessor',
500                 },
501             );
502              
503              
504             has _stashed_vars => (
505                 is => 'ro',
506                 isa => 'HashRef[Any]',
507                 default => sub { {} },
508                 lazy => 1,
509                 traits => ['Hash'],
510                 handles => {
511                     _clear_stash => 'clear',
512                     _stashed => 'elements',
513                     _has_stashed => 'exists',
514                     _forget => 'delete',
515                     has_empty_stash => 'is_empty',
516                     _get_vars => 'get',
517                     _preset_vars => 'set',
518                 }
519             );
520              
521             sub forget {
522 93     93 1 235     my $self = shift;
523 93 100       333     return $self->_forget_stashed(@_) if @_;
524 90         3430     $self->_clear_stash;
525             }
526              
527             sub _forget_stashed {
528 3     3   6     my $self = shift;
529 3         7     foreach my $s (@_) {
530 3 100       141         $self->_forget($s) if $self->_has_stashed($s);
531 3 100       109         $self->_del_prestash($s) if $self->_has_prestash($s);
532                 }
533             }
534              
535             sub stashed {
536 11     11 1 1224     my $self = shift;
537 11 100       392     return $self->_get_vars(@_) if @_;
538 1         37     return $self->_stashed;
539             }
540              
541             sub has_stashed {
542 10     10 1 396     my $self = shift;
543 10 100       391     return 1 if $self->_has_stashed(@_);
544 8         276     return $self->_has_prestash(@_);
545             }
546              
547             has _hidden_stash => (
548                 is => 'ro',
549                 isa => 'HashRef[Any]',
550                 default => sub { {} },
551                 lazy => 1,
552                 traits => ['Hash'],
553                 handles => {
554                     prestash => 'set',
555                     _all_preset => 'elements',
556                     _has_prestash => 'exists',
557                     _has_no_prestash => 'is_empty',
558                     _del_prestash => 'delete',
559                 }
560             );
561              
562              
563             has lines_parsed => (
564                 is => 'ro',
565                 isa => 'Int',
566                 lazy => 1,
567                 init_arg => undef,
568                 default => 0,
569                 traits => ['Counter'],
570                 handles => {
571                     _next_line_parsed => 'inc',
572                     _reset_line_count => 'reset',
573                 }
574             );
575              
576              
577             has abort => (
578                 is => 'rw',
579                 isa => 'Bool',
580                 lazy => 1,
581                 default => 0,
582                 traits => ['Bool'],
583                 reader => 'has_aborted',
584                 handles => {
585                     abort_reading => 'set',
586                     _clear_abort => 'unset'
587                 },
588             );
589              
590              
591             has _is_wrapped => (
592                 is => 'rw',
593                 isa => 'CodeRef|Undef',
594                 default => undef,
595                 lazy => 1,
596             );
597              
598             has _unwrap_routine => (
599                 is => 'rw',
600                 isa => 'CodeRef|Undef',
601                 default => undef,
602                 lazy => 1,
603             );
604              
605             sub custom_line_unwrap_routines {
606 8     8 1 479     my $self = shift;
607 8         34     $self->_prep_for_custom_unwrap_routines;
608 7         28     my ( $is_wr, $un_wr ) = _check_custom_unwrap_args(@_);
609 2         96     $self->_is_wrapped($is_wr);
610 2         56     $self->_unwrap_routine($un_wr);
611             }
612              
613             sub _prep_for_custom_unwrap_routines {
614 8     8   11     my $self = shift;
615 8         267     my $s = $self->line_wrap_style();
616 8 100 100     44     parser_exception("Line wrap style already set to $s")
617                     if defined $s and 'custom' ne $s;
618 7         220     $self->line_wrap_style('custom');
619             }
620              
621             my $unwrap_prefix = "Bad call to custom_line_unwrap_routines: ";
622              
623              
624              
625              
626             sub save_record {
627 454     454 1 8655     my ( $self, $record ) = ( shift, shift );
628 454 100       1118     ( $self->_has_no_rules )
629                     ? $self->push_records($record)
630                     : $self->_run_through_rules;
631             }
632              
633             sub _has_no_rules {
634 454     454   710     my $self = shift;
635 454 100       11816     return 0 if Text::Parser::RuleSpec->class_has_rules( $self->_origclass );
636 388         13450     return $self->_has_no_obj_rules;
637             }
638              
639             sub _run_through_rules {
640 265     265   445     my $self = shift;
641 265         6263     my (@crules) = Text::Parser::RuleSpec->class_rules( $self->_origclass );
642 265         8827     foreach my $rule ( @crules, $self->_get_obj_rules ) {
643 630 100       1793         next if not $rule->_test($self);
644 174         619         $rule->_run( $self, 0 );
645 174 100       4866         last if not $rule->continue_to_next;
646                 }
647             }
648              
649              
650             my %IS_LINE_CONTINUED = (
651                 default => \&_def_is_line_continued,
652                 spice => \&_spice_is_line_contd,
653                 trailing_backslash => \&_tbs_is_line_contd,
654                 just_next_line => \&_jnl_is_line_contd,
655                 slurp => \&_def_is_line_continued,
656                 custom => undef,
657             );
658              
659             my %JOIN_LAST_LINE = (
660                 default => \&_def_join_last_line,
661                 spice => \&_spice_join_last_line,
662                 trailing_backslash => \&_tbs_join_last_line,
663                 just_next_line => \&_jnl_join_last_line,
664                 slurp => \&_def_join_last_line,
665                 custom => undef,
666             );
667              
668             sub is_line_continued {
669 162     162 1 291     my $self = shift;
670 162 100       345     return 0 if not defined $self->multiline_type;
671 156         412     my $routine = $self->_get_is_line_contd_routine;
672 156 100       327     parser_exception("is_wrapped routine not defined")
673                     if not defined $routine;
674 155         328     $routine->( $self, @_ );
675             }
676              
677             sub _val_of_line_wrap_style {
678 291     291   385     my $self = shift;
679 291 100       7050     defined $self->line_wrap_style ? $self->line_wrap_style : 'default';
680             }
681              
682             sub _get_is_line_contd_routine {
683 156     156   232     my $self = shift;
684 156         300     my $val = $self->_val_of_line_wrap_style;
685                 ( $val ne 'custom' )
686 156 100       712         ? $IS_LINE_CONTINUED{$val}
687                     : $self->_is_wrapped;
688             }
689              
690             sub join_last_line {
691 135     135 1 218     my $self = shift;
692 135         293     my $routine = $self->_get_join_last_line_routine;
693 135 100       282     parser_exception("unwrap_routine not defined")
694                     if not defined $routine;
695 134         276     $routine->( $self, @_ );
696             }
697              
698             sub _get_join_last_line_routine {
699 135     135   563     my $self = shift;
700 135         246     my $val = $self->_val_of_line_wrap_style;
701                 ( $val ne 'custom' )
702 135 100       489         ? $JOIN_LAST_LINE{$val}
703                     : $self->_unwrap_routine;
704             }
705              
706             sub _def_is_line_continued {
707 116     116   168     my $self = shift;
708 116 100 100     257     return 0
709                     if $self->multiline_type eq 'join_last'
710                     and $self->lines_parsed() == 1;
711 110         377     return 1;
712             }
713              
714             sub _spice_is_line_contd {
715 7     7   9     my $self = shift;
716 7         158     substr( shift, 0, 1 ) eq '+';
717             }
718              
719             sub _tbs_is_line_contd {
720 3     3   6     my $self = shift;
721 3         10     substr( trim(shift), -1, 1 ) eq "\\";
722             }
723              
724             sub _jnl_is_line_contd {
725 21     21   31     my $self = shift;
726 21 100       497     return 0 if $self->lines_parsed == 1;
727 20         65     return length( trim(shift) ) > 0;
728             }
729              
730             sub _def_join_last_line {
731 107     107   220     my ( $self, $last, $line ) = ( shift, shift, shift );
732 107         378     return $last . $line;
733             }
734              
735             sub _spice_join_last_line {
736 3     3   7     my ( $self, $last, $line ) = ( shift, shift, shift );
737 3         8     chomp $last;
738 3         21     $line =~ s/^[+]\s*/ /;
739 3         14     $last . $line;
740             }
741              
742             sub _tbs_join_last_line {
743 2     2   5     my ( $self, $last, $line ) = ( shift, shift, shift );
744 2         6     chomp $last;
745 2         10     $last =~ s/\\\s*$//;
746 2         10     rtrim($last) . ' ' . ltrim($line);
747             }
748              
749             sub _jnl_join_last_line {
750 18     18   38     my ( $self, $last, $line ) = ( shift, shift, shift );
751 18         44     chomp $last;
752 18         59     return $last . $line;
753             }
754              
755              
756             __PACKAGE__->meta->make_immutable;
757              
758 42     42   235822 no Moose;
  42         131  
  42         403  
759              
760             1;
761              
762             __END__
763            
764             =pod
765            
766             =encoding UTF-8
767            
768             =head1 NAME
769            
770             Text::Parser - Simplifies text parsing. Easily extensible to parse any text format.
771            
772             =head1 VERSION
773            
774             version 1.000
775            
776             =head1 SYNOPSIS
777            
778             The following prints the content of the file (named in the first argument) to C<STDOUT>.
779            
780             use Text::Parser;
781            
782             my $parser = Text::Parser->new();
783             $parser->read(shift);
784             print $parser->get_records, "\n";
785            
786             The above code prints after reading the whole file, which can be slow if you have large fules. This following prints contents immediately.
787            
788             my $parser = Text::Parser->new();
789             $parser->add_rule(do => 'print', dont_record => 1);
790             ($#ARGV > 0) ? $parser->filename(shift) : $parser->filehandle(\*STDIN);
791             $parser->read(); # Runs the rule for each line of input file
792            
793             Also, the third line there allows this program to read from a file name specified on command-line, or C<STDIN>. In effect, this makes this Perl code a good replica of the UNIX C<cat>.
794            
795             Here is an example with a simple rule that extracts the first error in the logfile and aborts reading further:
796            
797             my $parser = Text::Parser->new();
798             $parser->add_rule(
799             if => '$1 eq "ERROR:"',
800             # $1 is a positional identifier for first 'field' on the line
801             do => '$this->abort_reading; return $_;'
802             # $this is copy of $parser accessible from within the rule
803             # abort_reading() tells parser to stop reading further
804             # Returned values are saved as records. Any data structure can be saved.
805             # $_ contains the full line as string, including any whitespaces
806             );
807            
808             # Returns the first line starting with "ERROR:"
809             $parser->read('/path/to/logfile');
810            
811             print "Some errors were found:\n" if $parser->get_records();
812            
813             See L<this important note|Text::Parser::Manual::ExtendedAWKSyntax/"Important note about quotes"> about using single quotes instead of double quotes.
814            
815             Here is an example that parses a table with field separators indicated by C<|> character:
816            
817             use Data::Dumper 'Dumper';
818             my $table_parser = Text::Parser->new( FS => qr/\s*[|]\s*/ );
819             $table_parser->add_rule(
820             if => '$this->NF == 0',
821             dont_record => 1
822             );
823             $table_parser->add_rule(
824             if => '$this->lines_parsed == 1',
825             do => '~columns = [$this->fields()];'
826             );
827             $table_parser->add_rule(
828             if => '$this->lines_parsed > 1',
829             do => 'my %rec = ();
830             foreach my $i (0..$#{~columns}) {
831             my $k = ~columns->[$i];
832             $rec{$k} = $this->field($i);
833             }
834             return \%rec;',
835             );
836             $table_parser->read('table.txt');
837             print Dumper($table_parser->get_records()), "\n";
838            
839             In the above example you see the use of a L<stashed variable|/"METHODS FOR ACCESSING STASHED VARIABLES"> named C<~columns>. Note that the sigil used here is not a Perl sigil, but is converted to native Perl code. In the above case, each record is a hash with fixed number of fields.
840            
841             More complex file-formats can be read and contents stored in a data-structure or an object. Here is an example:
842            
843             use strict;
844             use warnings;
845            
846             package ComplexFormatParser;
847            
848             use Text::Parser::RuleSpec; ## provides applies_rule + other sugar, imports Moose
849             extends 'Text::Parser';
850            
851             # This rule ignores all comments
852             applies_rule ignore_comments => (
853             if => 'substr($1, 0, 1) eq "#"',
854             dont_record => 1,
855             );
856            
857             # An attribute of the parser class.
858             has current_section => (
859             is => 'rw',
860             isa => 'Str',
861             default => undef,
862             );
863            
864             applies_rule get_header => (
865             if => '$1 eq "SECTION"',
866             do => '$this->current_section($2);', # $this : this parser object
867             dont_record => 1,
868             );
869            
870             # ... More can be done
871            
872             package main;
873             use ComplexFormatParser;
874            
875             my $p = ComplexFormatParser->new();
876             $p->read('myfile.complex.fmt');
877            
878             =head1 RATIONALE
879            
880             The L<need|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it in Perl. Most programmers still write boilerplate code with a C<while> loop.
881            
882             Instead C<Text::Parser> allows programmers to parse text with simple, self-explanatory L<rules|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK.
883            
884             I<B<Sidenote:>> Incidentally, AWK is L<one of the ancestors of Perl|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to do way better than AWK. But while you can use Perl to do what AWK already does, that is usually limited to one-liners like C<perl -lane>. Even C<perl -lan script.pl> is not meant for serious projects. And it seems that L<some people still prefer AWK to Perl|https://aplawrence.com/Unixart/awk-vs.perl.html>. This is not looking good.
885            
886             =head1 OVERVIEW
887            
888             With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read|/read>> method automatically runs each rule collecting records from the text input into an internal array. Finally, C<L<get_records|/get_records>> can retrieve the records.
889            
890             Since C<Text::Parser> is a class, a programmer can subclass it to parse very complex file formats. L<Text::Parser::RuleSpec> provides intuitive rule sugar. Use of L<Moose> is encouraged. And data from parsed files can be turned into very complex data-structures or even objects. In this case, you wouldn't need to use C<get_records>.
891            
892             With B<L<Text::Parser>> programmers have the elegance and simplicity of AWK combined with the power of Perl at their disposal.
893            
894             =head1 CONSTRUCTOR
895            
896             =head2 new
897            
898             Takes optional attributes as in example below. See section L<ATTRIBUTES|/ATTRIBUTES> for a list of the attributes and their description.
899            
900             my $parser = Text::Parser->new();
901            
902             my $parser2 = Text::Parser->new( line_wrap_style => 'trailing_backslash' );
903            
904             =head1 ATTRIBUTES
905            
906             The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name.
907            
908             =head2 auto_chomp
909            
910             Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>.
911            
912             print "Parser will chomp lines automatically\n" if $parser->auto_chomp;
913            
914             =head2 auto_split
915            
916             Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields.
917            
918             If it is set to a true value, each line will be split into fields, and L<a set of methods|/"METHODS USED ONLY IN RULES AND SUBCLASSES"> become accessible to C<L<save_record|/save_record>> or the rules.
919            
920             =head2 auto_trim
921            
922             Read-write attribute. The values this can take are shown under the C<L<new|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed).
923            
924             $parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default)
925            
926             =head2 custom_line_trimmer
927            
928             Read-write attribute which can be set to a custom subroutine that trims each line before applying any rules or saving any records. The function is expected to take a single argument containing the complete un-trimmed line, and is expected to return a manipulated line.
929            
930             sub _cust_trimmer {
931             my $line = shift;
932             chomp $line;
933             return $line;
934             }
935            
936             $parser->custom_line_trimmer(\&_cust_trimmer);
937            
938             B<Note:> If you set this attribute, you are entirely responsible for the trimming. Poorly written routines could causing the C<auto_split> operation to misbehave.
939            
940             By default it is undefined.
941            
942             =head2 FS
943            
944             Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+|[,]/> which will split across either spaces or commas. The default value for this attribute is C<qr/\s+/>.
945            
946             The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>. The ability to use a regular expression is an upgrade from AWK.
947            
948             $parser->FS( qr/\s+\(*|\s*\)/ );
949            
950             C<FS> I<can> be changed from within a rule. Changes made even within a rule would take effect on the immediately next line read.
951            
952             =head2 indentation_str
953            
954             This can be used to set the indentation character or string. By default it is a single space C< >. But you may want to set it to be a tab (C<\t>) or perhaps some other character like a hyphen (C<->) or even a string (C< -E<gt>>). This attribute is used only if C<L<track_indentation|/track_indentation>> is set.
955            
956             =head2 line_wrap_style
957            
958             Read-write attribute used as a quick way to select from commonly known line-wrapping styles. If the target text format allows line-wrapping this attribute allows the programmer to write rules as if they were on a single line.
959            
960             $parser->line_wrap_style('trailing_backslash');
961            
962             Allowed values are:
963            
964             trailing_backslash - very common style ending lines with \
965             and continuing on the next line
966            
967             spice - used for SPICE syntax, where the (+)
968             + symbol continues content of last line
969            
970             just_next_line - used in simple text files written to be
971             humanly-readable. New paragraphs start
972             on a new line after a blank line.
973            
974             slurp - used to "slurp" the whole file into
975             a single line.
976            
977             custom - user-defined style. User must specify
978             value of multiline_type and define
979             two custom unwrap routines using the
980             custom_line_unwrap_routines method
981             when custom is chosen.
982            
983             When C<line_wrap_style> is set to one of these values, the value of C<multiline_type> is automatically set to an appropriate value. Read more about L<handling the common line-wrapping styles|/"Common line-wrapping styles">.
984            
985             =head2 multiline_type
986            
987             Read-write attribute used mainly if the programmer wishes to specify custom line-unwrapping methods. By default, this attribute is C<undef>, i.e., the target text format will not have wrapped lines.
988            
989             $parser->line_wrap_style('custom');
990             $parser->multiline_type('join_next');
991            
992             my $mult = $parser->multiline_type;
993             print "Parser is a multi-line parser of type: $mult" if defined $mult;
994            
995             Allowed values for C<multiline_type> are described below, but it can also be set back to C<undef>.
996            
997             =over 4
998            
999             =item *
1000            
1001             If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>.
1002            
1003             =item *
1004            
1005             If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>.
1006            
1007             =back
1008            
1009             To know more about how to use this, read about L<specifying custom line-unwrap routines|/"Specifying custom line-unwrap routines">.
1010            
1011             =head2 track_indentation
1012            
1013             This boolean attribute enables tracking of the number of indentation characters are there at the beginning of each line. In some text formats, this is a very important information that can indicate the depth of some data. By default, this is false. When set to a true value, you can get the number of indentation characters on a given line with the C<L<this_indent|/this_indent>> method.
1014            
1015             $parser->track_indentation(1);
1016            
1017             Now you can use C<this_indent> method in the rules:
1018            
1019             $parser->add_rule(if => '$this->this_indent > 0', do => '~num_indented ++;')
1020            
1021             =head1 METHODS FOR SPECIFYING RULES
1022            
1023             These are meant to be called from the C<::main> program or within subclasses.
1024            
1025             =head2 add_rule
1026            
1027             Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor.
1028            
1029             $parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing
1030             $parser->add_rule(if => 'm/li/, do => 'print', dont_record); # Prints lines with 'li'
1031             $parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements
1032            
1033             Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute.
1034            
1035             =head2 clear_rules
1036            
1037             Takes no arguments, returns nothing. Clears the rules that were added to the object.
1038            
1039             $parser->clear_rules;
1040            
1041             This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule|/BEGIN_rule>> and C<L<END_rule|/END_rule>>.
1042            
1043             =head2 BEGIN_rule
1044            
1045             Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored.
1046            
1047             $parser->BEGIN_rule(do => '~count = 0;');
1048            
1049             =over 4
1050            
1051             =item *
1052            
1053             Since any C<if> key is ignored, the C<do> key is required. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
1054            
1055             =item *
1056            
1057             The C<BEGIN_rule> is mainly used to initialize some variables.
1058            
1059             =item *
1060            
1061             By default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved even before reading the first line of text.
1062            
1063             =back
1064            
1065             =head2 END_rule
1066            
1067             Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method.
1068            
1069             $parser->END_rule(do => 'print ~count, "\n";');
1070            
1071             =over 4
1072            
1073             =item *
1074            
1075             Since any C<if> key is ignored, the C<do> key is required. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
1076            
1077             =item *
1078            
1079             The C<END_rule> is mainly used to do final processing of collected records.
1080            
1081             =item *
1082            
1083             By default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved after the end rule is processed.
1084            
1085             =back
1086            
1087             =head1 METHODS USED ONLY IN RULES AND SUBCLASSES
1088            
1089             These methods can be used only inside rules, or methods of a subclass. Some of these methods are available only when C<auto_split> is on. They are listed as follows:
1090            
1091             =over 4
1092            
1093             =item *
1094            
1095             L<NF|Text::Parser::AutoSplit/NF> - number of fields on this line
1096            
1097             =item *
1098            
1099             L<fields|Text::Parser::AutoSplit/fields> - all the fields as an array of strings ; trailing C<\n> removed
1100            
1101             =item *
1102            
1103             L<field|Text::Parser::AutoSplit/field> - access individual elements of the array above ; negative arguments count from back
1104            
1105             =item *
1106            
1107             L<field_range|Text::Parser::AutoSplit/field_range> - array of fields in the given range of indices ; negative arguments allowed
1108            
1109             =item *
1110            
1111             L<join_range|Text::Parser::AutoSplit/join_range> - join the fields in the range of indices ; negative arguments allowed
1112            
1113             =item *
1114            
1115             L<find_field|Text::Parser::AutoSplit/find_field> - returns field for which a given subroutine is true ; each field is passed to the subroutine in C<$_>
1116            
1117             =item *
1118            
1119             L<find_field_index|Text::Parser::AutoSplit/find_field_index> - similar to above, except it returns the index of the field instead of the field itself
1120            
1121             =item *
1122            
1123             L<splice_fields|Text::Parser::AutoSplit/splice_fields> - like the native Perl C<splice>
1124            
1125             =back
1126            
1127             Other methods described below are also to be used only inside a rule, or inside methods called by the rules.
1128            
1129             =head2 abort_reading
1130            
1131             Takes no arguments. Returns C<1>. Aborts C<read>ing any more lines, and C<read> method exits gracefully as if nothing unusual happened.
1132            
1133             $parser->add_rule(
1134             do => '$this->abort_reading;',
1135             if => '$1 eq "EOF"',
1136             dont_record => 1,
1137             );
1138            
1139             =head2 this_indent
1140            
1141             Takes no arguments, and returns the number of indentation characters found at the front of the current line. This can be called from within a rule:
1142            
1143             $parser->add_rule( if => '$this->this_indent > 0', );
1144            
1145             =head2 this_line
1146            
1147             Takes no arguments, and returns the current line being parsed. For example:
1148            
1149             $parser->add_rule(
1150             if => 'length($this->this_line) > 256',
1151             );
1152             ## Saves all lines longer than 256 characters
1153            
1154             Inside rules, instead of using this method, one may also use C<$_>:
1155            
1156             $parser->add_rule(
1157             if => 'length($_) > 256',
1158             );
1159            
1160             =head1 METHODS FOR READING INPUT
1161            
1162             =head2 filename
1163            
1164             Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
1165            
1166             print "Last read ", $parser->filename, "\n";
1167            
1168             The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read|/read>>.
1169            
1170             $parser->read(shift @ARGV);
1171             print $parser->filename(), ":\n",
1172             "=" x (length($parser->filename())+1),
1173             "\n",
1174             $parser->get_records(),
1175             "\n";
1176            
1177             A C<read> call with a filehandle, will clear the last file name.
1178            
1179             $parser->read(\*MYFH);
1180             print "Last file name is lost\n" if not defined $parser->filename();
1181            
1182             =head2 filehandle
1183            
1184             Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved.
1185            
1186             my $fh = $parser->filehandle();
1187            
1188             Like C<L<filename|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename.
1189            
1190             $parser->read(\*STDOUT);
1191             my $lastfh = $parser->filehandle(); # Will return glob of STDOUT
1192            
1193             =head2 read
1194            
1195             Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class.
1196            
1197             $parser->read($filename); # Read the file
1198             $parser->read(\*STDIN); # Read the filehandle
1199            
1200             The above could also be done in two steps if the developer so chooses.
1201            
1202             $parser->filename($filename);
1203             $parser->read(); # equiv: $parser->read($filename)
1204            
1205             $parser->filehandle(\*STDIN);
1206             $parser->read(); # equiv: $parser->read(\*STDIN)
1207            
1208             The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading|/abort_reading>> method.
1209            
1210             Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter.
1211            
1212             $parser->read('myfile.txt'); # Will close file automatically
1213            
1214             open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
1215             $parser->read(\*MYFH); # Will not close MYFH
1216             close MYFH;
1217            
1218             =head1 METHODS FOR HANDLING RECORDS
1219            
1220             =head2 get_records
1221            
1222             Takes no arguments. Returns an array containing all the records saved by the parser.
1223            
1224             foreach my $record ( $parser->get_records ) {
1225             $i++;
1226             print "Record: $i: ", $record, "\n";
1227             }
1228            
1229             =head2 last_record
1230            
1231             Takes no arguments and returns the last saved record. Leaves the saved records untouched.
1232            
1233             my $last_rec = $parser->last_record;
1234            
1235             =head2 pop_record
1236            
1237             Takes no arguments and pops the last saved record.
1238            
1239             my $last_rec = $parser->pop_record;
1240            
1241             =head2 push_records
1242            
1243             Takes an array as input, and stores each element as a separate record. Returns the number of elements in the new array.
1244            
1245             $parser->push_records(qw(insert these as separate records));
1246            
1247             =head1 METHODS FOR ACCESSING STASHED VARIABLES
1248            
1249             Stashed variables can be data structures or simple scalar variables stored as elements in the parser object. Hence they are accessible across different rules. Stashed variables start with a tilde (~). So you could set up rules like these:
1250            
1251             $parser->BEGIN_rule( do => '~count=0;' );
1252             $parser->add_rule( if => '$1 eq "SECTION"', do => '~count++;' );
1253            
1254             In the above rule C<~count> is a stashed variable. Internally this is just a hash element with key named C<count>. After the C<read> call is over, this variable can be accessed.
1255            
1256             $parser->read('some_text_file.txt');
1257             print "Found ", $parser->stashed('count'), " sections in file.\n";
1258            
1259             Stashed variables that are created entirely within the rules are forgotten at the beginning of the next C<read> call. This means, you can C<read> another text file and don't have to bother to clear out the stashed variable C<~count>.
1260            
1261             $parser->read('another_text_file.txt');
1262             print "Found ", $parser->stashed('count'), " sections in file.\n";
1263            
1264             In contrast, stashed variables created by calling C<prestash> continue to persist for subsequent calls of C<read>, unless an explicit call to C<forget> names these pre-stashed variables.
1265            
1266             $parser->prestash( max_err => 100 );
1267             $parser->BEGIN_rule( do => '~err_count = 0;' );
1268             $parser->add_rule(
1269             if => '$1 eq "ERROR:" && ~err_count < ~max_err',
1270             do => '~err_count++;',
1271             continue_to_next => 1,
1272             );
1273             $parser->add_rule(
1274             if => '$1 eq "ERROR:" && ~err_count == ~max_err',
1275             do => '$this->abort_reading;',
1276             );
1277             $parser->read('first.log');
1278             print "Top 100 errors:\n", $parser->get_records, "\n";
1279            
1280             $parser->read('another.log'); # max_err is still set to 100, but err_count is forgotten and reset to 0 by the BEGIN_rule
1281             print "Top 100 errors:\n", $parser->get_records, "\n";
1282            
1283             =head2 forget
1284            
1285             Takes an optional list of string arguments which must be the names of stashed variables. This method forgets those stashed variables for ever. So be sure you really intend to do this. In list context, this method returns the values of the variables whose names were passed to the method. In scalar context, it returns the last value of the last stashed variable passed.
1286            
1287             my $pop_and_forget_me = $parser->forget('forget_me_totally', 'pop_and_forget_me');
1288            
1289             Inside rules, you could simply C<delete> the stashed variable like this:
1290            
1291             $parser->add_rule( do => 'delete ~forget_me;' );
1292            
1293             The above C<delete> statement works because the stashed variable C<~forget_me> is just a hash key named C<forget_me> internally. Using this on pre-stashed variables, will only temporarily delete the variable. It will be present in subsequent calls to C<read>. If you want to delete it completely call C<forget> with the pre-stashed variable name as an argument.
1294            
1295             When no arguments are passed, it clears all stashed variables (not pre-stashed).
1296            
1297             $parser->forget;
1298            
1299             Note that when C<forget> is called with no arguments, pre-stashed variables are not deleted and are still accessible in subsequent calls to C<read>. To forget a pre-stashed variable, it needs to be explicitly named in a call to forget. Then it is forgotten.
1300            
1301             A call to C<forget> method is done without any arguments, right before C<read> starts reading a new text input. That is how we can reset the values of stashed variables, but still retain pre-stashed variables.
1302            
1303             =head2 has_empty_stash
1304            
1305             Takes no arguments and returns a true value if the stash of variables is empty (i.e., no stashed variables are present). If not, it returns a boolean false.
1306            
1307             if ( not $parser->has_empty_stash ) {
1308             my $myvar = $parser->stashed('myvar');
1309             print "myvar = $myvar\n";
1310             }
1311            
1312             =head2 has_stashed
1313            
1314             Takes a single string argument and returns a boolean indicating if there is a stashed variable with that name or not:
1315            
1316             if ( $parser->has_stashed('stashed_var') ) {
1317             print "Here is what stashed_var contains: ", $parser->stashed('stashed_var');
1318             }
1319            
1320             Inside rules you could check this with the C<exists> keyword:
1321            
1322             $parser->add_rule( if => 'exists ~stashed_var' );
1323            
1324             =head2 prestash
1325            
1326             Takes an even number of arguments, or a hash, with variable name and value as pairs. This is useful to preset some stash variables before C<read> is called so that the rules have some variables accessible inside them. The main difference between pre-stashed variables created via C<prestash> and those created in the rules or using C<stashed> is that the pre-stashed ones are static.
1327            
1328             $parser->prestash(pattern => 'string');
1329             $parser->add_rule( if => 'my $patt = ~pattern; m/$patt/;' );
1330            
1331             You may change the value of a C<prestash>ed variable inside any of the rules.
1332            
1333             =head2 stashed
1334            
1335             Takes an optional list of string arguments each with the name of a stashed variable you want to query, i.e., get the value of. In list context, it returns their values in the same order as the queried variables, and in scalar context it returns the value of the last variable queried.
1336            
1337             my (%var_vals) = $parser->stashed;
1338             my (@vars) = $parser->stashed( qw(first second third) );
1339             my $third = $parser->stashed( qw(first second third) ); # returns value of last variable listed
1340             my $myvar = $parser->stashed('myvar');
1341            
1342             Or you could do this:
1343            
1344             use Data::Dumper 'Dumper';
1345            
1346             if ( $parser->has_empty_stash ) {
1347             print "Nothing on my stash\n";
1348             } else {
1349             my %stash = $parser->stashed;
1350             print Dumper(\%stash), "\n";
1351             }
1352            
1353             =head1 MISCELLANEOUS METHODS
1354            
1355             =head2 lines_parsed
1356            
1357             Takes no arguments. Returns the number of lines last parsed. Every call to C<read>, causes the value to be auto-reset.
1358            
1359             print $parser->lines_parsed, " lines were parsed\n";
1360            
1361             =head2 has_aborted
1362            
1363             Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle.
1364            
1365             print "Aborted\n" if $parser->has_aborted();
1366            
1367             =head2 custom_line_unwrap_routines
1368            
1369             This method should be used only when the line-wrapping supported by the text format is not already among the L<known line-wrapping styles supported|/"Common line-wrapping styles">.
1370            
1371             Takes a hash argument with required keys C<is_wrapped> and C<unwrap_routine>. Used in setting up L<custom line-unwrapping routines|/"Specifying custom line-unwrap routines">.
1372            
1373             Here is an example of setting custom line-unwrapping routines:
1374            
1375             $parser->multiline_type('join_last');
1376             $parser->custom_line_unwrap_routines(
1377             is_wrapped => sub { # A method that detects if this line is wrapped or not
1378             my ($self, $this_line) = @_;
1379             $this_line =~ /^[~]/;
1380             },
1381             unwrap_routine => sub { # A method to unwrap the line by joining it with the last line
1382             my ($self, $last_line, $this_line) = @_;
1383             chomp $last_line;
1384             $last_line =~ s/\s*$//g;
1385             $this_line =~ s/^[~]\s*//g;
1386             "$last_line $this_line";
1387             },
1388             );
1389            
1390             Now you can parse a file with the following content:
1391            
1392             This is a long line that is wrapped around with a custom
1393             ~ character - the tilde. It is unusual, but hey, we're
1394             ~ showing an example.
1395            
1396             When C<$parser> gets to C<read> this, these three lines get unwrapped and processed by the rules, as if it were a single line.
1397            
1398             L<Text::Parser::Multiline> shows another example with C<join_next> type.
1399            
1400             =head1 METHODS THAT MAY BE OVERRIDDEN IN SUBCLASSES
1401            
1402             The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass.
1403            
1404             Starting version 0.925, users should never need to override any of these methods to make their own parser.
1405            
1406             =head2 save_record
1407            
1408             The default implementation takes a single argument, runs any rules, and saves the returned value as a record in an internal array. If nothing is returned from the rule, C<undef> is stored as a record.
1409            
1410             B<Note>: Starting C<0.925> version of C<Text::Parser> it is not required to override this method in your derived class. In most cases, you should use the rules.
1411            
1412             B<Importnant Note:> Starting version C<1.0> of C<Text::Parser> this method will be deprecated to improve performance. So avoid inheriting this method.
1413            
1414             =head2 is_line_continued
1415            
1416             The default implementation of this routine:
1417            
1418             multiline_type | Return value
1419             ------------------+---------------------------------
1420             undef | 0
1421             join_last | 0 for first line, 1 otherwise
1422             join_next | 1
1423            
1424             In earlier versions of L<Text::Parser> you had no way but to subclass L<Text::Parser> to change the routine that detects if a line is wrapped. Now you can instead select from a list of known C<line_wrap_style>s, or even set custom methods for this.
1425            
1426             =head2 join_last_line
1427            
1428             The default implementation of this routine takes two string arguments, joins them without any C<chomp> or any other operation, and returns that result.
1429            
1430             In earlier versions of L<Text::Parser> you had no way but to subclass L<Text::Parser> to select a line-unwrapping routine. Now you can instead select from a list of known C<line_wrap_style>s, or even set custom methods for this.
1431            
1432             =head1 THINGS TO DO FURTHER
1433            
1434             Future versions are expected to include features to:
1435            
1436             =over 4
1437            
1438             =item *
1439            
1440             read and parse from a buffer
1441            
1442             =item *
1443            
1444             automatically uncompress input
1445            
1446             =item *
1447            
1448             I<suggestions welcome ...>
1449            
1450             =back
1451            
1452             Contributions and suggestions are welcome and properly acknowledged.
1453            
1454             =head1 HANDLING LINE-WRAPPING
1455            
1456             Different text formats sometimes allow line-wrapping to make their content more human-readable. Handling this can be rather complicated if you use native Perl, but extremely easy with L<Text::Parser>.
1457            
1458             =head2 Common line-wrapping styles
1459            
1460             L<Text::Parser> supports a range of commonly-used line-unwrapping routines which can be selected using the C<L<line_wrap_style|Text::Parser/"line_wrap_style">> attribute. The attribute automatically sets up the parser to handle line-unwrapping for that specific text format.
1461            
1462             $parser->line_wrap_style('trailing_backslash');
1463             # Now when read runs the rules, all the back-slash
1464             # line-wrapped lines are auto-unwrapped to a single
1465             # line, and rules are applied on that single line
1466            
1467             When C<read> reads each line of text, it looks for any trailing backslash and unwraps the line. The next line may have a trailing back-slash too, and that too is unwrapped. Once the fully-unwrapped line has been identified, the rules are run on that unwrapped line, as if the file had no line-wrapping at all. So say the content of a line is like this:
1468            
1469             This is a long line wrapped into multiple lines \
1470             with a back-slash character. This is a very common \
1471             way to wrap long lines. In general, line-wrapping \
1472             can be much easier on the reader's eyes.
1473            
1474             When C<read> runs any rules in C<$parser>, the text above appears as a single line to the rules.
1475            
1476             =head2 Specifying custom line-unwrap routines
1477            
1478             I have included the common types of line-wrapping styles known to me. But obviously there can be more. To specify a custom line-unwrapping style follow these steps:
1479            
1480             =over 4
1481            
1482             =item *
1483            
1484             Set the C<L<multiline_type|/"multiline_type">> attribute appropriately. If you do not set this, your custom unwrapping routines won't have any effect.
1485            
1486             =item *
1487            
1488             Call C<L<custom_line_unwrap_routines|/"custom_line_unwrap_routines">> method. If you forget to call this method, or if you don't provide appropriate arguments, then an exception is thrown.
1489            
1490             =back
1491            
1492             L<Here|/"custom_line_unwrap_routines"> is an example with C<join_last> value for C<multiline_type>. And L<here|Text::Parser::Multiline/"SYNOPSIS"> is an example using C<join_next>. You'll notice that in both examples, you need to specify both routines. In fact, if you don't
1493            
1494             =head2 Line-unwrapping in a subclass
1495            
1496             You may subclass C<Text::Paser> to parse your specific text format. And that format may support some line-wrapping. To handle the known common line-wrapping styles, set a default value for C<line_wrap_style>. For example:
1497            
1498             =over 4
1499            
1500             =item *
1501            
1502             Set a default value for C<line_wrap_style>. For example, the following uses one of the supported common line-unwrap methods. has '+line_wrap_style' => ( default => 'spice', );
1503            
1504             =back
1505            
1506             * Setup custom line-unwrap routines with C<unwraps_lines> from L<Text::Parser::RuleSpec>.
1507            
1508             use Text::Parser::RuleSpec;
1509             extends 'Text::Parser';
1510            
1511             has '+line_wrap_style' => ( default => 'slurp', is => 'ro');
1512             has '+multiline_type' => ( is => 'ro' );
1513            
1514             Of course, you don't I<have> to make them read-only.
1515            
1516             To setup custom line-unwrapping routines in a subclass, you can use the C<L<unwraps_lines_using|Text::Parser::RuleSpec/"unwraps_lines_using">> syntax sugar from L<Text::Parser::RuleSpec>. For example:
1517            
1518             package MyParser;
1519            
1520             use Text::Parser::RuleSpec;
1521             extends 'Text::Parser';
1522            
1523             has '+multiline_type' => (
1524             default => 'join_next',
1525             is => 'ro',
1526             );
1527            
1528             unwraps_lines_using(
1529             is_wrapped => \&_my_is_wrapped_routine,
1530             unwrap_routine => \&_my_unwrap_routine,
1531             );
1532            
1533             =head1 SEE ALSO
1534            
1535             =over 4
1536            
1537             =item *
1538            
1539             L<Text::Parser::Manual> - Read this manual to learn how to do cool things with this class
1540            
1541             =item *
1542            
1543             L<Text::Parser::Error> - there is a change in how exceptions are thrown by this class. Read this page for more information.
1544            
1545             =item *
1546            
1547             L<The AWK Programming Language|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberg, and B<K>ernighan.
1548            
1549             =item *
1550            
1551             L<Text::Parser::Multiline> - how to read line-wrapped text input
1552            
1553             =back
1554            
1555             =head1 BUGS
1556            
1557             Please report any bugs or feature requests on the bugtracker website
1558             L<http://github.com/balajirama/Text-Parser/issues>
1559            
1560             When submitting a bug or request, please include a test-file or a
1561             patch to an existing test-file that illustrates the bug or desired
1562             feature.
1563            
1564             =head1 AUTHOR
1565            
1566             Balaji Ramasubramanian <balajiram@cpan.org>
1567            
1568             =head1 COPYRIGHT AND LICENSE
1569            
1570             This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
1571            
1572             This is free software; you can redistribute it and/or modify it under
1573             the same terms as the Perl 5 programming language system itself.
1574            
1575             =head1 CONTRIBUTORS
1576            
1577             =for stopwords H.Merijn Brand - Tux Mohammad S Anwar
1578            
1579             =over 4
1580            
1581             =item *
1582            
1583             H.Merijn Brand - Tux <h.m.brand@xs4all.nl>
1584            
1585             =item *
1586            
1587             Mohammad S Anwar <mohammad.anwar@yahoo.com>
1588            
1589             =back
1590            
1591             =cut
1592