File Coverage

blib/lib/Text/Parser.pm

Criterion	Covered	Total	%
statement	284	284	100.0
branch	125	126	99.2
condition	32	33	96.9
subroutine	78	78	100.0
pod	14	15	93.3
total	533	536	99.4

line	stmt	bran	cond	sub	pod	time	code
1	42			42		1720841	use warnings;
	42					339
	42					1475
2	42			42		243	use strict;
	42					105
	42					1041
3	42			42		219	use feature ':5.14';
	42					88
	42					7447
4
5							package Text::Parser 1.000;
6
7							# ABSTRACT: Simplifies text parsing. Easily extensible to parse any text format.
8
9
10	42			42		14924	use Moose;
	42					12352250
	42					326
11	42			42		354246	use MooseX::CoverableModifiers;
	42					250627
	42					323
12	42			42		21346	use MooseX::StrictConstructor;
	42					1042105
	42					229
13	42			42		365513	use namespace::autoclean;
	42					107
	42					278
14	42			42		4228	use Moose::Util 'apply_all_roles', 'ensure_all_roles';
	42					114
	42					457
15	42			42		15937	use Moose::Util::TypeConstraints;
	42					100
	42					400
16	42			42		123719	use String::Util qw(trim ltrim rtrim eqq);
	42					120725
	42					3740
17	42			42		15481	use Text::Parser::Error;
	42					162
	42					228
18	42			42		46073	use Text::Parser::Rule;
	42					196
	42					2499
19	42			42		26544	use Text::Parser::RuleSpec;
	42					208
	42					334
20	42			42		141544	use List::MoreUtils qw(natatime first_index);
	42					104
	42					395
21
22							enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)];
23							enum 'Text::Parser::Types::LineWrapStyle' =>
24							[qw(trailing_backslash spice just_next_line slurp custom)];
25							enum 'Text::Parser::Types::TrimType' => [qw(l r b n)];
26
27							subtype 'NonEmptyStr', as 'Str', where { length $_ > 0 },
28							message {"$_ is an empty string"};
29
30	42			42		41563	no Moose::Util::TypeConstraints;
	42					109
	42					456
31	42			42		41692	use FileHandle;
	42					428211
	42					271
32
33							has _origclass => (
34							is => 'ro',
35							isa => 'Str',
36							lazy => 1,
37							default => '',
38							);
39
40
41							around BUILDARGS => sub {
42	55			55		1822	my ( $orig, $class ) = ( shift, shift );
43	55	100	100			613	return $class->$orig( @_, _origclass => $class ) if @_ > 1 or not @_;
44	2					6	my $ptr = shift;
45	2	100				13	parser_exception("Invalid parameters to Text::Parser constructor")
46							if ref($ptr) ne 'HASH';
47	1					3	$class->$orig( %{$ptr}, _origclass => $class );
	1					7
48							};
49
50							sub BUILD {
51	64			64	0	154994	my $self = shift;
52	64	100				2266	ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split;
53	64	100				32915	return if not defined $self->multiline_type;
54	17					103	ensure_all_roles $self, 'Text::Parser::Multiline';
55							}
56
57
58							has auto_chomp => (
59							is => 'rw',
60							isa => 'Bool',
61							lazy => 1,
62							default => 0,
63							);
64
65
66							has auto_split => (
67							is => 'rw',
68							isa => 'Bool',
69							lazy => 1,
70							default => 0,
71							trigger => \&__newval_auto_split,
72							);
73
74							sub __newval_auto_split {
75	86			86		50070	my ( $self, $newval, $oldval ) = ( shift, shift, shift );
76	86	100				429	ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval;
77	86	100	100			355651	$self->_clear_all_fields if not $newval and $oldval;
78							}
79
80
81							has auto_trim => (
82							is => 'rw',
83							isa => 'Text::Parser::Types::TrimType',
84							lazy => 1,
85							default => 'n',
86							);
87
88
89							has custom_line_trimmer => (
90							is => 'rw',
91							isa => 'CodeRef\|Undef',
92							lazy => 1,
93							default => undef,
94							);
95
96
97							has FS => (
98							is => 'rw',
99							isa => 'RegexpRef',
100							lazy => 1,
101							default => sub {qr/\s+/},
102							);
103
104
105							has indentation_str => (
106							is => 'rw',
107							isa => 'NonEmptyStr',
108							lazy => 1,
109							default => ' ',
110							);
111
112
113							has line_wrap_style => (
114							is => 'rw',
115							isa => 'Text::Parser::Types::LineWrapStyle\|Undef',
116							default => undef,
117							lazy => 1,
118							trigger => \&_on_line_unwrap,
119							);
120
121							my %MULTILINE_VAL = (
122							default => undef,
123							spice => 'join_last',
124							trailing_backslash => 'join_next',
125							just_next_line => 'join_last',
126							slurp => 'join_last',
127							custom => undef,
128							);
129
130							sub _on_line_unwrap {
131	16			16		44	my ( $self, $val, $oldval ) = (@_);
132	16	100	100			75	return if not defined $val and not defined $oldval;
133	15	100				46	$val = 'default' if not defined $val;
134	15	100	100			69	return if $val eq 'custom' and defined $self->multiline_type;
135	9					37	$self->multiline_type( $MULTILINE_VAL{$val} );
136							}
137
138
139							has multiline_type => (
140							is => 'rw',
141							isa => 'Text::Parser::Types::MultilineType\|Undef',
142							lazy => 1,
143							default => undef,
144							);
145
146							around multiline_type => sub {
147	1783			1783		84484	my ( $orig, $self ) = ( shift, shift );
148	1783					44222	my $oldval = $orig->($self);
149	1783	100	100			11104	return $oldval if not @_ or eqq( $_[0], $oldval );
150	17					224	return __newval_multi_line( $orig, $self, @_ );
151							};
152
153							sub __newval_multi_line {
154	17			17		51	my ( $orig, $self, $newval ) = ( shift, shift, shift );
155	17					52	delete $self->{records}; # Bug W/A: role cannot apply if records exists
156	17	100				94	ensure_all_roles( $self, 'Text::Parser::Multiline' )
157							if defined $newval;
158	17					88615	return $orig->( $self, $newval );
159							}
160
161
162							has track_indentation => (
163							is => 'rw',
164							isa => 'Bool',
165							lazy => 1,
166							default => 0,
167							);
168
169
170							has _obj_rules => (
171							is => 'rw',
172							isa => 'ArrayRef[Text::Parser::Rule]',
173							lazy => 1,
174							default => sub { [] },
175							traits => ['Array'],
176							handles => {
177							_push_obj_rule => 'push',
178							_has_no_obj_rules => 'is_empty',
179							_get_obj_rules => 'elements',
180							},
181							);
182
183							sub add_rule {
184	26			26	1	83263	my $self = shift;
185	26	100				832	$self->auto_split(1) if not $self->auto_split;
186	26					847	my $rule = Text::Parser::Rule->new(@_);
187	26					1020	$self->_push_obj_rule($rule);
188							}
189
190
191							sub clear_rules {
192	2			2	1	6	my $self = shift;
193	2					77	$self->_obj_rules( [] );
194	2					77	$self->_clear_begin_rule;
195	2					72	$self->_clear_end_rule;
196							}
197
198
199							has _begin_rule => (
200							is => 'rw',
201							isa => 'Text::Parser::Rule',
202							predicate => '_has_begin_rule',
203							clearer => '_clear_begin_rule',
204							);
205
206							sub BEGIN_rule {
207	5			5	1	416	my $self = shift;
208	5	100				147	$self->auto_split(1) if not $self->auto_split;
209	5					24	my (%opt) = _defaults_for_begin_end(@_);
210	5					33	$self->_modify_rule( '_begin_rule', %opt );
211							}
212
213							sub _defaults_for_begin_end {
214	8			8		35	my (%opt) = @_;
215	8	100				34	$opt{dont_record} = 1 if not exists $opt{dont_record};
216	8	100				26	delete $opt{if} if exists $opt{if};
217	8	100				26	delete $opt{continue_to_next} if exists $opt{continue_to_next};
218	8					40	return (%opt);
219							}
220
221							sub _modify_rule {
222	8			8		28	my ( $self, $func, %opt ) = @_;
223	8					26	my $pred = '_has' . $func;
224	8	100				284	$self->_append_rule_lines( $func, \%opt ) if $self->$pred();
225	8					255	my $rule = Text::Parser::Rule->new(%opt);
226	8					244	$self->$func($rule);
227							}
228
229							sub _append_rule_lines {
230	3			3		19	my ( $self, $func, $opt ) = ( shift, shift, shift );
231	3					73	my $old = $self->$func();
232	3					90	$opt->{do} = $old->action . $opt->{do};
233							}
234
235
236							has _end_rule => (
237							is => 'rw',
238							isa => 'Text::Parser::Rule',
239							predicate => '_has_end_rule',
240							clearer => '_clear_end_rule',
241							);
242
243							sub END_rule {
244	3			3	1	28	my $self = shift;
245	3	100				101	$self->auto_split(1) if not $self->auto_split;
246	3					14	my (%opt) = _defaults_for_begin_end(@_);
247	3					16	$self->_modify_rule( '_end_rule', %opt );
248							}
249
250
251							has _indent_level => (
252							is => 'ro',
253							isa => 'Int\|Undef',
254							lazy => 1,
255							default => undef,
256							writer => '_set_indent_level',
257							reader => 'this_indent',
258							);
259
260
261							has _current_line => (
262							is => 'ro',
263							isa => 'Str\|Undef',
264							init_arg => undef,
265							writer => '_set_this_line',
266							reader => 'this_line',
267							clearer => '_clear_this_line',
268							default => undef,
269							);
270
271
272							has filename => (
273							is => 'rw',
274							isa => 'Str\|Undef',
275							lazy => 1,
276							init_arg => undef,
277							default => undef,
278							predicate => '_has_filename',
279							clearer => '_clear_filename',
280							trigger => \&_set_filehandle,
281							);
282
283							sub _set_filehandle {
284	93			93		22888	my $self = shift;
285	93	100				2680	return $self->_clear_filename if not defined $self->filename;
286	92					510	$self->_save_filehandle( $self->__get_valid_fh );
287							}
288
289							sub __get_valid_fh {
290	92			92		217	my $self = shift;
291	92					385	my $fname = $self->_get_valid_text_filename;
292	92	100				1333	return FileHandle->new( $fname, 'r' ) if defined $fname;
293	4					128	$fname = $self->filename;
294	4					118	$self->_clear_filename;
295	4					11	$self->_throw_invalid_file_exception($fname);
296							}
297
298							# Don't touch: Override this in Text::Parser::AutoUncompress
299							sub _get_valid_text_filename {
300	92			92		221	my $self = shift;
301	92					2290	my $fname = $self->filename;
302	92	100	66			30087	return $fname if -f $fname and -r $fname and -T $fname;
			100
303	4					22	return;
304							}
305
306							# Don't touch: Override this in Text::Parser::AutoUncompress
307							sub _throw_invalid_file_exception {
308	4			4		11	my ( $self, $fname ) = ( shift, shift );
309	4	100				67	parser_exception("Invalid filename $fname") if not -f $fname;
310	1	50				17	parser_exception("Cannot read $fname") if not -r $fname;
311	1					10	parser_exception("Not a plain text file $fname");
312							}
313
314
315							has filehandle => (
316							is => 'rw',
317							isa => 'FileHandle\|Undef',
318							lazy => 1,
319							init_arg => undef,
320							default => undef,
321							predicate => '_has_filehandle',
322							writer => '_save_filehandle',
323							reader => '_get_filehandle',
324							clearer => '_close_filehandles',
325							);
326
327							sub filehandle {
328	105			105	1	1358	my $self = shift;
329	105	100	100			3744	return if not @_ and not $self->_has_filehandle;
330	102	100				787	$self->_save_filehandle(@_) if @_;
331	98	100				450	$self->_clear_filename if @_;
332	98					3056	return $self->_get_filehandle;
333							}
334
335
336							sub read {
337	95			95	1	95797	my $self = shift;
338	95	100				456	return if not defined $self->_handle_read_inp(@_);
339	89					589	$self->_run_begin_end_block('_begin_rule');
340	89					491	$self->__read_and_close_filehandle;
341	78					295	$self->_run_begin_end_block('_end_rule');
342							}
343
344							sub _handle_read_inp {
345	95			95		249	my $self = shift;
346	95	100				318	return $self->filehandle if not @_;
347	93	100	100			555	return if not ref( $_[0] ) and not $_[0];
348	92	100				3063	return $self->filename(@_) if not ref( $_[0] );
349	6					21	return $self->filehandle(@_);
350							}
351
352							sub _before_begin {
353	89			89		175	my $self = shift;
354	89					437	$self->forget;
355	89	100				3679	$self->_preset_vars( $self->_all_preset ) if not $self->_has_no_prestash;
356							}
357
358							sub _after_end {
359	78			78		181	my $self = shift;
360	78					2199	my $h = $self->_hidden_stash;
361	78					165	$h->{$_} = $self->stashed($_) for ( keys %{$h} );
	78					530
362							}
363
364							sub _run_begin_end_block {
365	167			167		492	my ( $self, $func ) = ( shift, shift );
366	167	100				791	$self->_before_begin if $func eq '_begin_rule';
367	167					754	$self->_run_beg_end__($func);
368	167	100				923	$self->_after_end if $func eq '_end_rule';
369							}
370
371							sub _run_beg_end__ {
372	167			167		415	my ( $self, $func ) = ( shift, shift );
373	167					481	my $pred = '_has' . $func;
374	167	100				5634	return if not $self->$pred();
375	6					170	my $rule = $self->$func();
376	6					33	$rule->_run( $self, 0 );
377							}
378
379							sub __read_and_close_filehandle {
380	89			89		206	my $self = shift;
381	89					423	$self->_prep_to_read_file;
382	89					520	$self->__read_file_handle;
383	78					748	$self->_final_operations_after_read;
384							}
385
386							sub _prep_to_read_file {
387	89			89		248	my $self = shift;
388	89					3581	$self->_reset_line_count;
389	89					3442	$self->_empty_records;
390	89					3297	$self->_clear_abort;
391							}
392
393							sub __read_file_handle {
394	89			89		1221	my $self = shift;
395	89					504	my $fh = $self->filehandle();
396	89					2079	while (<$fh>) {
397	738	100				2068	last if not $self->__parse_line($_);
398							}
399							}
400
401							sub __parse_line {
402	738			738		1648	my ( $self, $line ) = ( shift, shift );
403	738					1692	$line = $self->_prep_line_for_parsing($line);
404	738					10687	$self->_set_this_line($line);
405	738					3943	$self->save_record($line);
406	730					19981	return not $self->has_aborted;
407							}
408
409							sub _prep_line_for_parsing {
410	738			738		1266	my ( $self, $line ) = ( shift, shift );
411	738					24667	$self->_next_line_parsed();
412	738	100				18410	$self->_find_indent_level($line) if $self->track_indentation;
413	738					2046	$line = $self->_line_manip($line);
414							}
415
416							sub _find_indent_level {
417	18			18		31	my ( $self, $line ) = ( shift, shift );
418	18					34	chomp $line;
419	18	100				439	length( $self->indentation_str ) >= 2
420							? $self->_find_long_indent_level($line)
421							: $self->_singlechar_indent($line);
422							}
423
424							sub _find_long_indent_level {
425	9			9		20	my ( $self, $line ) = ( shift, shift );
426	9					211	my $n = _num_matching( $self->indentation_str, $line );
427	9					284	$self->_set_indent_level($n);
428							}
429
430							sub _num_matching {
431	9			9		21	my ( $ch, $line ) = ( shift, shift );
432	9					94	my $it = natatime( length($ch), ( split //, $line ) );
433	9					36	my ( $i, @x ) = ( 0, $it->() );
434	9					32	while ( $ch eq join( '', @x ) ) {
435	1					6	( $i, @x ) = ( $i + 1, $it->() );
436							}
437	9					40	return $i;
438							}
439
440							sub _singlechar_indent {
441	9			9		20	my ( $self, $line ) = ( shift, shift );
442	9			10		94	my $n = first_index { $_ ne $self->indentation_str } ( split //, $line );
	10					244
443	9					283	$self->_set_indent_level($n);
444							}
445
446							sub _line_manip {
447	738			738		1576	my ( $self, $line ) = ( shift, shift );
448	738	100				18118	return $self->_def_line_manip($line)
449							if not defined $self->custom_line_trimmer;
450	1					26	my $cust = $self->custom_line_trimmer;
451	1					6	return $cust->($line);
452							}
453
454							sub _def_line_manip {
455	737			737		1562	my ( $self, $line ) = ( shift, shift );
456	737	100				15608	chomp $line if $self->auto_chomp;
457	737					1889	return $self->_trim_line($line);
458							}
459
460							sub _trim_line {
461	737			737		1434	my ( $self, $line ) = ( shift, shift );
462	737	100				15659	return $line if $self->auto_trim eq 'n';
463	69	100				563	return trim($line) if $self->auto_trim eq 'b';
464	8	100				179	return ltrim($line) if $self->auto_trim eq 'l';
465	4					15	return rtrim($line);
466							}
467
468							sub _final_operations_after_read {
469	78			78		199	my $self = shift;
470	78	100				2844	$self->_close_filehandles if $self->_has_filename;
471	78					1687	$self->_clear_this_line;
472	78	100				2144	$self->_set_indent_level(undef) if $self->track_indentation;
473							}
474
475
476							sub last_record {
477	16			16	1	712	my $self = shift;
478	16					653	my $count = $self->_num_records();
479	16	100				62	return if not $count;
480	15					540	return $self->_access_record( $count - 1 );
481							}
482
483
484							has records => (
485							isa => 'ArrayRef[Any]',
486							is => 'ro',
487							lazy => 1,
488							default => sub { return []; },
489							auto_deref => 1,
490							init_arg => undef,
491							traits => ['Array'],
492							predicate => '_has_records_attrib',
493							handles => {
494							get_records => 'elements',
495							push_records => 'push',
496							pop_record => 'pop',
497							_empty_records => 'clear',
498							_num_records => 'count',
499							_access_record => 'accessor',
500							},
501							);
502
503
504							has _stashed_vars => (
505							is => 'ro',
506							isa => 'HashRef[Any]',
507							default => sub { {} },
508							lazy => 1,
509							traits => ['Hash'],
510							handles => {
511							_clear_stash => 'clear',
512							_stashed => 'elements',
513							_has_stashed => 'exists',
514							_forget => 'delete',
515							has_empty_stash => 'is_empty',
516							_get_vars => 'get',
517							_preset_vars => 'set',
518							}
519							);
520
521							sub forget {
522	93			93	1	235	my $self = shift;
523	93	100				333	return $self->_forget_stashed(@_) if @_;
524	90					3430	$self->_clear_stash;
525							}
526
527							sub _forget_stashed {
528	3			3		6	my $self = shift;
529	3					7	foreach my $s (@_) {
530	3	100				141	$self->_forget($s) if $self->_has_stashed($s);
531	3	100				109	$self->_del_prestash($s) if $self->_has_prestash($s);
532							}
533							}
534
535							sub stashed {
536	11			11	1	1224	my $self = shift;
537	11	100				392	return $self->_get_vars(@_) if @_;
538	1					37	return $self->_stashed;
539							}
540
541							sub has_stashed {
542	10			10	1	396	my $self = shift;
543	10	100				391	return 1 if $self->_has_stashed(@_);
544	8					276	return $self->_has_prestash(@_);
545							}
546
547							has _hidden_stash => (
548							is => 'ro',
549							isa => 'HashRef[Any]',
550							default => sub { {} },
551							lazy => 1,
552							traits => ['Hash'],
553							handles => {
554							prestash => 'set',
555							_all_preset => 'elements',
556							_has_prestash => 'exists',
557							_has_no_prestash => 'is_empty',
558							_del_prestash => 'delete',
559							}
560							);
561
562
563							has lines_parsed => (
564							is => 'ro',
565							isa => 'Int',
566							lazy => 1,
567							init_arg => undef,
568							default => 0,
569							traits => ['Counter'],
570							handles => {
571							_next_line_parsed => 'inc',
572							_reset_line_count => 'reset',
573							}
574							);
575
576
577							has abort => (
578							is => 'rw',
579							isa => 'Bool',
580							lazy => 1,
581							default => 0,
582							traits => ['Bool'],
583							reader => 'has_aborted',
584							handles => {
585							abort_reading => 'set',
586							_clear_abort => 'unset'
587							},
588							);
589
590
591							has _is_wrapped => (
592							is => 'rw',
593							isa => 'CodeRef\|Undef',
594							default => undef,
595							lazy => 1,
596							);
597
598							has _unwrap_routine => (
599							is => 'rw',
600							isa => 'CodeRef\|Undef',
601							default => undef,
602							lazy => 1,
603							);
604
605							sub custom_line_unwrap_routines {
606	8			8	1	479	my $self = shift;
607	8					34	$self->_prep_for_custom_unwrap_routines;
608	7					28	my ( $is_wr, $un_wr ) = _check_custom_unwrap_args(@_);
609	2					96	$self->_is_wrapped($is_wr);
610	2					56	$self->_unwrap_routine($un_wr);
611							}
612
613							sub _prep_for_custom_unwrap_routines {
614	8			8		11	my $self = shift;
615	8					267	my $s = $self->line_wrap_style();
616	8	100	100			44	parser_exception("Line wrap style already set to $s")
617							if defined $s and 'custom' ne $s;
618	7					220	$self->line_wrap_style('custom');
619							}
620
621							my $unwrap_prefix = "Bad call to custom_line_unwrap_routines: ";
622
623
624
625
626							sub save_record {
627	454			454	1	8655	my ( $self, $record ) = ( shift, shift );
628	454	100				1118	( $self->_has_no_rules )
629							? $self->push_records($record)
630							: $self->_run_through_rules;
631							}
632
633							sub _has_no_rules {
634	454			454		710	my $self = shift;
635	454	100				11816	return 0 if Text::Parser::RuleSpec->class_has_rules( $self->_origclass );
636	388					13450	return $self->_has_no_obj_rules;
637							}
638
639							sub _run_through_rules {
640	265			265		445	my $self = shift;
641	265					6263	my (@crules) = Text::Parser::RuleSpec->class_rules( $self->_origclass );
642	265					8827	foreach my $rule ( @crules, $self->_get_obj_rules ) {
643	630	100				1793	next if not $rule->_test($self);
644	174					619	$rule->_run( $self, 0 );
645	174	100				4866	last if not $rule->continue_to_next;
646							}
647							}
648
649
650							my %IS_LINE_CONTINUED = (
651							default => \&_def_is_line_continued,
652							spice => \&_spice_is_line_contd,
653							trailing_backslash => \&_tbs_is_line_contd,
654							just_next_line => \&_jnl_is_line_contd,
655							slurp => \&_def_is_line_continued,
656							custom => undef,
657							);
658
659							my %JOIN_LAST_LINE = (
660							default => \&_def_join_last_line,
661							spice => \&_spice_join_last_line,
662							trailing_backslash => \&_tbs_join_last_line,
663							just_next_line => \&_jnl_join_last_line,
664							slurp => \&_def_join_last_line,
665							custom => undef,
666							);
667
668							sub is_line_continued {
669	162			162	1	291	my $self = shift;
670	162	100				345	return 0 if not defined $self->multiline_type;
671	156					412	my $routine = $self->_get_is_line_contd_routine;
672	156	100				327	parser_exception("is_wrapped routine not defined")
673							if not defined $routine;
674	155					328	$routine->( $self, @_ );
675							}
676
677							sub _val_of_line_wrap_style {
678	291			291		385	my $self = shift;
679	291	100				7050	defined $self->line_wrap_style ? $self->line_wrap_style : 'default';
680							}
681
682							sub _get_is_line_contd_routine {
683	156			156		232	my $self = shift;
684	156					300	my $val = $self->_val_of_line_wrap_style;
685							( $val ne 'custom' )
686	156	100				712	? $IS_LINE_CONTINUED{$val}
687							: $self->_is_wrapped;
688							}
689
690							sub join_last_line {
691	135			135	1	218	my $self = shift;
692	135					293	my $routine = $self->_get_join_last_line_routine;
693	135	100				282	parser_exception("unwrap_routine not defined")
694							if not defined $routine;
695	134					276	$routine->( $self, @_ );
696							}
697
698							sub _get_join_last_line_routine {
699	135			135		563	my $self = shift;
700	135					246	my $val = $self->_val_of_line_wrap_style;
701							( $val ne 'custom' )
702	135	100				489	? $JOIN_LAST_LINE{$val}
703							: $self->_unwrap_routine;
704							}
705
706							sub _def_is_line_continued {
707	116			116		168	my $self = shift;
708	116	100	100			257	return 0
709							if $self->multiline_type eq 'join_last'
710							and $self->lines_parsed() == 1;
711	110					377	return 1;
712							}
713
714							sub _spice_is_line_contd {
715	7			7		9	my $self = shift;
716	7					158	substr( shift, 0, 1 ) eq '+';
717							}
718
719							sub _tbs_is_line_contd {
720	3			3		6	my $self = shift;
721	3					10	substr( trim(shift), -1, 1 ) eq "\\";
722							}
723
724							sub _jnl_is_line_contd {
725	21			21		31	my $self = shift;
726	21	100				497	return 0 if $self->lines_parsed == 1;
727	20					65	return length( trim(shift) ) > 0;
728							}
729
730							sub _def_join_last_line {
731	107			107		220	my ( $self, $last, $line ) = ( shift, shift, shift );
732	107					378	return $last . $line;
733							}
734
735							sub _spice_join_last_line {
736	3			3		7	my ( $self, $last, $line ) = ( shift, shift, shift );
737	3					8	chomp $last;
738	3					21	$line =~ s/^[+]\s*/ /;
739	3					14	$last . $line;
740							}
741
742							sub _tbs_join_last_line {
743	2			2		5	my ( $self, $last, $line ) = ( shift, shift, shift );
744	2					6	chomp $last;
745	2					10	$last =~ s/\\\s*$//;
746	2					10	rtrim($last) . ' ' . ltrim($line);
747							}
748
749							sub _jnl_join_last_line {
750	18			18		38	my ( $self, $last, $line ) = ( shift, shift, shift );
751	18					44	chomp $last;
752	18					59	return $last . $line;
753							}
754
755
756							__PACKAGE__->meta->make_immutable;
757
758	42			42		235822	no Moose;
	42					131
	42					403
759
760							1;
761
762							__END__
763
764							=pod
765
766							=encoding UTF-8
767
768							=head1 NAME
769
770							Text::Parser - Simplifies text parsing. Easily extensible to parse any text format.
771
772							=head1 VERSION
773
774							version 1.000
775
776							=head1 SYNOPSIS
777
778							The following prints the content of the file (named in the first argument) to C<STDOUT>.
779
780							use Text::Parser;
781
782							my $parser = Text::Parser->new();
783							$parser->read(shift);
784							print $parser->get_records, "\n";
785
786							The above code prints after reading the whole file, which can be slow if you have large fules. This following prints contents immediately.
787
788							my $parser = Text::Parser->new();
789							$parser->add_rule(do => 'print', dont_record => 1);
790							($#ARGV > 0) ? $parser->filename(shift) : $parser->filehandle(\*STDIN);
791							$parser->read(); # Runs the rule for each line of input file
792
793							Also, the third line there allows this program to read from a file name specified on command-line, or C<STDIN>. In effect, this makes this Perl code a good replica of the UNIX C<cat>.
794
795							Here is an example with a simple rule that extracts the first error in the logfile and aborts reading further:
796
797							my $parser = Text::Parser->new();
798							$parser->add_rule(
799							if => '$1 eq "ERROR:"',
800							# $1 is a positional identifier for first 'field' on the line
801							do => '$this->abort_reading; return $_;'
802							# $this is copy of $parser accessible from within the rule
803							# abort_reading() tells parser to stop reading further
804							# Returned values are saved as records. Any data structure can be saved.
805							# $_ contains the full line as string, including any whitespaces
806							);
807
808							# Returns the first line starting with "ERROR:"
809							$parser->read('/path/to/logfile');
810
811							print "Some errors were found:\n" if $parser->get_records();
812
813							See L<this important note\|Text::Parser::Manual::ExtendedAWKSyntax/"Important note about quotes"> about using single quotes instead of double quotes.
814
815							Here is an example that parses a table with field separators indicated by C<\|> character:
816
817							use Data::Dumper 'Dumper';
818							my $table_parser = Text::Parser->new( FS => qr/\s[\|]\s/ );
819							$table_parser->add_rule(
820							if => '$this->NF == 0',
821							dont_record => 1
822							);
823							$table_parser->add_rule(
824							if => '$this->lines_parsed == 1',
825							do => '~columns = [$this->fields()];'
826							);
827							$table_parser->add_rule(
828							if => '$this->lines_parsed > 1',
829							do => 'my %rec = ();
830							foreach my $i (0..$#{~columns}) {
831							my $k = ~columns->[$i];
832							$rec{$k} = $this->field($i);
833							}
834							return \%rec;',
835							);
836							$table_parser->read('table.txt');
837							print Dumper($table_parser->get_records()), "\n";
838
839							In the above example you see the use of a L<stashed variable\|/"METHODS FOR ACCESSING STASHED VARIABLES"> named C<~columns>. Note that the sigil used here is not a Perl sigil, but is converted to native Perl code. In the above case, each record is a hash with fixed number of fields.
840
841							More complex file-formats can be read and contents stored in a data-structure or an object. Here is an example:
842
843							use strict;
844							use warnings;
845
846							package ComplexFormatParser;
847
848							use Text::Parser::RuleSpec; ## provides applies_rule + other sugar, imports Moose
849							extends 'Text::Parser';
850
851							# This rule ignores all comments
852							applies_rule ignore_comments => (
853							if => 'substr($1, 0, 1) eq "#"',
854							dont_record => 1,
855							);
856
857							# An attribute of the parser class.
858							has current_section => (
859							is => 'rw',
860							isa => 'Str',
861							default => undef,
862							);
863
864							applies_rule get_header => (
865							if => '$1 eq "SECTION"',
866							do => '$this->current_section($2);', # $this : this parser object
867							dont_record => 1,
868							);
869
870							# ... More can be done
871
872							package main;
873							use ComplexFormatParser;
874
875							my $p = ComplexFormatParser->new();
876							$p->read('myfile.complex.fmt');
877
878							=head1 RATIONALE
879
880							The L<need\|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it in Perl. Most programmers still write boilerplate code with a C<while> loop.
881
882							Instead C<Text::Parser> allows programmers to parse text with simple, self-explanatory L<rules\|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK\|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK.
883
884							I<B<Sidenote:>> Incidentally, AWK is L<one of the ancestors of Perl\|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to do way better than AWK. But while you can use Perl to do what AWK already does, that is usually limited to one-liners like C<perl -lane>. Even C<perl -lan script.pl> is not meant for serious projects. And it seems that L<some people still prefer AWK to Perl\|https://aplawrence.com/Unixart/awk-vs.perl.html>. This is not looking good.
885
886							=head1 OVERVIEW
887
888							With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read\|/read>> method automatically runs each rule collecting records from the text input into an internal array. Finally, C<L<get_records\|/get_records>> can retrieve the records.
889
890							Since C<Text::Parser> is a class, a programmer can subclass it to parse very complex file formats. L<Text::Parser::RuleSpec> provides intuitive rule sugar. Use of L<Moose> is encouraged. And data from parsed files can be turned into very complex data-structures or even objects. In this case, you wouldn't need to use C<get_records>.
891
892							With B<L<Text::Parser>> programmers have the elegance and simplicity of AWK combined with the power of Perl at their disposal.
893
894							=head1 CONSTRUCTOR
895
896							=head2 new
897
898							Takes optional attributes as in example below. See section L<ATTRIBUTES\|/ATTRIBUTES> for a list of the attributes and their description.
899
900							my $parser = Text::Parser->new();
901
902							my $parser2 = Text::Parser->new( line_wrap_style => 'trailing_backslash' );
903
904							=head1 ATTRIBUTES
905
906							The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name.
907
908							=head2 auto_chomp
909
910							Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>.
911
912							print "Parser will chomp lines automatically\n" if $parser->auto_chomp;
913
914							=head2 auto_split
915
916							Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields.
917
918							If it is set to a true value, each line will be split into fields, and L<a set of methods\|/"METHODS USED ONLY IN RULES AND SUBCLASSES"> become accessible to C<L<save_record\|/save_record>> or the rules.
919
920							=head2 auto_trim
921
922							Read-write attribute. The values this can take are shown under the C<L<new\|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed).
923
924							$parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default)
925
926							=head2 custom_line_trimmer
927
928							Read-write attribute which can be set to a custom subroutine that trims each line before applying any rules or saving any records. The function is expected to take a single argument containing the complete un-trimmed line, and is expected to return a manipulated line.
929
930							sub _cust_trimmer {
931							my $line = shift;
932							chomp $line;
933							return $line;
934							}
935
936							$parser->custom_line_trimmer(\&_cust_trimmer);
937
938							B<Note:> If you set this attribute, you are entirely responsible for the trimming. Poorly written routines could causing the C<auto_split> operation to misbehave.
939
940							By default it is undefined.
941
942							=head2 FS
943
944							Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+\|[,]/> which will split across either spaces or commas. The default value for this attribute is C<qr/\s+/>.
945
946							The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program\|https://www.gnu.org/software/gawk/gawk.html>. The ability to use a regular expression is an upgrade from AWK.
947
948							$parser->FS( qr/\s+$\|\s$/ );
949
950							C<FS> I<can> be changed from within a rule. Changes made even within a rule would take effect on the immediately next line read.
951
952							=head2 indentation_str
953
954							This can be used to set the indentation character or string. By default it is a single space C< >. But you may want to set it to be a tab (C<\t>) or perhaps some other character like a hyphen (C<->) or even a string (C< -E<gt>>). This attribute is used only if C<L<track_indentation\|/track_indentation>> is set.
955
956							=head2 line_wrap_style
957
958							Read-write attribute used as a quick way to select from commonly known line-wrapping styles. If the target text format allows line-wrapping this attribute allows the programmer to write rules as if they were on a single line.
959
960							$parser->line_wrap_style('trailing_backslash');
961
962							Allowed values are:
963
964							trailing_backslash - very common style ending lines with \
965							and continuing on the next line
966
967							spice - used for SPICE syntax, where the (+)
968							+ symbol continues content of last line
969
970							just_next_line - used in simple text files written to be
971							humanly-readable. New paragraphs start
972							on a new line after a blank line.
973
974							slurp - used to "slurp" the whole file into
975							a single line.
976
977							custom - user-defined style. User must specify
978							value of multiline_type and define
979							two custom unwrap routines using the
980							custom_line_unwrap_routines method
981							when custom is chosen.
982
983							When C<line_wrap_style> is set to one of these values, the value of C<multiline_type> is automatically set to an appropriate value. Read more about L<handling the common line-wrapping styles\|/"Common line-wrapping styles">.
984
985							=head2 multiline_type
986
987							Read-write attribute used mainly if the programmer wishes to specify custom line-unwrapping methods. By default, this attribute is C<undef>, i.e., the target text format will not have wrapped lines.
988
989							$parser->line_wrap_style('custom');
990							$parser->multiline_type('join_next');
991
992							my $mult = $parser->multiline_type;
993							print "Parser is a multi-line parser of type: $mult" if defined $mult;
994
995							Allowed values for C<multiline_type> are described below, but it can also be set back to C<undef>.
996
997							=over 4
998
999							=item *
1000
1001							If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>.
1002
1003							=item *
1004
1005							If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>.
1006
1007							=back
1008
1009							To know more about how to use this, read about L<specifying custom line-unwrap routines\|/"Specifying custom line-unwrap routines">.
1010
1011							=head2 track_indentation
1012
1013							This boolean attribute enables tracking of the number of indentation characters are there at the beginning of each line. In some text formats, this is a very important information that can indicate the depth of some data. By default, this is false. When set to a true value, you can get the number of indentation characters on a given line with the C<L<this_indent\|/this_indent>> method.
1014
1015							$parser->track_indentation(1);
1016
1017							Now you can use C<this_indent> method in the rules:
1018
1019							$parser->add_rule(if => '$this->this_indent > 0', do => '~num_indented ++;')
1020
1021							=head1 METHODS FOR SPECIFYING RULES
1022
1023							These are meant to be called from the C<::main> program or within subclasses.
1024
1025							=head2 add_rule
1026
1027							Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor.
1028
1029							$parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing
1030							$parser->add_rule(if => 'm/li/, do => 'print', dont_record); # Prints lines with 'li'
1031							$parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements
1032
1033							Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute.
1034
1035							=head2 clear_rules
1036
1037							Takes no arguments, returns nothing. Clears the rules that were added to the object.
1038
1039							$parser->clear_rules;
1040
1041							This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule\|/BEGIN_rule>> and C<L<END_rule\|/END_rule>>.
1042
1043							=head2 BEGIN_rule
1044
1045							Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored.
1046
1047							$parser->BEGIN_rule(do => '~count = 0;');
1048
1049							=over 4
1050
1051							=item *
1052
1053							Since any C<if> key is ignored, the C<do> key is required. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
1054
1055							=item *
1056
1057							The C<BEGIN_rule> is mainly used to initialize some variables.
1058
1059							=item *
1060
1061							By default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved even before reading the first line of text.
1062
1063							=back
1064
1065							=head2 END_rule
1066
1067							Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method.
1068
1069							$parser->END_rule(do => 'print ~count, "\n";');
1070
1071							=over 4
1072
1073							=item *
1074
1075							Since any C<if> key is ignored, the C<do> key is required. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
1076
1077							=item *
1078
1079							The C<END_rule> is mainly used to do final processing of collected records.
1080
1081							=item *
1082
1083							By default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved after the end rule is processed.
1084
1085							=back
1086
1087							=head1 METHODS USED ONLY IN RULES AND SUBCLASSES
1088
1089							These methods can be used only inside rules, or methods of a subclass. Some of these methods are available only when C<auto_split> is on. They are listed as follows:
1090
1091							=over 4
1092
1093							=item *
1094
1095							L<NF\|Text::Parser::AutoSplit/NF> - number of fields on this line
1096
1097							=item *
1098
1099							L<fields\|Text::Parser::AutoSplit/fields> - all the fields as an array of strings ; trailing C<\n> removed
1100
1101							=item *
1102
1103							L<field\|Text::Parser::AutoSplit/field> - access individual elements of the array above ; negative arguments count from back
1104
1105							=item *
1106
1107							L<field_range\|Text::Parser::AutoSplit/field_range> - array of fields in the given range of indices ; negative arguments allowed
1108
1109							=item *
1110
1111							L<join_range\|Text::Parser::AutoSplit/join_range> - join the fields in the range of indices ; negative arguments allowed
1112
1113							=item *
1114
1115							L<find_field\|Text::Parser::AutoSplit/find_field> - returns field for which a given subroutine is true ; each field is passed to the subroutine in C<$_>
1116
1117							=item *
1118
1119							L<find_field_index\|Text::Parser::AutoSplit/find_field_index> - similar to above, except it returns the index of the field instead of the field itself
1120
1121							=item *
1122
1123							L<splice_fields\|Text::Parser::AutoSplit/splice_fields> - like the native Perl C<splice>
1124
1125							=back
1126
1127							Other methods described below are also to be used only inside a rule, or inside methods called by the rules.
1128
1129							=head2 abort_reading
1130
1131							Takes no arguments. Returns C<1>. Aborts C<read>ing any more lines, and C<read> method exits gracefully as if nothing unusual happened.
1132
1133							$parser->add_rule(
1134							do => '$this->abort_reading;',
1135							if => '$1 eq "EOF"',
1136							dont_record => 1,
1137							);
1138
1139							=head2 this_indent
1140
1141							Takes no arguments, and returns the number of indentation characters found at the front of the current line. This can be called from within a rule:
1142
1143							$parser->add_rule( if => '$this->this_indent > 0', );
1144
1145							=head2 this_line
1146
1147							Takes no arguments, and returns the current line being parsed. For example:
1148
1149							$parser->add_rule(
1150							if => 'length($this->this_line) > 256',
1151							);
1152							## Saves all lines longer than 256 characters
1153
1154							Inside rules, instead of using this method, one may also use C<$_>:
1155
1156							$parser->add_rule(
1157							if => 'length($_) > 256',
1158							);
1159
1160							=head1 METHODS FOR READING INPUT
1161
1162							=head2 filename
1163
1164							Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
1165
1166							print "Last read ", $parser->filename, "\n";
1167
1168							The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read\|/read>>.
1169
1170							$parser->read(shift @ARGV);
1171							print $parser->filename(), ":\n",
1172							"=" x (length($parser->filename())+1),
1173							"\n",
1174							$parser->get_records(),
1175							"\n";
1176
1177							A C<read> call with a filehandle, will clear the last file name.
1178
1179							$parser->read(\*MYFH);
1180							print "Last file name is lost\n" if not defined $parser->filename();
1181
1182							=head2 filehandle
1183
1184							Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved.
1185
1186							my $fh = $parser->filehandle();
1187
1188							Like C<L<filename\|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename.
1189
1190							$parser->read(\*STDOUT);
1191							my $lastfh = $parser->filehandle(); # Will return glob of STDOUT
1192
1193							=head2 read
1194
1195							Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class.
1196
1197							$parser->read($filename); # Read the file
1198							$parser->read(\*STDIN); # Read the filehandle
1199
1200							The above could also be done in two steps if the developer so chooses.
1201
1202							$parser->filename($filename);
1203							$parser->read(); # equiv: $parser->read($filename)
1204
1205							$parser->filehandle(\*STDIN);
1206							$parser->read(); # equiv: $parser->read(\*STDIN)
1207
1208							The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading\|/abort_reading>> method.
1209
1210							Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter.
1211
1212							$parser->read('myfile.txt'); # Will close file automatically
1213
1214							open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
1215							$parser->read(\*MYFH); # Will not close MYFH
1216							close MYFH;
1217
1218							=head1 METHODS FOR HANDLING RECORDS
1219
1220							=head2 get_records
1221
1222							Takes no arguments. Returns an array containing all the records saved by the parser.
1223
1224							foreach my $record ( $parser->get_records ) {
1225							$i++;
1226							print "Record: $i: ", $record, "\n";
1227							}
1228
1229							=head2 last_record
1230
1231							Takes no arguments and returns the last saved record. Leaves the saved records untouched.
1232
1233							my $last_rec = $parser->last_record;
1234
1235							=head2 pop_record
1236
1237							Takes no arguments and pops the last saved record.
1238
1239							my $last_rec = $parser->pop_record;
1240
1241							=head2 push_records
1242
1243							Takes an array as input, and stores each element as a separate record. Returns the number of elements in the new array.
1244
1245							$parser->push_records(qw(insert these as separate records));
1246
1247							=head1 METHODS FOR ACCESSING STASHED VARIABLES
1248
1249							Stashed variables can be data structures or simple scalar variables stored as elements in the parser object. Hence they are accessible across different rules. Stashed variables start with a tilde (~). So you could set up rules like these:
1250
1251							$parser->BEGIN_rule( do => '~count=0;' );
1252							$parser->add_rule( if => '$1 eq "SECTION"', do => '~count++;' );
1253
1254							In the above rule C<~count> is a stashed variable. Internally this is just a hash element with key named C<count>. After the C<read> call is over, this variable can be accessed.
1255
1256							$parser->read('some_text_file.txt');
1257							print "Found ", $parser->stashed('count'), " sections in file.\n";
1258
1259							Stashed variables that are created entirely within the rules are forgotten at the beginning of the next C<read> call. This means, you can C<read> another text file and don't have to bother to clear out the stashed variable C<~count>.
1260
1261							$parser->read('another_text_file.txt');
1262							print "Found ", $parser->stashed('count'), " sections in file.\n";
1263
1264							In contrast, stashed variables created by calling C<prestash> continue to persist for subsequent calls of C<read>, unless an explicit call to C<forget> names these pre-stashed variables.
1265
1266							$parser->prestash( max_err => 100 );
1267							$parser->BEGIN_rule( do => '~err_count = 0;' );
1268							$parser->add_rule(
1269							if => '$1 eq "ERROR:" && ~err_count < ~max_err',
1270							do => '~err_count++;',
1271							continue_to_next => 1,
1272							);
1273							$parser->add_rule(
1274							if => '$1 eq "ERROR:" && ~err_count == ~max_err',
1275							do => '$this->abort_reading;',
1276							);
1277							$parser->read('first.log');
1278							print "Top 100 errors:\n", $parser->get_records, "\n";
1279
1280							$parser->read('another.log'); # max_err is still set to 100, but err_count is forgotten and reset to 0 by the BEGIN_rule
1281							print "Top 100 errors:\n", $parser->get_records, "\n";
1282
1283							=head2 forget
1284
1285							Takes an optional list of string arguments which must be the names of stashed variables. This method forgets those stashed variables for ever. So be sure you really intend to do this. In list context, this method returns the values of the variables whose names were passed to the method. In scalar context, it returns the last value of the last stashed variable passed.
1286
1287							my $pop_and_forget_me = $parser->forget('forget_me_totally', 'pop_and_forget_me');
1288
1289							Inside rules, you could simply C<delete> the stashed variable like this:
1290
1291							$parser->add_rule( do => 'delete ~forget_me;' );
1292
1293							The above C<delete> statement works because the stashed variable C<~forget_me> is just a hash key named C<forget_me> internally. Using this on pre-stashed variables, will only temporarily delete the variable. It will be present in subsequent calls to C<read>. If you want to delete it completely call C<forget> with the pre-stashed variable name as an argument.
1294
1295							When no arguments are passed, it clears all stashed variables (not pre-stashed).
1296
1297							$parser->forget;
1298
1299							Note that when C<forget> is called with no arguments, pre-stashed variables are not deleted and are still accessible in subsequent calls to C<read>. To forget a pre-stashed variable, it needs to be explicitly named in a call to forget. Then it is forgotten.
1300
1301							A call to C<forget> method is done without any arguments, right before C<read> starts reading a new text input. That is how we can reset the values of stashed variables, but still retain pre-stashed variables.
1302
1303							=head2 has_empty_stash
1304
1305							Takes no arguments and returns a true value if the stash of variables is empty (i.e., no stashed variables are present). If not, it returns a boolean false.
1306
1307							if ( not $parser->has_empty_stash ) {
1308							my $myvar = $parser->stashed('myvar');
1309							print "myvar = $myvar\n";
1310							}
1311
1312							=head2 has_stashed
1313
1314							Takes a single string argument and returns a boolean indicating if there is a stashed variable with that name or not:
1315
1316							if ( $parser->has_stashed('stashed_var') ) {
1317							print "Here is what stashed_var contains: ", $parser->stashed('stashed_var');
1318							}
1319
1320							Inside rules you could check this with the C<exists> keyword:
1321
1322							$parser->add_rule( if => 'exists ~stashed_var' );
1323
1324							=head2 prestash
1325
1326							Takes an even number of arguments, or a hash, with variable name and value as pairs. This is useful to preset some stash variables before C<read> is called so that the rules have some variables accessible inside them. The main difference between pre-stashed variables created via C<prestash> and those created in the rules or using C<stashed> is that the pre-stashed ones are static.
1327
1328							$parser->prestash(pattern => 'string');
1329							$parser->add_rule( if => 'my $patt = ~pattern; m/$patt/;' );
1330
1331							You may change the value of a C<prestash>ed variable inside any of the rules.
1332
1333							=head2 stashed
1334
1335							Takes an optional list of string arguments each with the name of a stashed variable you want to query, i.e., get the value of. In list context, it returns their values in the same order as the queried variables, and in scalar context it returns the value of the last variable queried.
1336
1337							my (%var_vals) = $parser->stashed;
1338							my (@vars) = $parser->stashed( qw(first second third) );
1339							my $third = $parser->stashed( qw(first second third) ); # returns value of last variable listed
1340							my $myvar = $parser->stashed('myvar');
1341
1342							Or you could do this:
1343
1344							use Data::Dumper 'Dumper';
1345
1346							if ( $parser->has_empty_stash ) {
1347							print "Nothing on my stash\n";
1348							} else {
1349							my %stash = $parser->stashed;
1350							print Dumper(\%stash), "\n";
1351							}
1352
1353							=head1 MISCELLANEOUS METHODS
1354
1355							=head2 lines_parsed
1356
1357							Takes no arguments. Returns the number of lines last parsed. Every call to C<read>, causes the value to be auto-reset.
1358
1359							print $parser->lines_parsed, " lines were parsed\n";
1360
1361							=head2 has_aborted
1362
1363							Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle.
1364
1365							print "Aborted\n" if $parser->has_aborted();
1366
1367							=head2 custom_line_unwrap_routines
1368
1369							This method should be used only when the line-wrapping supported by the text format is not already among the L<known line-wrapping styles supported\|/"Common line-wrapping styles">.
1370
1371							Takes a hash argument with required keys C<is_wrapped> and C<unwrap_routine>. Used in setting up L<custom line-unwrapping routines\|/"Specifying custom line-unwrap routines">.
1372
1373							Here is an example of setting custom line-unwrapping routines:
1374
1375							$parser->multiline_type('join_last');
1376							$parser->custom_line_unwrap_routines(
1377							is_wrapped => sub { # A method that detects if this line is wrapped or not
1378							my ($self, $this_line) = @_;
1379							$this_line =~ /^[~]/;
1380							},
1381							unwrap_routine => sub { # A method to unwrap the line by joining it with the last line
1382							my ($self, $last_line, $this_line) = @_;
1383							chomp $last_line;
1384							$last_line =~ s/\s*$//g;
1385							$this_line =~ s/^[~]\s*//g;
1386							"$last_line $this_line";
1387							},
1388							);
1389
1390							Now you can parse a file with the following content:
1391
1392							This is a long line that is wrapped around with a custom
1393							~ character - the tilde. It is unusual, but hey, we're
1394							~ showing an example.
1395
1396							When C<$parser> gets to C<read> this, these three lines get unwrapped and processed by the rules, as if it were a single line.
1397
1398							L<Text::Parser::Multiline> shows another example with C<join_next> type.
1399
1400							=head1 METHODS THAT MAY BE OVERRIDDEN IN SUBCLASSES
1401
1402							The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass.
1403
1404							Starting version 0.925, users should never need to override any of these methods to make their own parser.
1405
1406							=head2 save_record
1407
1408							The default implementation takes a single argument, runs any rules, and saves the returned value as a record in an internal array. If nothing is returned from the rule, C<undef> is stored as a record.
1409
1410							B<Note>: Starting C<0.925> version of C<Text::Parser> it is not required to override this method in your derived class. In most cases, you should use the rules.
1411
1412							B<Importnant Note:> Starting version C<1.0> of C<Text::Parser> this method will be deprecated to improve performance. So avoid inheriting this method.
1413
1414							=head2 is_line_continued
1415
1416							The default implementation of this routine:
1417
1418							multiline_type \| Return value
1419							------------------+---------------------------------
1420							undef \| 0
1421							join_last \| 0 for first line, 1 otherwise
1422							join_next \| 1
1423
1424							In earlier versions of L<Text::Parser> you had no way but to subclass L<Text::Parser> to change the routine that detects if a line is wrapped. Now you can instead select from a list of known C<line_wrap_style>s, or even set custom methods for this.
1425
1426							=head2 join_last_line
1427
1428							The default implementation of this routine takes two string arguments, joins them without any C<chomp> or any other operation, and returns that result.
1429
1430							In earlier versions of L<Text::Parser> you had no way but to subclass L<Text::Parser> to select a line-unwrapping routine. Now you can instead select from a list of known C<line_wrap_style>s, or even set custom methods for this.
1431
1432							=head1 THINGS TO DO FURTHER
1433
1434							Future versions are expected to include features to:
1435
1436							=over 4
1437
1438							=item *
1439
1440							read and parse from a buffer
1441
1442							=item *
1443
1444							automatically uncompress input
1445
1446							=item *
1447
1448							I<suggestions welcome ...>
1449
1450							=back
1451
1452							Contributions and suggestions are welcome and properly acknowledged.
1453
1454							=head1 HANDLING LINE-WRAPPING
1455
1456							Different text formats sometimes allow line-wrapping to make their content more human-readable. Handling this can be rather complicated if you use native Perl, but extremely easy with L<Text::Parser>.
1457
1458							=head2 Common line-wrapping styles
1459
1460							L<Text::Parser> supports a range of commonly-used line-unwrapping routines which can be selected using the C<L<line_wrap_style\|Text::Parser/"line_wrap_style">> attribute. The attribute automatically sets up the parser to handle line-unwrapping for that specific text format.
1461
1462							$parser->line_wrap_style('trailing_backslash');
1463							# Now when read runs the rules, all the back-slash
1464							# line-wrapped lines are auto-unwrapped to a single
1465							# line, and rules are applied on that single line
1466
1467							When C<read> reads each line of text, it looks for any trailing backslash and unwraps the line. The next line may have a trailing back-slash too, and that too is unwrapped. Once the fully-unwrapped line has been identified, the rules are run on that unwrapped line, as if the file had no line-wrapping at all. So say the content of a line is like this:
1468
1469							This is a long line wrapped into multiple lines \
1470							with a back-slash character. This is a very common \
1471							way to wrap long lines. In general, line-wrapping \
1472							can be much easier on the reader's eyes.
1473
1474							When C<read> runs any rules in C<$parser>, the text above appears as a single line to the rules.
1475
1476							=head2 Specifying custom line-unwrap routines
1477
1478							I have included the common types of line-wrapping styles known to me. But obviously there can be more. To specify a custom line-unwrapping style follow these steps:
1479
1480							=over 4
1481
1482							=item *
1483
1484							Set the C<L<multiline_type\|/"multiline_type">> attribute appropriately. If you do not set this, your custom unwrapping routines won't have any effect.
1485
1486							=item *
1487
1488							Call C<L<custom_line_unwrap_routines\|/"custom_line_unwrap_routines">> method. If you forget to call this method, or if you don't provide appropriate arguments, then an exception is thrown.
1489
1490							=back
1491
1492							L<Here\|/"custom_line_unwrap_routines"> is an example with C<join_last> value for C<multiline_type>. And L<here\|Text::Parser::Multiline/"SYNOPSIS"> is an example using C<join_next>. You'll notice that in both examples, you need to specify both routines. In fact, if you don't
1493
1494							=head2 Line-unwrapping in a subclass
1495
1496							You may subclass C<Text::Paser> to parse your specific text format. And that format may support some line-wrapping. To handle the known common line-wrapping styles, set a default value for C<line_wrap_style>. For example:
1497
1498							=over 4
1499
1500							=item *
1501
1502							Set a default value for C<line_wrap_style>. For example, the following uses one of the supported common line-unwrap methods. has '+line_wrap_style' => ( default => 'spice', );
1503
1504							=back
1505
1506							* Setup custom line-unwrap routines with C<unwraps_lines> from L<Text::Parser::RuleSpec>.
1507
1508							use Text::Parser::RuleSpec;
1509							extends 'Text::Parser';
1510
1511							has '+line_wrap_style' => ( default => 'slurp', is => 'ro');
1512							has '+multiline_type' => ( is => 'ro' );
1513
1514							Of course, you don't I<have> to make them read-only.
1515
1516							To setup custom line-unwrapping routines in a subclass, you can use the C<L<unwraps_lines_using\|Text::Parser::RuleSpec/"unwraps_lines_using">> syntax sugar from L<Text::Parser::RuleSpec>. For example:
1517
1518							package MyParser;
1519
1520							use Text::Parser::RuleSpec;
1521							extends 'Text::Parser';
1522
1523							has '+multiline_type' => (
1524							default => 'join_next',
1525							is => 'ro',
1526							);
1527
1528							unwraps_lines_using(
1529							is_wrapped => \&_my_is_wrapped_routine,
1530							unwrap_routine => \&_my_unwrap_routine,
1531							);
1532
1533							=head1 SEE ALSO
1534
1535							=over 4
1536
1537							=item *
1538
1539							L<Text::Parser::Manual> - Read this manual to learn how to do cool things with this class
1540
1541							=item *
1542
1543							L<Text::Parser::Error> - there is a change in how exceptions are thrown by this class. Read this page for more information.
1544
1545							=item *
1546
1547							L<The AWK Programming Language\|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberg, and B<K>ernighan.
1548
1549							=item *
1550
1551							L<Text::Parser::Multiline> - how to read line-wrapped text input
1552
1553							=back
1554
1555							=head1 BUGS
1556
1557							Please report any bugs or feature requests on the bugtracker website
1558							L<http://github.com/balajirama/Text-Parser/issues>
1559
1560							When submitting a bug or request, please include a test-file or a
1561							patch to an existing test-file that illustrates the bug or desired
1562							feature.
1563
1564							=head1 AUTHOR
1565
1566							Balaji Ramasubramanian <balajiram@cpan.org>
1567
1568							=head1 COPYRIGHT AND LICENSE
1569
1570							This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
1571
1572							This is free software; you can redistribute it and/or modify it under
1573							the same terms as the Perl 5 programming language system itself.
1574
1575							=head1 CONTRIBUTORS
1576
1577							=for stopwords H.Merijn Brand - Tux Mohammad S Anwar
1578
1579							=over 4
1580
1581							=item *
1582
1583							H.Merijn Brand - Tux <h.m.brand@xs4all.nl>
1584
1585							=item *
1586
1587							Mohammad S Anwar <mohammad.anwar@yahoo.com>
1588
1589							=back
1590
1591							=cut
1592