File Coverage

blib/lib/Text/Parser.pm

Criterion	Covered	Total	%
statement	174	174	100.0
branch	75	76	98.6
condition	20	21	95.2
subroutine	46	46	100.0
pod	10	11	90.9
total	325	328	99.0

line	stmt	bran	cond	sub	pod	time	code
1	30			30		1781568	use warnings;
	30					147
	30					920
2	30			30		140	use strict;
	30					50
	30					609
3	30			30		131	use feature ':5.14';
	30					51
	30					4545
4
5							package Text::Parser 0.927;
6
7							# ABSTRACT: Simplifies text parsing. Easily extensible to parse any text format.
8
9
10	30			30		10551	use Moose;
	30					9369710
	30					204
11	30			30		218376	use MooseX::CoverableModifiers;
	30					149600
	30					175
12	30			30		15224	use MooseX::StrictConstructor;
	30					824731
	30					118
13	30			30		271049	use namespace::autoclean;
	30					66
	30					150
14	30			30		2605	use Moose::Util 'apply_all_roles', 'ensure_all_roles';
	30					71
	30					248
15	30			30		10091	use Moose::Util::TypeConstraints;
	30					64
	30					237
16	30			30		79853	use String::Util qw(trim ltrim rtrim eqq);
	30					90269
	30					2342
17	30			30		12037	use Text::Parser::Errors;
	30					120
	30					4507
18	30			30		15837	use Text::Parser::Rule;
	30					125
	30					2808
19
20							enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)];
21							enum 'Text::Parser::Types::TrimType' => [qw(l r b n)];
22
23	30			30		275	no Moose::Util::TypeConstraints;
	30					67
	30					364
24	30			30		26613	use FileHandle;
	30					249646
	30					162
25	30			30		9373	use Try::Tiny;
	30					74
	30					8804
26
27
28							sub BUILD {
29	47			47	0	110114	my $self = shift;
30	47	100				2180	ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split;
31	47	100				6175	return if not defined $self->multiline_type;
32	11					56	ensure_all_roles $self, 'Text::Parser::Multiline';
33							}
34
35
36							has auto_chomp => (
37							is => 'rw',
38							isa => 'Bool',
39							lazy => 1,
40							default => 0,
41							);
42
43
44							has auto_split => (
45							is => 'rw',
46							isa => 'Bool',
47							lazy => 1,
48							default => 0,
49							trigger => \&__newval_auto_split,
50							);
51
52							sub __newval_auto_split {
53	81			81		37092	my ( $self, $newval, $oldval ) = ( shift, shift, shift );
54	81	100				357	ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval;
55	81	100	100			232973	$self->_clear_all_fields if not $newval and $oldval;
56							}
57
58
59							has auto_trim => (
60							is => 'rw',
61							isa => 'Text::Parser::Types::TrimType',
62							lazy => 1,
63							default => 'n',
64							);
65
66
67							has FS => (
68							is => 'rw',
69							isa => 'RegexpRef',
70							lazy => 1,
71							default => sub {qr/\s+/},
72							);
73
74
75							has multiline_type => (
76							is => 'rw',
77							isa => 'Text::Parser::Types::MultilineType\|Undef',
78							lazy => 1,
79							default => undef,
80							);
81
82							around multiline_type => sub {
83	1209			1209		62969	my ( $orig, $self ) = ( shift, shift );
84	1209					30242	my $oldval = $orig->($self);
85	1209	100	100			7122	return $oldval if not @_ or eqq( $_[0], $oldval );
86	8					115	return __newval_multi_line( $orig, $self, @_ );
87							};
88
89							sub __newval_multi_line {
90	8			8		256	my ( $orig, $self, $newval ) = ( shift, shift, shift );
91	8	100				64	ensure_all_roles( $self, 'Text::Parser::Multiline' )
92							if defined $newval;
93	8					32666	return $orig->( $self, $newval );
94							}
95
96
97							has _obj_rules => (
98							is => 'rw',
99							isa => 'ArrayRef[Text::Parser::Rule]',
100							lazy => 1,
101							default => sub { [] },
102							traits => ['Array'],
103							handles => {
104							_push_rule => 'push',
105							_has_no_rules => 'is_empty',
106							_get_rules => 'elements',
107							},
108							);
109
110							sub add_rule {
111	18			18	1	118	my $self = shift;
112	18	100				406	$self->auto_split(1) if not $self->auto_split;
113	18					537	my $rule = Text::Parser::Rule->new(@_);
114	18					549	$self->_push_rule($rule);
115							}
116
117
118							sub clear_rules {
119	2			2	1	5	my $self = shift;
120	2					64	$self->_obj_rules( [] );
121	2					59	$self->_clear_begin_rule;
122	2					59	$self->_clear_end_rule;
123							}
124
125
126							has _begin_rule => (
127							is => 'rw',
128							isa => 'Text::Parser::Rule',
129							predicate => '_has_begin_rule',
130							clearer => '_clear_begin_rule',
131							);
132
133							sub BEGIN_rule {
134	4			4	1	22	my $self = shift;
135	4	100				101	$self->auto_split(1) if not $self->auto_split;
136	4					14	my (%opt) = _defaults_for_begin_end(@_);
137	4					19	$self->_modify_rule( '_begin_rule', %opt );
138							}
139
140							sub _defaults_for_begin_end {
141	7			7		24	my (%opt) = @_;
142	7	100				24	$opt{dont_record} = 1 if not exists $opt{dont_record};
143	7	100				20	delete $opt{if} if exists $opt{if};
144	7	100				18	delete $opt{continue_to_next} if exists $opt{continue_to_next};
145	7					29	return (%opt);
146							}
147
148							sub _modify_rule {
149	7			7		21	my ( $self, $func, %opt ) = @_;
150	7					19	my $pred = '_has' . $func;
151	7	100				201	$self->_append_rule_lines( $func, \%opt ) if $self->$pred();
152	7					175	my $rule = Text::Parser::Rule->new(%opt);
153	7					163	$self->$func($rule);
154							}
155
156							sub _append_rule_lines {
157	3			3		9	my ( $self, $func, $opt ) = ( shift, shift, shift );
158	3					72	my $old = $self->$func();
159	3					64	$opt->{do} = $old->action . $opt->{do};
160							}
161
162
163							has _end_rule => (
164							is => 'rw',
165							isa => 'Text::Parser::Rule',
166							predicate => '_has_end_rule',
167							clearer => '_clear_end_rule',
168							);
169
170							sub END_rule {
171	3			3	1	21	my $self = shift;
172	3	100				68	$self->auto_split(1) if not $self->auto_split;
173	3					14	my (%opt) = _defaults_for_begin_end(@_);
174	3					14	$self->_modify_rule( '_end_rule', %opt );
175							}
176
177
178							sub read {
179	74			74	1	65983	my $self = shift;
180	74	100				379	return if not defined $self->_handle_read_inp(@_);
181	68					446	$self->_run_begin_end_block('_begin_rule');
182	68					336	$self->__read_and_close_filehandle;
183	59					299	$self->_run_begin_end_block('_end_rule');
184	59					1487	$self->_ExAWK_symbol_table( {} );
185							}
186
187							sub _handle_read_inp {
188	74			74		153	my $self = shift;
189	74	100				282	return $self->filehandle if not @_;
190	72	100	100			858	return if not ref( $_[0] ) and not $_[0];
191	71	100				2143	return $self->filename(@_) if not ref( $_[0] );
192	6					20	return $self->filehandle(@_);
193							}
194
195							has _ExAWK_symbol_table => (
196							is => 'rw',
197							isa => 'HashRef[Any]',
198							default => sub { {} },
199							lazy => 1,
200							);
201
202							sub _run_begin_end_block {
203	127			127		345	my ( $self, $func ) = ( shift, shift );
204	127					324	my $pred = '_has' . $func;
205	127	100				3999	return if not $self->$pred();
206	4					87	my $rule = $self->$func();
207	4					19	$rule->_run( $self, 0 );
208							}
209
210							sub __read_and_close_filehandle {
211	68			68		134	my $self = shift;
212	68					369	$self->_prep_to_read_file;
213	68					406	$self->__read_file_handle;
214	59	100				2053	$self->_close_filehandles if $self->_has_filename;
215	59					1058	$self->_clear_this_line;
216							}
217
218							sub _prep_to_read_file {
219	68			68		202	my $self = shift;
220	68					2219	$self->_reset_line_count;
221	68					2369	$self->_empty_records;
222	68					2160	$self->_clear_abort;
223							}
224
225							sub __read_file_handle {
226	68			68		816	my $self = shift;
227	68					507	my $fh = $self->filehandle();
228	68					1310	while (<$fh>) {
229	517	100				1336	last if not $self->__parse_line($_);
230							}
231							}
232
233							sub __parse_line {
234	517			517		1610	my ( $self, $line ) = ( shift, shift );
235	517					16682	$self->_next_line_parsed();
236	517					1631	$line = $self->_def_line_manip($line);
237	517					6590	$self->_set_this_line($line);
238	517					2424	$self->save_record($line);
239	511					13044	return not $self->has_aborted;
240							}
241
242							sub _def_line_manip {
243	517			517		930	my ( $self, $line ) = ( shift, shift );
244	517	100				9898	chomp $line if $self->auto_chomp;
245	517					1280	return $self->_trim_line($line);
246							}
247
248							sub _trim_line {
249	517			517		1588	my ( $self, $line ) = ( shift, shift );
250	517	100				9745	return $line if $self->auto_trim eq 'n';
251	69	100				521	return trim($line) if $self->auto_trim eq 'b';
252	8	100				148	return ltrim($line) if $self->auto_trim eq 'l';
253	4					11	return rtrim($line);
254							}
255
256
257							has filename => (
258							is => 'rw',
259							isa => 'Str\|Undef',
260							lazy => 1,
261							init_arg => undef,
262							default => undef,
263							predicate => '_has_filename',
264							clearer => '_clear_filename',
265							trigger => \&_set_filehandle,
266							);
267
268							sub _set_filehandle {
269	71			71		19560	my $self = shift;
270	71	100				2583	return $self->_clear_filename if not defined $self->filename;
271	70					352	$self->_save_filehandle( $self->__get_valid_fh );
272							}
273
274							sub __get_valid_fh {
275	70			70		155	my $self = shift;
276	70					313	my $fname = $self->_get_valid_text_filename;
277	70	100				910	return FileHandle->new( $fname, 'r' ) if defined $fname;
278	4					94	$fname = $self->filename;
279	4					100	$self->_clear_filename;
280	4					12	$self->_throw_invalid_file_exception($fname);
281							}
282
283							# Don't touch: Override this in Text::Parser::AutoUncompress
284							sub _get_valid_text_filename {
285	70			70		135	my $self = shift;
286	70					1636	my $fname = $self->filename;
287	70	100	66			7402	return $fname if -f $fname and -r $fname and -T $fname;
			100
288	4					15	return;
289							}
290
291							# Don't touch: Override this in Text::Parser::AutoUncompress
292							sub _throw_invalid_file_exception {
293	4			4		10	my ( $self, $fname ) = ( shift, shift );
294	4	100				44	die invalid_filename( name => $fname ) if not -f $fname;
295	1	50				14	die file_not_readable( name => $fname ) if not -r $fname;
296	1					8	die file_not_plain_text( name => $fname );
297							}
298
299
300							has filehandle => (
301							is => 'rw',
302							isa => 'FileHandle\|Undef',
303							lazy => 1,
304							init_arg => undef,
305							default => undef,
306							predicate => '_has_filehandle',
307							writer => '_save_filehandle',
308							reader => '_get_filehandle',
309							clearer => '_close_filehandles',
310							);
311
312							sub filehandle {
313	84			84	1	3548	my $self = shift;
314	84	100	100			3013	return if not @_ and not $self->_has_filehandle;
315	81	100				624	$self->_save_filehandle(@_) if @_;
316	77	100				393	$self->_clear_filename if @_;
317	77					2472	return $self->_get_filehandle;
318							}
319
320
321							has lines_parsed => (
322							is => 'ro',
323							isa => 'Int',
324							lazy => 1,
325							init_arg => undef,
326							default => 0,
327							traits => ['Counter'],
328							handles => {
329							_next_line_parsed => 'inc',
330							_reset_line_count => 'reset',
331							}
332							);
333
334
335							sub save_record {
336	313			313	1	7349	my ( $self, $record ) = ( shift, shift );
337	313	100				9682	$self->_has_no_rules
338							? $self->push_records($record)
339							: $self->_run_through_rules;
340							}
341
342							sub _run_through_rules {
343	144			144		190	my $self = shift;
344	144					3901	foreach my $rule ( $self->_get_rules ) {
345	277	100				655	next if not $rule->_test($self);
346	97					279	$rule->_run( $self, 0 );
347	97	100				2250	last if not $rule->continue_to_next;
348							}
349							}
350
351
352							has _current_line => (
353							is => 'ro',
354							isa => 'Str\|Undef',
355							init_arg => undef,
356							writer => '_set_this_line',
357							reader => 'this_line',
358							clearer => '_clear_this_line',
359							default => undef,
360							);
361
362
363
364							has abort => (
365							is => 'rw',
366							isa => 'Bool',
367							lazy => 1,
368							default => 0,
369							traits => ['Bool'],
370							reader => 'has_aborted',
371							handles => {
372							abort_reading => 'set',
373							_clear_abort => 'unset'
374							},
375							);
376
377
378							has records => (
379							isa => 'ArrayRef[Any]',
380							is => 'ro',
381							lazy => 1,
382							default => sub { return []; },
383							auto_deref => 1,
384							init_arg => undef,
385							traits => ['Array'],
386							handles => {
387							get_records => 'elements',
388							push_records => 'push',
389							pop_record => 'pop',
390							_empty_records => 'clear',
391							_num_records => 'count',
392							_access_record => 'accessor',
393							},
394							);
395
396
397							sub last_record {
398	16			16	1	608	my $self = shift;
399	16					567	my $count = $self->_num_records();
400	16	100				43	return if not $count;
401	15					766	return $self->_access_record( $count - 1 );
402							}
403
404
405							sub is_line_continued {
406	71			71	1	139	my $self = shift;
407	71	100				136	return 0 if not defined $self->multiline_type;
408	65	100	100			136	return 0
409							if $self->multiline_type eq 'join_last'
410							and $self->lines_parsed() == 1;
411	60					188	return 1;
412							}
413
414
415							sub join_last_line {
416	57			57	1	77	my $self = shift;
417	57					107	my ( $last, $line ) = ( shift, shift );
418	57					428	return $last . $line;
419							}
420
421
422							__PACKAGE__->meta->make_immutable;
423
424	30			30		63599	no Moose;
	30					73
	30					231
425
426							1;
427
428							__END__
429
430							=pod
431
432							=encoding UTF-8
433
434							=head1 NAME
435
436							Text::Parser - Simplifies text parsing. Easily extensible to parse any text format.
437
438							=head1 VERSION
439
440							version 0.927
441
442							=head1 SYNOPSIS
443
444							use Text::Parser;
445
446							my $parser = Text::Parser->new();
447							$parser->read(shift);
448							print $parser->get_records, "\n";
449
450							The above code prints the content of the file (named in the first argument) to C<STDOUT>.
451
452							my $parser = Text::Parser->new();
453							$parser->add_rule(do => 'print');
454							$parser->read(shift);
455
456							This example also does the same as the earlier one. For more complex examples see the L<manual\|Text::Parser::Manual>.
457
458							=head1 OVERVIEW
459
460							The L<need\|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it efficiently. Most programmers still write boilerplate code with a C<while> loop.
461
462							Instead C<Text::Parser> allows programmers to parse text with terse, self-explanatory L<rules\|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK\|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK. Incidentally, AWK is L<one of the ancestors of Perl\|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to extend the capabilities of AWK, although that's not really the case. Command-line C<perl -lane> or even C<perl -lan script.pl> are L<very limited\|Text::Parser::Manual::ComparingWithNativePerl> in what they can do. Programmers cannot use them for serious projects. And parsing text files in regular Perl involves writing the same C<while> loop again. L<This website\|https://perl-begin.org/uses/text-parsing/> summarizes the options available in Perl so far.
463
464							With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read\|/read>> method automatically runs each rule collecting records from the text input into an array internally. And finally C<L<get_records\|/get_records>> can retrieve the records. Thus the programmer now has the power of Perl to create complex data structures, along with the elegance of AWK to parse text files. The L<manuals\|Text::Parser::Manual> illustrate this with L<examples\|Text::Parser::Manual::ComparingWithNativePerl>.
465
466							=head1 CONSTRUCTOR
467
468							=head2 new
469
470							Takes optional attributes as in example below. See section L<ATTRIBUTES\|/ATTRIBUTES> for a list of the attributes and their description.
471
472							my $parser = Text::Parser->new(
473							auto_chomp => 0,
474							multiline_type => 'join_last',
475							auto_trim => 'b',
476							auto_split => 1,
477							FS => qr/\s+/,
478							);
479
480							=head1 ATTRIBUTES
481
482							The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name.
483
484							=head2 auto_chomp
485
486							Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>.
487
488							print "Parser will chomp lines automatically\n" if $parser->auto_chomp;
489
490							=head2 auto_split
491
492							Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields.
493
494							If it is set to a true value, each line will be split into fields, and a set of methods (a quick list L<here\|/"Other methods available on auto_split">) become accessible within the C<L<save_record\|/save_record>> method. These methods are documented in L<Text::Parser::AutoSplit>.
495
496							=head2 auto_trim
497
498							Read-write attribute. The values this can take are shown under the C<L<new\|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed).
499
500							$parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default)
501
502							=head2 FS
503
504							Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+\|[,]/> which will split across either spaces or commas. The default value for this argument is C<qr/\s+/>.
505
506							The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program\|https://www.gnu.org/software/gawk/gawk.html>.
507
508							$parser->FS( qr/\s+$\|\s$/ );
509
510							C<FS> I<can> be changed in your implementation of C<save_record>. But the changes would take effect only on the next line.
511
512							=head2 multiline_type
513
514							If the target text format allows line-wrapping with a continuation character, the C<multiline_type> option tells the parser to join them into a single line. When setting this attribute, one must re-define L<two more methods\|/"PARSING LINE-WRAPPED FILES">.
515
516							By default, the read-write C<multiline_type> attribute has a value of C<undef>, i.e., the target text format will not have wrapped lines. It can be set to either C<'join_next'> or C<'join_last'>.
517
518							$parser->multiline_type(undef);
519							$parser->multiline_type('join_next');
520
521							my $mult = $parser->multiline_type;
522							print "Parser is a multi-line parser of type: $mult" if defined $mult;
523
524							=over 4
525
526							=item *
527
528							If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>.
529
530							=item *
531
532							If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>.
533
534							=item *
535
536							To "slurp" a file into a single string, set C<multiline_type> to C<join_last>. In this special case, you don't need to re-define the C<L<is_line_continued\|/is_line_continued>> and C<L<join_last_line\|/join_last_line>> methods.
537
538							=back
539
540							=head1 METHODS
541
542							These are meant to be called from the C<::main> program or within subclasses. In general, don't override them - just use them.
543
544							=head2 add_rule
545
546							Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor.
547
548							$parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing
549							$parser->add_rule(if => 'm/li/', do => 'print', dont_record => 1); # Prints lines with 'li'
550							$parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements
551
552							Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute.
553
554							=head2 clear_rules
555
556							Takes no arguments, returns nothing. Clears the rules that were added to the object.
557
558							$parser->clear_rules;
559
560							This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule\|/BEGIN_rule>> and C<L<END_rule\|/END_rule>>.
561
562							=head2 BEGIN_rule
563
564							Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored.
565
566							$parser->BEGIN_rule(do => '~count = 0;');
567
568							=over 4
569
570							=item *
571
572							Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
573
574							=item *
575
576							The C<BEGIN> block is mainly used to initialize some variables. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
577
578							=back
579
580							=head2 END_rule
581
582							Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method.
583
584							$parser->END_rule(do => 'print ~count, "\n";');
585
586							=over 4
587
588							=item *
589
590							Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
591
592							=item *
593
594							The C<END> block is mainly used to do final processing of collected records. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
595
596							=back
597
598							=head2 read
599
600							Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class.
601
602							$parser->read($filename); # Read the file
603							$parser->read(\*STDIN); # Read the filehandle
604
605							The above could also be done in two steps if the developer so chooses.
606
607							$parser->filename($filename);
608							$parser->read(); # equiv: $parser->read($filename)
609
610							$parser->filehandle(\*STDIN);
611							$parser->read(); # equiv: $parser->read(\*STDIN)
612
613							The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading\|/abort_reading>> method.
614
615							Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter.
616
617							$parser->read('myfile.txt'); # Will close file automatically
618
619							open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
620							$parser->read(\*MYFH); # Will not close MYFH
621							close MYFH;
622
623							B<Note:> To extend the class to other text formats, override C<L<save_record\|/save_record>>.
624
625							=head2 filename
626
627							Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
628
629							print "Last read ", $parser->filename, "\n";
630
631							The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read\|/read>>.
632
633							$parser->read(shift @ARGV);
634							print $parser->filename(), ":\n",
635							"=" x (length($parser->filename())+1),
636							"\n",
637							$parser->get_records(),
638							"\n";
639
640							A C<read> call with a filehandle, will clear the last file name.
641
642							$parser->read(\*MYFH);
643							print "Last file name is lost\n" if not defined $parser->filename();
644
645							=head2 filehandle
646
647							Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved.
648
649							my $fh = $parser->filehandle();
650
651							Like C<L<filename\|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename.
652
653							$parser->read(\*STDIN);
654							my $lastfh = $parser->filehandle(); # Will return glob of STDIN
655
656							=head2 lines_parsed
657
658							Takes no arguments. Returns the number of lines last parsed. Every call to C<read>, causes the value to be auto-reset.
659
660							print $parser->lines_parsed, " lines were parsed\n";
661
662							=head2 has_aborted
663
664							Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle.
665
666							print "Aborted\n" if $parser->has_aborted();
667
668							=head2 get_records
669
670							Takes no arguments. Returns an array containing all the records saved by the parser.
671
672							foreach my $record ( $parser->get_records ) {
673							$i++;
674							print "Record: $i: ", $record, "\n";
675							}
676
677							=head2 pop_record
678
679							Takes no arguments and pops the last saved record.
680
681							my $last_rec = $parser->pop_record;
682							$uc_last = uc $last_rec;
683							$parser->save_record($uc_last);
684
685							=head2 last_record
686
687							Takes no arguments and returns the last saved record. Leaves the saved records untouched.
688
689							my $last_rec = $parser->last_record;
690
691							=head1 USE ONLY IN RULES AND SUBCLASS
692
693							Do NOT override these methods. They are valid only within a subclass, inside the user-implementation of methods described under L<OVERRIDE IN SUBCLASS\|/"OVERRIDE IN SUBCLASS">.
694
695							=head2 this_line
696
697							Takes no arguments, and returns the current line being parsed. For example:
698
699							sub save_record {
700							# ...
701							do_something($self->this_line);
702							# ...
703							}
704
705							=head2 abort_reading
706
707							Takes no arguments. Returns C<1>. To be used only in the derived class to abort C<read> in the middle.
708
709							sub save_record {
710							# ...
711							$self->abort_reading if some_condition($self->this_line);
712							# ...
713							}
714
715							=head2 push_records
716
717							This is useful if one needs to implement an C<include>-like command in some text format. The example below illustrates this.
718
719							package OneParser;
720							use Moose;
721							extends 'Text::Parser';
722
723							sub save_record {
724							# ...
725							# Under some condition:
726							my $parser = AnotherParser->new();
727							$parser->read($some_file);
728							$self->push_records($parser->get_records);
729							# ...
730							}
731
732							=head2 Other methods available on C<auto_split>
733
734							When the C<L<auto_split\|/auto_split>> attribute is on, (or if it is turned on later), the following additional methods become available:
735
736							=over 4
737
738							=item *
739
740							L<NF\|Text::Parser::AutoSplit/NF>
741
742							=item *
743
744							L<fields\|Text::Parser::AutoSplit/fields>
745
746							=item *
747
748							L<field\|Text::Parser::AutoSplit/field>
749
750							=item *
751
752							L<field_range\|Text::Parser::AutoSplit/field_range>
753
754							=item *
755
756							L<join_range\|Text::Parser::AutoSplit/join_range>
757
758							=item *
759
760							L<find_field\|Text::Parser::AutoSplit/find_field>
761
762							=item *
763
764							L<find_field_index\|Text::Parser::AutoSplit/find_field_index>
765
766							=item *
767
768							L<splice_fields\|Text::Parser::AutoSplit/splice_fields>
769
770							=back
771
772							=head1 OVERRIDE IN SUBCLASS
773
774							The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass.
775
776							=head2 save_record
777
778							This method may be re-defined in a subclass to parse the target text format. The default implementation takes a single argument and stores it as a record. If no arguments are passed, C<undef> is stored as a record. Note that unlike earlier versions of C<Text::Parser> it is not required to override this method in your derived class. You can simply use the rules instead.
779
780							For a developer re-defining C<save_record>, in addition to C<L<this_line\|/"this_line">>, several additional methods become available if the C<auto_split> attribute is set. These methods are described in greater detail in L<Text::Parser::AutoSplit>, and they are accessible only within C<save_record>.
781
782							B<Note:> Developers may store records in any form - string, array reference, hash reference, complex data structure, or an object of some class. The program that reads these records using C<L<get_records\|/get_records>> has to interpret them. So developers should document the records created by their own implementation of C<save_record>.
783
784							=head2 PARSING LINE-WRAPPED FILES
785
786							These methods are useful when parsing line-wrapped files, i.e., if the target text format allows wrapping the content of one line into multiple lines. In such cases, you should C<extend> the C<Text::Parser> class and override the following methods.
787
788							=head3 is_line_continued
789
790							If the target text format supports line-wrapping, the developer must override and implement this method. Your method should take a string argument and return a boolean indicating if the line is continued or not.
791
792							There is a default implementation shipped with this class with return values as follows:
793
794							multiline_type \| Return value
795							------------------+---------------------------------
796							undef \| 0
797							join_last \| 0 for first line, 1 otherwise
798							join_next \| 1
799
800							=head3 join_last_line
801
802							Again, the developer should implement this method. This method should take two strings, join them while removing any continuation characters, and return the result. The default implementation just concatenates two strings and returns the result without removing anything (not even C<chomp>). See L<Text::Parser::Multiline> for more on this.
803
804							=head1 EXAMPLES
805
806							You can find example code in L<Text::Parser::Manual::ComparingWithNativePerl>.
807
808							=head1 THINGS TO BE DONE
809
810							This package is still a work in progress. Future versions are expected to include features to:
811
812							=over 4
813
814							=item *
815
816							read and parse from a buffer
817
818							=item *
819
820							automatically uncompress input
821
822							=item *
823
824							I<suggestions welcome ...>
825
826							=back
827
828							Contributions and suggestions are welcome and properly acknowledged.
829
830							=head1 SEE ALSO
831
832							=over 4
833
834							=item *
835
836							L<Text::Parser::Manual> - Read this manual
837
838							=item *
839
840							L<The AWK Programming Language\|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberger, and B<K>ernighan.
841
842							=item *
843
844							L<Text::Parser::Errors> - documentation of the exceptions this class throws
845
846							=item *
847
848							L<Text::Parser::Multiline> - how to read line-wrapped text input
849
850							=back
851
852							=head1 BUGS
853
854							Please report any bugs or feature requests on the bugtracker website
855							L<http://github.com/balajirama/Text-Parser/issues>
856
857							When submitting a bug or request, please include a test-file or a
858							patch to an existing test-file that illustrates the bug or desired
859							feature.
860
861							=head1 AUTHOR
862
863							Balaji Ramasubramanian <balajiram@cpan.org>
864
865							=head1 COPYRIGHT AND LICENSE
866
867							This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
868
869							This is free software; you can redistribute it and/or modify it under
870							the same terms as the Perl 5 programming language system itself.
871
872							=head1 CONTRIBUTORS
873
874							=for stopwords H.Merijn Brand - Tux Mohammad S Anwar
875
876							=over 4
877
878							=item *
879
880							H.Merijn Brand - Tux <h.m.brand@xs4all.nl>
881
882							=item *
883
884							Mohammad S Anwar <mohammad.anwar@yahoo.com>
885
886							=back
887
888							=cut
889