File Coverage

blib/lib/Text/Parser.pm

Criterion	Covered	Total	%
statement	183	183	100.0
branch	77	78	98.7
condition	20	21	95.2
subroutine	50	50	100.0
pod	10	11	90.9
total	340	343	99.1

line	stmt	bran	cond	sub	pod	time	code
1	30			30		1893039	use warnings;
	30					150
	30					937
2	30			30		149	use strict;
	30					55
	30					604
3	30			30		143	use feature ':5.14';
	30					49
	30					4660
4
5							package Text::Parser 0.926;
6
7							# ABSTRACT: Simplifies text parsing. Easily extensible to parse any text format.
8
9
10	30			30		11107	use Moose;
	30					9901451
	30					229
11	30			30		230820	use MooseX::CoverableModifiers;
	30					157775
	30					191
12	30			30		16343	use MooseX::StrictConstructor;
	30					860686
	30					122
13	30			30		285444	use namespace::autoclean;
	30					75
	30					158
14	30			30		2755	use Moose::Util 'apply_all_roles', 'ensure_all_roles';
	30					82
	30					265
15	30			30		10246	use Moose::Util::TypeConstraints;
	30					62
	30					245
16	30			30		82168	use String::Util qw(trim ltrim rtrim eqq);
	30					93116
	30					2362
17	30			30		12608	use Text::Parser::Errors;
	30					124
	30					4200
18	30			30		15739	use Text::Parser::Rule;
	30					142
	30					2785
19
20							enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)];
21							enum 'Text::Parser::Types::TrimType' => [qw(l r b n)];
22
23	30			30		297	no Moose::Util::TypeConstraints;
	30					65
	30					318
24	30			30		26575	use FileHandle;
	30					259625
	30					177
25	30			30		10182	use Try::Tiny;
	30					73
	30					4906
26
27
28							sub BUILD {
29	47			47	0	151813	my $self = shift;
30	47	100				273	ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split;
31	47	100				106927	return if not defined $self->multiline_type;
32	11					71	ensure_all_roles $self, 'Text::Parser::Multiline';
33							}
34
35
36							has auto_chomp => (
37							is => 'rw',
38							isa => 'Bool',
39							lazy => 1,
40							default => 0,
41							);
42
43
44							has auto_split => (
45							is => 'rw',
46							isa => 'Bool',
47							lazy => 1,
48							default => 0,
49							);
50
51							around auto_split => sub {
52	995			995		12768	my ( $orig, $self ) = ( shift, shift );
53	995					2473	__newval_auto_split( $orig, $self, @_ );
54	995					156642	return $orig->($self);
55							};
56
57							sub __newval_auto_split {
58	995			995		1589	my ( $orig, $self, $newval ) = ( shift, shift, shift );
59	995	100				2171	return if not defined $newval;
60	68	100	100			694	$self->_clear_all_fields if not $newval and $orig->($self);
61	68					1548	$orig->( $self, $newval );
62	68	100				248	ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval;
63							}
64
65
66							has auto_trim => (
67							is => 'rw',
68							isa => 'Text::Parser::Types::TrimType',
69							lazy => 1,
70							default => 'n',
71							);
72
73
74							has FS => (
75							is => 'rw',
76							isa => 'RegexpRef',
77							lazy => 1,
78							default => sub {qr/\s+/},
79							);
80
81
82							has multiline_type => (
83							is => 'rw',
84							isa => 'Text::Parser::Types::MultilineType\|Undef',
85							lazy => 1,
86							default => undef,
87							);
88
89							around multiline_type => sub {
90	1209			1209		61784	my ( $orig, $self ) = ( shift, shift );
91	1209					27348	my $oldval = $orig->($self);
92	1209	100	100			6129	return $oldval if not @_ or eqq( $_[0], $oldval );
93	8					102	return __newval_multi_line( $orig, $self, @_ );
94							};
95
96							sub __newval_multi_line {
97	8			8		27	my ( $orig, $self, $newval ) = ( shift, shift, shift );
98	8	100				52	ensure_all_roles( $self, 'Text::Parser::Multiline' )
99							if defined $newval;
100	8					29361	return $orig->( $self, $newval );
101							}
102
103
104							has _obj_rules => (
105							is => 'rw',
106							isa => 'ArrayRef[Text::Parser::Rule]',
107							lazy => 1,
108							default => sub { [] },
109							traits => ['Array'],
110							handles => {
111							_push_rule => 'push',
112							_has_no_rules => 'is_empty',
113							_get_rules => 'elements',
114							},
115							);
116
117							sub add_rule {
118	18			18	1	120	my $self = shift;
119	18	100				65	$self->auto_split(1) if not $self->auto_split;
120	18					518	my $rule = Text::Parser::Rule->new(@_);
121	18					574	$self->_push_rule($rule);
122							}
123
124
125							sub clear_rules {
126	2			2	1	6	my $self = shift;
127	2					69	$self->_obj_rules( [] );
128	2					65	$self->_clear_begin_rule;
129	2					63	$self->_clear_end_rule;
130							}
131
132
133							has _begin_rule => (
134							is => 'rw',
135							isa => 'Text::Parser::Rule',
136							predicate => '_has_begin_rule',
137							clearer => '_clear_begin_rule',
138							);
139
140							sub BEGIN_rule {
141	4			4	1	24	my $self = shift;
142	4	100				11	$self->auto_split(1) if not $self->auto_split;
143	4					14	my (%opt) = _defaults_for_begin_end(@_);
144	4					17	$self->_modify_rule( '_begin_rule', %opt );
145							}
146
147							sub _defaults_for_begin_end {
148	7			7		28	my (%opt) = @_;
149	7	100				25	$opt{dont_record} = 1 if not exists $opt{dont_record};
150	7	100				18	delete $opt{if} if exists $opt{if};
151	7	100				18	delete $opt{continue_to_next} if exists $opt{continue_to_next};
152	7					32	return (%opt);
153							}
154
155							sub _modify_rule {
156	7			7		23	my ( $self, $func, %opt ) = @_;
157	7					25	my $pred = '_has' . $func;
158	7	100				219	$self->_append_rule_lines( $func, \%opt ) if $self->$pred();
159	7					188	my $rule = Text::Parser::Rule->new(%opt);
160	7					170	$self->$func($rule);
161							}
162
163							sub _append_rule_lines {
164	3			3		7	my ( $self, $func, $opt ) = ( shift, shift, shift );
165	3					73	my $old = $self->$func();
166	3					61	$opt->{do} = $old->action . $opt->{do};
167							}
168
169
170							has _end_rule => (
171							is => 'rw',
172							isa => 'Text::Parser::Rule',
173							predicate => '_has_end_rule',
174							clearer => '_clear_end_rule',
175							);
176
177							sub END_rule {
178	3			3	1	23	my $self = shift;
179	3	100				21	$self->auto_split(1) if not $self->auto_split;
180	3					12	my (%opt) = _defaults_for_begin_end(@_);
181	3					15	$self->_modify_rule( '_end_rule', %opt );
182							}
183
184
185							sub read {
186	74			74	1	68700	my $self = shift;
187	74	100				317	return if not defined $self->_handle_read_inp(@_);
188	68					365	$self->_run_begin_end_block('_begin_rule');
189	68					410	$self->__read_and_close_filehandle;
190	59					291	$self->_run_begin_end_block('_end_rule');
191	59					1459	$self->_ExAWK_symbol_table( {} );
192							}
193
194							sub _handle_read_inp {
195	74			74		149	my $self = shift;
196	74	100				242	return $self->filehandle if not @_;
197	72	100	100			440	return if not ref( $_[0] ) and not $_[0];
198	71	100				2117	return $self->filename(@_) if not ref( $_[0] );
199	6					20	return $self->filehandle(@_);
200							}
201
202							has _ExAWK_symbol_table => (
203							is => 'rw',
204							isa => 'HashRef[Any]',
205							default => sub { {} },
206							lazy => 1,
207							);
208
209							sub _run_begin_end_block {
210	127			127		311	my ( $self, $func ) = ( shift, shift );
211	127					310	my $pred = '_has' . $func;
212	127	100				3975	return if not $self->$pred();
213	4					92	my $rule = $self->$func();
214	4					20	$rule->run( $self, 0 );
215							}
216
217							sub __read_and_close_filehandle {
218	68			68		139	my $self = shift;
219	68					375	$self->_prep_to_read_file;
220	68					431	$self->__read_file_handle;
221	59	100				1952	$self->_close_filehandles if $self->_has_filename;
222	59					972	$self->_clear_this_line;
223							}
224
225							sub _prep_to_read_file {
226	68			68		143	my $self = shift;
227	68					2735	$self->_reset_line_count;
228	68					2324	$self->_empty_records;
229	68					2180	$self->_clear_abort;
230							}
231
232							sub __read_file_handle {
233	68			68		747	my $self = shift;
234	68					488	my $fh = $self->filehandle();
235	68					1364	while (<$fh>) {
236	517	100				1303	last if not $self->__parse_line($_);
237							}
238							}
239
240							sub __parse_line {
241	517			517		1033	my ( $self, $line ) = ( shift, shift );
242	517					15896	$self->_next_line_parsed();
243	517					1127	$line = $self->_def_line_manip($line);
244	517					2584	$self->__try_to_parse($line);
245	511					19267	return not $self->has_aborted;
246							}
247
248							sub _def_line_manip {
249	517			517		899	my ( $self, $line ) = ( shift, shift );
250	517	100				9815	chomp $line if $self->auto_chomp;
251	517					1233	return $self->_trim_line($line);
252							}
253
254							sub _trim_line {
255	517			517		960	my ( $self, $line ) = ( shift, shift );
256	517	100				9695	return $line if $self->auto_trim eq 'n';
257	69	100				493	return trim($line) if $self->auto_trim eq 'b';
258	8	100				147	return ltrim($line) if $self->auto_trim eq 'l';
259	4					11	return rtrim($line);
260							}
261
262							sub __try_to_parse {
263	517			517		874	my ( $self, $line ) = @_;
264	517					5398	$self->_set_this_line($line);
265	517			517		33972	try { $self->save_record($line); }
266	517			6		3522	catch { die $_; };
	6					4720
267							}
268
269
270							has filename => (
271							is => 'rw',
272							isa => 'Str\|Undef',
273							lazy => 1,
274							init_arg => undef,
275							default => undef,
276							predicate => '_has_filename',
277							clearer => '_clear_filename',
278							trigger => \&_set_filehandle,
279							);
280
281							sub _set_filehandle {
282	71			71		13187	my $self = shift;
283	71	100				1855	return $self->_clear_filename if not defined $self->filename;
284	70					347	$self->_save_filehandle( $self->__get_valid_fh );
285							}
286
287							sub __get_valid_fh {
288	70			70		176	my $self = shift;
289	70					273	my $fname = $self->_get_valid_text_filename;
290	70	100				797	return FileHandle->new( $fname, 'r' ) if defined $fname;
291	4					98	$fname = $self->filename;
292	4					140	$self->_clear_filename;
293	4					15	$self->_throw_invalid_file_exception($fname);
294							}
295
296							# Don't touch: Override this in Text::Parser::AutoUncompress
297							sub _get_valid_text_filename {
298	70			70		224	my $self = shift;
299	70					1554	my $fname = $self->filename;
300	70	100	66			6731	return $fname if -f $fname and -r $fname and -T $fname;
			100
301	4					19	return;
302							}
303
304							# Don't touch: Override this in Text::Parser::AutoUncompress
305							sub _throw_invalid_file_exception {
306	4			4		14	my ( $self, $fname ) = ( shift, shift );
307	4	100				52	die invalid_filename( name => $fname ) if not -f $fname;
308	1	50				15	die file_not_readable( name => $fname ) if not -r $fname;
309	1					9	die file_not_plain_text( name => $fname );
310							}
311
312
313							has filehandle => (
314							is => 'rw',
315							isa => 'FileHandle\|Undef',
316							lazy => 1,
317							init_arg => undef,
318							default => undef,
319							predicate => '_has_filehandle',
320							writer => '_save_filehandle',
321							reader => '_get_filehandle',
322							clearer => '_close_filehandles',
323							);
324
325							sub filehandle {
326	84			84	1	1090	my $self = shift;
327	84	100	100			2563	return if not @_ and not $self->_has_filehandle;
328	81	100				563	$self->_save_filehandle(@_) if @_;
329	77	100				382	$self->_clear_filename if @_;
330	77					2719	return $self->_get_filehandle;
331							}
332
333
334							has lines_parsed => (
335							is => 'ro',
336							isa => 'Int',
337							lazy => 1,
338							init_arg => undef,
339							default => 0,
340							traits => ['Counter'],
341							handles => {
342							_next_line_parsed => 'inc',
343							_reset_line_count => 'reset',
344							}
345							);
346
347
348							sub save_record {
349	313			313	1	6294	my ( $self, $record ) = ( shift, shift );
350	313	100				10360	$self->_has_no_rules
351							? $self->push_records($record)
352							: $self->_run_through_rules;
353							}
354
355							sub _run_through_rules {
356	144			144		225	my $self = shift;
357	144					4409	foreach my $rule ( $self->_get_rules ) {
358	277	100				791	next if not $rule->test($self);
359	97					330	$rule->run($self);
360	97	100				2594	last if not $rule->continue_to_next;
361							}
362							}
363
364
365							has _current_line => (
366							is => 'ro',
367							isa => 'Str\|Undef',
368							init_arg => undef,
369							writer => '_set_this_line',
370							reader => 'this_line',
371							clearer => '_clear_this_line',
372							default => undef,
373							);
374
375
376
377							has abort => (
378							is => 'rw',
379							isa => 'Bool',
380							lazy => 1,
381							default => 0,
382							traits => ['Bool'],
383							reader => 'has_aborted',
384							handles => {
385							abort_reading => 'set',
386							_clear_abort => 'unset'
387							},
388							);
389
390
391							has records => (
392							isa => 'ArrayRef[Any]',
393							is => 'ro',
394							lazy => 1,
395							default => sub { return []; },
396							auto_deref => 1,
397							init_arg => undef,
398							traits => ['Array'],
399							handles => {
400							get_records => 'elements',
401							push_records => 'push',
402							pop_record => 'pop',
403							_empty_records => 'clear',
404							_num_records => 'count',
405							_access_record => 'accessor',
406							},
407							);
408
409
410							sub last_record {
411	16			16	1	616	my $self = shift;
412	16					591	my $count = $self->_num_records();
413	16	100				48	return if not $count;
414	15					469	return $self->_access_record( $count - 1 );
415							}
416
417
418							sub is_line_continued {
419	71			71	1	127	my $self = shift;
420	71	100				141	return 0 if not defined $self->multiline_type;
421	65	100	100			128	return 0
422							if $self->multiline_type eq 'join_last'
423							and $self->lines_parsed() == 1;
424	60					181	return 1;
425							}
426
427
428							sub join_last_line {
429	57			57	1	95	my $self = shift;
430	57					88	my ( $last, $line ) = ( shift, shift );
431	57					188	return $last . $line;
432							}
433
434
435							__PACKAGE__->meta->make_immutable;
436
437	30			30		79444	no Moose;
	30					105
	30					277
438
439							1;
440
441							__END__
442
443							=pod
444
445							=encoding UTF-8
446
447							=head1 NAME
448
449							Text::Parser - Simplifies text parsing. Easily extensible to parse any text format.
450
451							=head1 VERSION
452
453							version 0.926
454
455							=head1 SYNOPSIS
456
457							use Text::Parser;
458
459							my $parser = Text::Parser->new();
460							$parser->read(shift);
461							print $parser->get_records, "\n";
462
463							The above code prints the content of the file (named in the first argument) to C<STDOUT>.
464
465							my $parser = Text::Parser->new();
466							$parser->add_rule(do => 'print');
467							$parser->read(shift);
468
469							This example also does the same as the earlier one. For more complex examples see the L<manual\|Text::Parser::Manual>.
470
471							=head1 OVERVIEW
472
473							The L<need\|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it efficiently. Most programmers still write boilerplate code with a C<while> loop.
474
475							Instead C<Text::Parser> allows programmers to parse text with terse, self-explanatory L<rules\|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK\|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK. Incidentally, AWK is L<one of the ancestors of Perl\|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to extend the capabilities of AWK, although that's not really the case. Command-line C<perl -lane> or even C<perl -lan script.pl> are L<very limited\|Text::Parser::Manual::ComparingWithNativePerl> in what they can do. Programmers cannot use them for serious projects. And parsing text files in regular Perl involves writing the same C<while> loop again. L<This website\|https://perl-begin.org/uses/text-parsing/> summarizes the options available in Perl so far.
476
477							With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read\|/read>> method automatically runs each rule collecting records from the text input into an array internally. And finally C<L<get_records\|/get_records>> can retrieve the records. Thus the programmer now has the power of Perl to create complex data structures, along with the elegance of AWK to parse text files. The L<manuals\|Text::Parser::Manual> illustrate this with L<examples\|Text::Parser::Manual::ComparingWithNativePerl>.
478
479							=head1 CONSTRUCTOR
480
481							=head2 new
482
483							Takes optional attributes as in example below. See section L<ATTRIBUTES\|/ATTRIBUTES> for a list of the attributes and their description.
484
485							my $parser = Text::Parser->new(
486							auto_chomp => 0,
487							multiline_type => 'join_last',
488							auto_trim => 'b',
489							auto_split => 1,
490							FS => qr/\s+/,
491							);
492
493							=head1 ATTRIBUTES
494
495							The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name.
496
497							=head2 auto_chomp
498
499							Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>.
500
501							print "Parser will chomp lines automatically\n" if $parser->auto_chomp;
502
503							=head2 auto_split
504
505							Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields.
506
507							If it is set to a true value, each line will be split into fields, and a set of methods (a quick list L<here\|/"Other methods available on auto_split">) become accessible within the C<L<save_record\|/save_record>> method. These methods are documented in L<Text::Parser::AutoSplit>.
508
509							=head2 auto_trim
510
511							Read-write attribute. The values this can take are shown under the C<L<new\|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed).
512
513							$parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default)
514
515							=head2 FS
516
517							Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+\|[,]/> which will split across either spaces or commas. The default value for this argument is C<qr/\s+/>.
518
519							The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program\|https://www.gnu.org/software/gawk/gawk.html>.
520
521							$parser->FS( qr/\s+$\|\s$/ );
522
523							C<FS> I<can> be changed in your implementation of C<save_record>. But the changes would take effect only on the next line.
524
525							=head2 multiline_type
526
527							If the target text format allows line-wrapping with a continuation character, the C<multiline_type> option tells the parser to join them into a single line. When setting this attribute, one must re-define L<two more methods\|/"PARSING LINE-WRAPPED FILES">.
528
529							By default, the read-write C<multiline_type> attribute has a value of C<undef>, i.e., the target text format will not have wrapped lines. It can be set to either C<'join_next'> or C<'join_last'>.
530
531							$parser->multiline_type(undef);
532							$parser->multiline_type('join_next');
533
534							my $mult = $parser->multiline_type;
535							print "Parser is a multi-line parser of type: $mult" if defined $mult;
536
537							=over 4
538
539							=item *
540
541							If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>.
542
543							=item *
544
545							If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>.
546
547							=item *
548
549							To "slurp" a file into a single string, set C<multiline_type> to C<join_last>. In this special case, you don't need to re-define the C<L<is_line_continued\|/is_line_continued>> and C<L<join_last_line\|/join_last_line>> methods.
550
551							=back
552
553							=head1 METHODS
554
555							These are meant to be called from the C<::main> program or within subclasses. In general, don't override them - just use them.
556
557							=head2 add_rule
558
559							Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor.
560
561							$parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing
562							$parser->add_rule(if => 'm/li/', do => 'print', dont_record => 1); # Prints lines with 'li'
563							$parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements
564
565							Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute.
566
567							=head2 clear_rules
568
569							Takes no arguments, returns nothing. Clears the rules that were added to the object.
570
571							$parser->clear_rules;
572
573							This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule\|/BEGIN_rule>> and C<L<END_rule\|/END_rule>>.
574
575							=head2 BEGIN_rule
576
577							Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored.
578
579							$parser->BEGIN_rule(do => '~count = 0;');
580
581							=over 4
582
583							=item *
584
585							Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
586
587							=item *
588
589							The C<BEGIN> block is mainly used to initialize some variables. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
590
591							=back
592
593							=head2 END_rule
594
595							Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method.
596
597							$parser->END_rule(do => 'print ~count, "\n";');
598
599							=over 4
600
601							=item *
602
603							Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
604
605							=item *
606
607							The C<END> block is mainly used to do final processing of collected records. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
608
609							=back
610
611							=head2 read
612
613							Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class.
614
615							$parser->read($filename); # Read the file
616							$parser->read(\*STDIN); # Read the filehandle
617
618							The above could also be done in two steps if the developer so chooses.
619
620							$parser->filename($filename);
621							$parser->read(); # equiv: $parser->read($filename)
622
623							$parser->filehandle(\*STDIN);
624							$parser->read(); # equiv: $parser->read(\*STDIN)
625
626							The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading\|/abort_reading>> method.
627
628							Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter.
629
630							$parser->read('myfile.txt'); # Will close file automatically
631
632							open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
633							$parser->read(\*MYFH); # Will not close MYFH
634							close MYFH;
635
636							B<Note:> To extend the class to other text formats, override C<L<save_record\|/save_record>>.
637
638							=head2 filename
639
640							Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
641
642							print "Last read ", $parser->filename, "\n";
643
644							The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read\|/read>>.
645
646							$parser->read(shift @ARGV);
647							print $parser->filename(), ":\n",
648							"=" x (length($parser->filename())+1),
649							"\n",
650							$parser->get_records(),
651							"\n";
652
653							A C<read> call with a filehandle will clear the last file name.
654
655							$parser->read(\*MYFH);
656							print "Last file name is lost\n" if not defined $parser->filename();
657
658							=head2 filehandle
659
660							Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved.
661
662							my $fh = $parser->filehandle();
663
664							Like C<L<filename\|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename.
665
666							$parser->read(\*STDIN);
667							my $lastfh = $parser->filehandle(); # Will return glob of STDIN
668
669							=head2 lines_parsed
670
671							Takes no arguments. Returns the number of lines last parsed. Every call to C<read> causes the value to be auto-reset.
672
673							print $parser->lines_parsed, " lines were parsed\n";
674
675							=head2 has_aborted
676
677							Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle.
678
679							print "Aborted\n" if $parser->has_aborted();
680
681							=head2 get_records
682
683							Takes no arguments. Returns an array containing all the records saved by the parser.
684
685							foreach my $record ( $parser->get_records ) {
686							$i++;
687							print "Record: $i: ", $record, "\n";
688							}
689
690							=head2 pop_record
691
692							Takes no arguments and pops the last saved record.
693
694							my $last_rec = $parser->pop_record;
695							$uc_last = uc $last_rec;
696							$parser->save_record($uc_last);
697
698							=head2 last_record
699
700							Takes no arguments and returns the last saved record. Leaves the saved records untouched.
701
702							my $last_rec = $parser->last_record;
703
704							=head1 USE ONLY IN RULES AND SUBCLASS
705
706							Do NOT override these methods. They are valid only within a subclass, inside the user-implementation of methods described under L<OVERRIDE IN SUBCLASS\|/"OVERRIDE IN SUBCLASS">.
707
708							=head2 this_line
709
710							Takes no arguments, and returns the current line being parsed. For example:
711
712							sub save_record {
713							# ...
714							do_something($self->this_line);
715							# ...
716							}
717
718							=head2 abort_reading
719
720							Takes no arguments. Returns C<1>. To be used only in the derived class to abort C<read> in the middle.
721
722							sub save_record {
723							# ...
724							$self->abort_reading if some_condition($self->this_line);
725							# ...
726							}
727
728							=head2 push_records
729
730							This is useful if one needs to implement an C<include>-like command in some text format. The example below illustrates this.
731
732							package OneParser;
733							use Moose;
734							extends 'Text::Parser';
735
736							sub save_record {
737							# ...
738							# Under some condition:
739							my $parser = AnotherParser->new();
740							$parser->read($some_file);
741							$self->push_records($parser->get_records);
742							# ...
743							}
744
745							=head2 Other methods available on C<auto_split>
746
747							When the C<L<auto_split\|/auto_split>> attribute is on, (or if it is turned on later), the following additional methods become available:
748
749							=over 4
750
751							=item *
752
753							L<NF\|Text::Parser::AutoSplit/NF>
754
755							=item *
756
757							L<fields\|Text::Parser::AutoSplit/fields>
758
759							=item *
760
761							L<field\|Text::Parser::AutoSplit/field>
762
763							=item *
764
765							L<field_range\|Text::Parser::AutoSplit/field_range>
766
767							=item *
768
769							L<join_range\|Text::Parser::AutoSplit/join_range>
770
771							=item *
772
773							L<find_field\|Text::Parser::AutoSplit/find_field>
774
775							=item *
776
777							L<find_field_index\|Text::Parser::AutoSplit/find_field_index>
778
779							=item *
780
781							L<splice_fields\|Text::Parser::AutoSplit/splice_fields>
782
783							=back
784
785							=head1 OVERRIDE IN SUBCLASS
786
787							The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass.
788
789							=head2 save_record
790
791							This method may be re-defined in a subclass to parse the target text format. The default implementation takes a single argument and stores it as a record. If no arguments are passed, C<undef> is stored as a record. Note that unlike earlier versions of C<Text::Parser> it is not required to override this method in your derived class. You can simply use the rules instead.
792
793							For a developer re-defining C<save_record>, in addition to C<L<this_line\|/"this_line">>, several additional methods become available if the C<auto_split> attribute is set. These methods are described in greater detail in L<Text::Parser::AutoSplit>, and they are accessible only within C<save_record>.
794
795							B<Note:> Developers may store records in any form - string, array reference, hash reference, complex data structure, or an object of some class. The program that reads these records using C<L<get_records\|/get_records>> has to interpret them. So developers should document the records created by their own implementation of C<save_record>.
796
797							=head2 PARSING LINE-WRAPPED FILES
798
799							These methods are useful when parsing line-wrapped files, i.e., if the target text format allows wrapping the content of one line into multiple lines. In such cases, you should C<extend> the C<Text::Parser> class and override the following methods.
800
801							=head3 is_line_continued
802
803							If the target text format supports line-wrapping, the developer must override and implement this method. Your method should take a string argument and return a boolean indicating if the line is continued or not.
804
805							There is a default implementation shipped with this class with return values as follows:
806
807							multiline_type \| Return value
808							------------------+---------------------------------
809							undef \| 0
810							join_last \| 0 for first line, 1 otherwise
811							join_next \| 1
812
813							=head3 join_last_line
814
815							Again, the developer should implement this method. This method should take two strings, join them while removing any continuation characters, and return the result. The default implementation just concatenates two strings and returns the result without removing anything (not even C<chomp>). See L<Text::Parser::Multiline> for more on this.
816
817							=head1 EXAMPLES
818
819							You can find example code in L<Text::Parser::Manual::ComparingWithNativePerl>.
820
821							=head1 THINGS TO BE DONE
822
823							This package is still a work in progress. Future versions are expected to include features to:
824
825							=over 4
826
827							=item *
828
829							read and parse from a buffer
830
831							=item *
832
833							automatically uncompress input
834
835							=item *
836
837							I<suggestions welcome ...>
838
839							=back
840
841							Contributions and suggestions are welcome and properly acknowledged.
842
843							=head1 SEE ALSO
844
845							=over 4
846
847							=item *
848
849							L<Text::Parser::Manual> - Read this manual
850
851							=item *
852
853							L<The AWK Programming Language\|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberger, and B<K>ernighan.
854
855							=item *
856
857							L<Text::Parser::Errors> - documentation of the exceptions this class throws
858
859							=item *
860
861							L<Text::Parser::Multiline> - how to read line-wrapped text input
862
863							=back
864
865							=head1 BUGS
866
867							Please report any bugs or feature requests on the bugtracker website
868							L<http://github.com/balajirama/Text-Parser/issues>
869
870							When submitting a bug or request, please include a test-file or a
871							patch to an existing test-file that illustrates the bug or desired
872							feature.
873
874							=head1 AUTHOR
875
876							Balaji Ramasubramanian <balajiram@cpan.org>
877
878							=head1 COPYRIGHT AND LICENSE
879
880							This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
881
882							This is free software; you can redistribute it and/or modify it under
883							the same terms as the Perl 5 programming language system itself.
884
885							=head1 CONTRIBUTORS
886
887							=for stopwords H.Merijn Brand - Tux Mohammad S Anwar
888
889							=over 4
890
891							=item *
892
893							H.Merijn Brand - Tux <h.m.brand@xs4all.nl>
894
895							=item *
896
897							Mohammad S Anwar <mohammad.anwar@yahoo.com>
898
899							=back
900
901							=cut
902