File Coverage

blib/lib/Text/Parser/AutoSplit.pm

Criterion	Covered	Total	%
statement	49	49	100.0
branch	12	12	100.0
condition			n/a
subroutine	16	16	100.0
pod	2	2	100.0
total	79	79	100.0

line	stmt	bran	sub	pod	time	code
1	22		22		16104	use strict;
	22				54
	22				779
2	22		22		134	use warnings;
	22				51
	22				1057
3
4						package Text::Parser::AutoSplit 1.000;
5
6						# ABSTRACT: A role that adds the ability to auto-split a line into fields
7
8	22		22		147	use Moose::Role;
	22				51
	22				397
9	22		22		138798	use MooseX::CoverableModifiers;
	22				368
	22				234
10	22		22		3644	use String::Util qw(trim);
	22				156
	22				1532
11	22		22		359	use Text::Parser::Error;
	22				58
	22				231
12	22		22		26327	use English;
	22				24543
	22				184
13
14
15						has _fields => (
16						is => 'ro',
17						isa => 'ArrayRef[Str]',
18						lazy => 1,
19						init_arg => undef,
20						default => sub { [] },
21						traits => ['Array'],
22						writer => '_set_fields',
23						clearer => '_clear_all_fields',
24						handles => {
25						'NF' => 'count',
26						'fields' => 'elements',
27						'field' => 'get',
28						'find_field' => 'first',
29						'find_field_index' => 'first_index',
30						'splice_fields' => 'splice',
31						},
32						);
33
34						requires '_set_this_line', 'FS', '_clear_this_line', 'this_line',
35						'auto_split';
36
37						after _set_this_line => sub {
38	528		528		3429	my $self = shift;
39	528	100			12776	return if not $self->auto_split;
40	482				11314	$self->_set_fields( [ split $self->FS, trim( $self->this_line ) ] );
41						};
42
43						after _clear_this_line => sub {
44	44		44		395	my $self = shift;
45	44				1641	$self->_clear_all_fields;
46						};
47
48
49						sub field_range {
50	70		70	1	94113	my $self = shift;
51	70				232	my (@range) = $self->__validate_index_range(@_);
52	67				202	$self->_sub_field_range(@range);
53						}
54
55						sub __validate_index_range {
56	70		70		133	my $self = shift;
57
58	70				2318	$self->field($_) for (@_);
59	67				249	map { _pos_index( $_, $self->NF ) } __set_defaults(@_);
	134				4894
60						}
61
62						sub __set_defaults {
63	67		67		203	my ( $i, $j ) = @_;
64	67	100			214	$i = 0 if not defined $i;
65	67	100			515	$j = -1 if not defined $j;
66	67				182	return ( $i, $j );
67						}
68
69						sub _pos_index {
70	134		134		301	my ( $ind, $nf ) = ( shift, shift );
71	134	100			531	( $ind < 0 ) ? $ind + $nf : $ind;
72						}
73
74						sub _sub_field_range {
75	67		67		149	my ( $self, $start, $end ) = ( shift, shift, shift );
76	67	100			338	my (@range)
77						= ( $start <= $end ) ? ( $start .. $end ) : reverse( $end .. $start );
78	134				120	map { $self->field($_) } @range;
	1824				63455
79						}
80
81
82						sub join_range {
83	17		17	1	82	my $self = shift;
84	17	100			64	my $sep = ( @_ < 3 ) ? $LIST_SEPARATOR : pop;
85	17				80	join $sep, $self->field_range(@_);
86						}
87
88
89	22		22		29503	no Moose::Role;
	22				55
	22				196
90
91						1;
92
93						__END__
94
95						=pod
96
97						=encoding UTF-8
98
99						=head1 NAME
100
101						Text::Parser::AutoSplit - A role that adds the ability to auto-split a line into fields
102
103						=head1 VERSION
104
105						version 1.000
106
107						=head1 SYNOPSIS
108
109						use Text::Parser;
110
111						my $p1 = Text::Parser->new();
112						$p1->read('/path/to/file');
113						my $p2 = Text::Parser->new();
114						$p2->add_rule( do => '$this->field(0);' );
115						## add_rule method automatically sets up auto_split
116						$p2->read('/another/file');
117
118						=head1 DESCRIPTION
119
120						C<Text::Parser::AutoSplit> is a role that is automatically composed into an object of L<Text::Parser> if the C<auto_split> attribute is set during object construction, or when C<L<add_rule\|Text::Parser/"add_rule">> method is called. The field separator is controlled by another C<Text::Parser> attribute C<L<FS\|Text::Parser/"FS">>.
121
122						When the C<auto_split> attribute is set to a true value, the object of C<Text::Parser> will be able to use methods described in this role.
123
124						=head1 METHODS AVAILABLE ON AUTO-SPLIT
125
126						These methods become available when C<auto_split> attribute is true. A runtime error will be thrown if they are called without C<auto_split> being set. They can be used inside a subclass or in the rules.
127
128						=head2 NF
129
130						The name of this method comes from the C<NF> variable in the popular L<GNU Awk program\|https://www.gnu.org/software/gawk/gawk.html>.
131
132						Returns the number of fields on a line. The field separator is specified with C<FS> attribute.
133
134						$parser->applies_rule(
135						if => '$this->NF >= 2'
136						do => '$this->collect_info($2);',
137						dont_record => 1,
138						);
139
140						If your rule contains any positional identifiers (like C<$1>, C<$2>, C<$3> etc., to identify the field) the rule automatically checks that there are at least as many fields as the largest positional identifier. So the above rule could also be written as:
141
142						$parser->applies_rule(
143						do => '$this->collect_info($2);',
144						dont_record => 1,
145						);
146
147						It has the same results.
148
149						=head2 fields
150
151						Takes no argument and returns all the fields as an array. The C<FS> field separator controls how fields are defined. Leading and trailing spaces are trimmed.
152
153						$parser->add_rule( do => 'return [ $this->fields ];' );
154
155						=head2 field
156
157						Takes an integer argument and returns the field whose index is passed as argument.
158
159						$parser->add_rule(
160						if => '$this->field(0) eq "END"',
161						do => '$this->abort_reading;',
162						dont_record => 1,
163						);
164
165						You can specify negative elements to start counting from the end. For example index C<-1> is the last element, C<-2> is the penultimate one, etc. Let's say the following is the text on a line in a file:
166
167						THIS IS SOME TEXT
168						field(0) field(1) field(2) field(3)
169						field(-4) field(-3) field(-2) field(-1)
170
171						=head2 field_range
172
173						Takes two optional integers C<$i> and C<$j> as arguments and returns an array, where the first element is C<field($i)>, the second C<field($i+1)>, and so on, till C<field($j)>.
174
175						$parser->add_rule(
176						if => '$1 eq "NAME:"',
177						do => 'return [ $this->field_range(1, -1) ];',
178						);
179
180						Both C<$i> and C<$j> can be negative, as is allowed by the C<field()> method. So, for example:
181
182						$parser->add_rule(
183						do => 'return [ $this->field_range(-2, -1) ];' # Saves the last two fields of every line
184						);
185
186						If C<$j> argument is omitted or set to C<undef>, it will be treated as C<-1> and if C<$i> is omitted, it is treated as C<0>. For example the following may be used inside rules:
187
188						$this->field_range(1); # Returns all elements omitting the first
189						$this->field_range(); # same as fields()
190						$this->field_range(undef, -2); # Returns all elements omitting the last
191
192						=head2 join_range
193
194						This method essentially joins the return value of the C<field_range> method. It takes three arguments. The last argument is the joining string, and the first two are optional integer arguments C<$i> and C<$j> just like C<field_range> method.
195
196						$parser->add_rule(
197						do => qq(
198						$this->join_range(); # Joins all fields with $" (see perlvar)
199						$this->join_range(0, -1, '#'); # Joins with # separator
200						$this->join_range(2); # Joins all elements starting with index 2 to the end
201						# with $"
202						$this->join_range(1, -2); # Joins all elements in specified range with $"
203						));
204						## The return value of the last statement in the 'do' block is saved as a record
205
206						=head2 find_field
207
208						This method finds an element matching a given criterion. The match is done by a subroutine reference passed as argument to this method. The subroutine will be called against each field on the line, until one matches or all elements have been checked. Each field will be available in the subroutine as C<$_>. Its behavior is the same as the C<first> function of L<List::Util>.
209
210						sub save_record {
211						my $self = shift;
212						my $param = $self->find_field(
213						sub { $_ =~ /[=]/ }
214						);
215						}
216
217						=head2 find_field_index
218
219						This is similar to the C<L<find_field\|/find_field>> method above, except that it returns the index of the element instead of the element itself.
220
221						sub save_record {
222						my $self = shift;
223						my $idx = $self->find_field_index(
224						sub { $_ =~ /[=]/ }
225						);
226						}
227
228						=head2 splice_fields
229
230						Just like Perl's built-in C<splice> function.
231
232						## Inside your own save_record method ...
233						my (@removed1) = $self->splice_fields($offset, $length, @values);
234						my (@removed2) = $self->splice_fields($offset, $length);
235						my (@removed3) = $self->splice_fields($offset);
236
237						The offset above is a required argument and can be negative.
238
239						B<WARNING:> This is a destructive function. It I<will> remove elements just like Perl's built-in C<splice> does, and the removed will be returned. If you only want to get the elements in a specific range of indices, try the C<L<field_range\|/field_range>> method instead.
240
241						=head1 SEE ALSO
242
243						=over 4
244
245						=item *
246
247						L<List::Util>
248
249						=item *
250
251						L<List::SomeUtils>
252
253						=item *
254
255						L<GNU Awk program\|https://www.gnu.org/software/gawk/gawk.html>
256
257						=back
258
259						=head1 BUGS
260
261						Please report any bugs or feature requests on the bugtracker website
262						L<http://github.com/balajirama/Text-Parser/issues>
263
264						When submitting a bug or request, please include a test-file or a
265						patch to an existing test-file that illustrates the bug or desired
266						feature.
267
268						=head1 AUTHOR
269
270						Balaji Ramasubramanian <balajiram@cpan.org>
271
272						=head1 COPYRIGHT AND LICENSE
273
274						This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
275
276						This is free software; you can redistribute it and/or modify it under
277						the same terms as the Perl 5 programming language system itself.
278
279						=cut
280