line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
22
|
|
|
22
|
|
16104
|
use strict; |
|
22
|
|
|
|
|
54
|
|
|
22
|
|
|
|
|
779
|
|
2
|
22
|
|
|
22
|
|
134
|
use warnings; |
|
22
|
|
|
|
|
51
|
|
|
22
|
|
|
|
|
1057
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Text::Parser::AutoSplit 1.000; |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
22
|
|
|
22
|
|
147
|
use Moose::Role; |
|
22
|
|
|
|
|
51
|
|
|
22
|
|
|
|
|
397
|
|
9
|
22
|
|
|
22
|
|
138798
|
use MooseX::CoverableModifiers; |
|
22
|
|
|
|
|
368
|
|
|
22
|
|
|
|
|
234
|
|
10
|
22
|
|
|
22
|
|
3644
|
use String::Util qw(trim); |
|
22
|
|
|
|
|
156
|
|
|
22
|
|
|
|
|
1532
|
|
11
|
22
|
|
|
22
|
|
359
|
use Text::Parser::Error; |
|
22
|
|
|
|
|
58
|
|
|
22
|
|
|
|
|
231
|
|
12
|
22
|
|
|
22
|
|
26327
|
use English; |
|
22
|
|
|
|
|
24543
|
|
|
22
|
|
|
|
|
184
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
has _fields => ( |
16
|
|
|
|
|
|
|
is => 'ro', |
17
|
|
|
|
|
|
|
isa => 'ArrayRef[Str]', |
18
|
|
|
|
|
|
|
lazy => 1, |
19
|
|
|
|
|
|
|
init_arg => undef, |
20
|
|
|
|
|
|
|
default => sub { [] }, |
21
|
|
|
|
|
|
|
traits => ['Array'], |
22
|
|
|
|
|
|
|
writer => '_set_fields', |
23
|
|
|
|
|
|
|
clearer => '_clear_all_fields', |
24
|
|
|
|
|
|
|
handles => { |
25
|
|
|
|
|
|
|
'NF' => 'count', |
26
|
|
|
|
|
|
|
'fields' => 'elements', |
27
|
|
|
|
|
|
|
'field' => 'get', |
28
|
|
|
|
|
|
|
'find_field' => 'first', |
29
|
|
|
|
|
|
|
'find_field_index' => 'first_index', |
30
|
|
|
|
|
|
|
'splice_fields' => 'splice', |
31
|
|
|
|
|
|
|
}, |
32
|
|
|
|
|
|
|
); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
requires '_set_this_line', 'FS', '_clear_this_line', 'this_line', |
35
|
|
|
|
|
|
|
'auto_split'; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
after _set_this_line => sub { |
38
|
528
|
|
|
528
|
|
3429
|
my $self = shift; |
39
|
528
|
100
|
|
|
|
12776
|
return if not $self->auto_split; |
40
|
482
|
|
|
|
|
11314
|
$self->_set_fields( [ split $self->FS, trim( $self->this_line ) ] ); |
41
|
|
|
|
|
|
|
}; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
after _clear_this_line => sub { |
44
|
44
|
|
|
44
|
|
395
|
my $self = shift; |
45
|
44
|
|
|
|
|
1641
|
$self->_clear_all_fields; |
46
|
|
|
|
|
|
|
}; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
sub field_range { |
50
|
70
|
|
|
70
|
1
|
94113
|
my $self = shift; |
51
|
70
|
|
|
|
|
232
|
my (@range) = $self->__validate_index_range(@_); |
52
|
67
|
|
|
|
|
202
|
$self->_sub_field_range(@range); |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub __validate_index_range { |
56
|
70
|
|
|
70
|
|
133
|
my $self = shift; |
57
|
|
|
|
|
|
|
|
58
|
70
|
|
|
|
|
2318
|
$self->field($_) for (@_); |
59
|
67
|
|
|
|
|
249
|
map { _pos_index( $_, $self->NF ) } __set_defaults(@_); |
|
134
|
|
|
|
|
4894
|
|
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub __set_defaults { |
63
|
67
|
|
|
67
|
|
203
|
my ( $i, $j ) = @_; |
64
|
67
|
100
|
|
|
|
214
|
$i = 0 if not defined $i; |
65
|
67
|
100
|
|
|
|
515
|
$j = -1 if not defined $j; |
66
|
67
|
|
|
|
|
182
|
return ( $i, $j ); |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub _pos_index { |
70
|
134
|
|
|
134
|
|
301
|
my ( $ind, $nf ) = ( shift, shift ); |
71
|
134
|
100
|
|
|
|
531
|
( $ind < 0 ) ? $ind + $nf : $ind; |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub _sub_field_range { |
75
|
67
|
|
|
67
|
|
149
|
my ( $self, $start, $end ) = ( shift, shift, shift ); |
76
|
67
|
100
|
|
|
|
338
|
my (@range) |
77
|
|
|
|
|
|
|
= ( $start <= $end ) ? ( $start .. $end ) : reverse( $end .. $start ); |
78
|
134
|
|
|
|
|
120
|
map { $self->field($_) } @range; |
|
1824
|
|
|
|
|
63455
|
|
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub join_range { |
83
|
17
|
|
|
17
|
1
|
82
|
my $self = shift; |
84
|
17
|
100
|
|
|
|
64
|
my $sep = ( @_ < 3 ) ? $LIST_SEPARATOR : pop; |
85
|
17
|
|
|
|
|
80
|
join $sep, $self->field_range(@_); |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
|
89
|
22
|
|
|
22
|
|
29503
|
no Moose::Role; |
|
22
|
|
|
|
|
55
|
|
|
22
|
|
|
|
|
196
|
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
1; |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
__END__ |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=pod |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=encoding UTF-8 |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head1 NAME |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Text::Parser::AutoSplit - A role that adds the ability to auto-split a line into fields |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 VERSION |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
version 1.000 |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 SYNOPSIS |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
use Text::Parser; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
my $p1 = Text::Parser->new(); |
112
|
|
|
|
|
|
|
$p1->read('/path/to/file'); |
113
|
|
|
|
|
|
|
my $p2 = Text::Parser->new(); |
114
|
|
|
|
|
|
|
$p2->add_rule( do => '$this->field(0);' ); |
115
|
|
|
|
|
|
|
## add_rule method automatically sets up auto_split |
116
|
|
|
|
|
|
|
$p2->read('/another/file'); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head1 DESCRIPTION |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
C<Text::Parser::AutoSplit> is a role that is automatically composed into an object of L<Text::Parser> if the C<auto_split> attribute is set during object construction, or when C<L<add_rule|Text::Parser/"add_rule">> method is called. The field separator is controlled by another C<Text::Parser> attribute C<L<FS|Text::Parser/"FS">>. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
When the C<auto_split> attribute is set to a true value, the object of C<Text::Parser> will be able to use methods described in this role. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head1 METHODS AVAILABLE ON AUTO-SPLIT |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
These methods become available when C<auto_split> attribute is true. A runtime error will be thrown if they are called without C<auto_split> being set. They can be used inside a subclass or in the rules. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head2 NF |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
The name of this method comes from the C<NF> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Returns the number of fields on a line. The field separator is specified with C<FS> attribute. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
$parser->applies_rule( |
135
|
|
|
|
|
|
|
if => '$this->NF >= 2' |
136
|
|
|
|
|
|
|
do => '$this->collect_info($2);', |
137
|
|
|
|
|
|
|
dont_record => 1, |
138
|
|
|
|
|
|
|
); |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
If your rule contains any positional identifiers (like C<$1>, C<$2>, C<$3> etc., to identify the field) the rule automatically checks that there are at least as many fields as the largest positional identifier. So the above rule could also be written as: |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
$parser->applies_rule( |
143
|
|
|
|
|
|
|
do => '$this->collect_info($2);', |
144
|
|
|
|
|
|
|
dont_record => 1, |
145
|
|
|
|
|
|
|
); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
It has the same results. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 fields |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Takes no argument and returns all the fields as an array. The C<FS> field separator controls how fields are defined. Leading and trailing spaces are trimmed. |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
$parser->add_rule( do => 'return [ $this->fields ];' ); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 field |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Takes an integer argument and returns the field whose index is passed as argument. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
$parser->add_rule( |
160
|
|
|
|
|
|
|
if => '$this->field(0) eq "END"', |
161
|
|
|
|
|
|
|
do => '$this->abort_reading;', |
162
|
|
|
|
|
|
|
dont_record => 1, |
163
|
|
|
|
|
|
|
); |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
You can specify negative elements to start counting from the end. For example index C<-1> is the last element, C<-2> is the penultimate one, etc. Let's say the following is the text on a line in a file: |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
THIS IS SOME TEXT |
168
|
|
|
|
|
|
|
field(0) field(1) field(2) field(3) |
169
|
|
|
|
|
|
|
field(-4) field(-3) field(-2) field(-1) |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head2 field_range |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Takes two optional integers C<$i> and C<$j> as arguments and returns an array, where the first element is C<field($i)>, the second C<field($i+1)>, and so on, till C<field($j)>. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
$parser->add_rule( |
176
|
|
|
|
|
|
|
if => '$1 eq "NAME:"', |
177
|
|
|
|
|
|
|
do => 'return [ $this->field_range(1, -1) ];', |
178
|
|
|
|
|
|
|
); |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Both C<$i> and C<$j> can be negative, as is allowed by the C<field()> method. So, for example: |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
$parser->add_rule( |
183
|
|
|
|
|
|
|
do => 'return [ $this->field_range(-2, -1) ];' # Saves the last two fields of every line |
184
|
|
|
|
|
|
|
); |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
If C<$j> argument is omitted or set to C<undef>, it will be treated as C<-1> and if C<$i> is omitted, it is treated as C<0>. For example the following may be used inside rules: |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
$this->field_range(1); # Returns all elements omitting the first |
189
|
|
|
|
|
|
|
$this->field_range(); # same as fields() |
190
|
|
|
|
|
|
|
$this->field_range(undef, -2); # Returns all elements omitting the last |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head2 join_range |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
This method essentially joins the return value of the C<field_range> method. It takes three arguments. The last argument is the joining string, and the first two are optional integer arguments C<$i> and C<$j> just like C<field_range> method. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
$parser->add_rule( |
197
|
|
|
|
|
|
|
do => qq( |
198
|
|
|
|
|
|
|
$this->join_range(); # Joins all fields with $" (see perlvar) |
199
|
|
|
|
|
|
|
$this->join_range(0, -1, '#'); # Joins with # separator |
200
|
|
|
|
|
|
|
$this->join_range(2); # Joins all elements starting with index 2 to the end |
201
|
|
|
|
|
|
|
# with $" |
202
|
|
|
|
|
|
|
$this->join_range(1, -2); # Joins all elements in specified range with $" |
203
|
|
|
|
|
|
|
)); |
204
|
|
|
|
|
|
|
## The return value of the last statement in the 'do' block is saved as a record |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head2 find_field |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
This method finds an element matching a given criterion. The match is done by a subroutine reference passed as argument to this method. The subroutine will be called against each field on the line, until one matches or all elements have been checked. Each field will be available in the subroutine as C<$_>. Its behavior is the same as the C<first> function of L<List::Util>. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub save_record { |
211
|
|
|
|
|
|
|
my $self = shift; |
212
|
|
|
|
|
|
|
my $param = $self->find_field( |
213
|
|
|
|
|
|
|
sub { $_ =~ /[=]/ } |
214
|
|
|
|
|
|
|
); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 find_field_index |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
This is similar to the C<L<find_field|/find_field>> method above, except that it returns the index of the element instead of the element itself. |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub save_record { |
222
|
|
|
|
|
|
|
my $self = shift; |
223
|
|
|
|
|
|
|
my $idx = $self->find_field_index( |
224
|
|
|
|
|
|
|
sub { $_ =~ /[=]/ } |
225
|
|
|
|
|
|
|
); |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=head2 splice_fields |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Just like Perl's built-in C<splice> function. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
## Inside your own save_record method ... |
233
|
|
|
|
|
|
|
my (@removed1) = $self->splice_fields($offset, $length, @values); |
234
|
|
|
|
|
|
|
my (@removed2) = $self->splice_fields($offset, $length); |
235
|
|
|
|
|
|
|
my (@removed3) = $self->splice_fields($offset); |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
The offset above is a required argument and can be negative. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
B<WARNING:> This is a destructive function. It I<will> remove elements just like Perl's built-in C<splice> does, and the removed will be returned. If you only want to get the elements in a specific range of indices, try the C<L<field_range|/field_range>> method instead. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head1 SEE ALSO |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=over 4 |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=item * |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
L<List::Util> |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item * |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
L<List::SomeUtils> |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=item * |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html> |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=back |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=head1 BUGS |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
Please report any bugs or feature requests on the bugtracker website |
262
|
|
|
|
|
|
|
L<http://github.com/balajirama/Text-Parser/issues> |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
When submitting a bug or request, please include a test-file or a |
265
|
|
|
|
|
|
|
patch to an existing test-file that illustrates the bug or desired |
266
|
|
|
|
|
|
|
feature. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head1 AUTHOR |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Balaji Ramasubramanian <balajiram@cpan.org> |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
This software is copyright (c) 2018-2019 by Balaji Ramasubramanian. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
277
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|