File Coverage

blib/lib/Text/Parser/AutoSplit.pm
Criterion Covered Total %
statement 46 46 100.0
branch 12 12 100.0
condition n/a
subroutine 15 15 100.0
pod 2 2 100.0
total 75 75 100.0


line stmt bran cond sub pod time code
1 16     16   9644 use strict;
  16         36  
  16         470  
2 16     16   79 use warnings;
  16         30  
  16         677  
3              
4             package Text::Parser::AutoSplit 0.927;
5              
6             # ABSTRACT: A role that adds the ability to auto-split a line into fields
7              
8 16     16   85 use Moose::Role;
  16         28  
  16         163  
9 16     16   85791 use MooseX::CoverableModifiers;
  16         36  
  16         141  
10 16     16   2252 use String::Util qw(trim);
  16         33  
  16         975  
11 16     16   95 use Text::Parser::Errors;
  16         28  
  16         1928  
12 16     16   6934 use English;
  16         14944  
  16         87  
13              
14              
15             has _fields => (
16                 is => 'ro',
17                 isa => 'ArrayRef[Str]',
18                 lazy => 1,
19                 init_arg => undef,
20                 default => sub { [] },
21                 traits => ['Array'],
22                 writer => '_set_fields',
23                 clearer => '_clear_all_fields',
24                 handles => {
25                     'NF' => 'count',
26                     'fields' => 'elements',
27                     'field' => 'get',
28                     'find_field' => 'first',
29                     'find_field_index' => 'first_index',
30                     'splice_fields' => 'splice',
31                 },
32             );
33              
34             requires '_set_this_line', 'FS', '_clear_this_line', 'this_line',
35                 'auto_split';
36              
37             after _set_this_line => sub {
38 400     400   2166     my $self = shift;
39 400 100       8198     return if not $self->auto_split;
40 354         6695     $self->_set_fields( [ split $self->FS, trim( $self->this_line ) ] );
41             };
42              
43             after _clear_this_line => sub {
44 35     35   257     my $self = shift;
45 35         1134     $self->_clear_all_fields;
46             };
47              
48              
49             sub field_range {
50 32     32 1 70820     my $self = shift;
51 32         64     my (@range) = $self->__validate_index_range(@_);
52 29         66     $self->_sub_field_range(@range);
53             }
54              
55             sub __validate_index_range {
56 32     32   47     my $self = shift;
57              
58 32         742     $self->field($_) for (@_);
59 29         57     map { _pos_index( $_, $self->NF ) } __set_defaults(@_);
  58         1737  
60             }
61              
62             sub __set_defaults {
63 29     29   52     my ( $i, $j ) = @_;
64 29 100       50     $i = 0 if not defined $i;
65 29 100       56     $j = -1 if not defined $j;
66 29         59     return ( $i, $j );
67             }
68              
69             sub _pos_index {
70 58     58   104     my ( $ind, $nf ) = ( shift, shift );
71 58 100       170     ( $ind < 0 ) ? $ind + $nf : $ind;
72             }
73              
74             sub _sub_field_range {
75 29     29   58     my ( $self, $start, $end ) = ( shift, shift, shift );
76 29 100       124     my (@range)
77                     = ( $start <= $end ) ? ( $start .. $end ) : reverse( $end .. $start );
78 58         43     map { $self->field($_) } @range;
  1660         46336  
79             }
80              
81              
82             sub join_range {
83 17     17 1 54     my $self = shift;
84 17 100       45     my $sep = ( @_ < 3 ) ? $LIST_SEPARATOR : pop;
85 17         40     join $sep, $self->field_range(@_);
86             }
87              
88              
89             1;
90              
91             __END__
92            
93             =pod
94            
95             =encoding UTF-8
96            
97             =head1 NAME
98            
99             Text::Parser::AutoSplit - A role that adds the ability to auto-split a line into fields
100            
101             =head1 VERSION
102            
103             version 0.927
104            
105             =head1 SYNOPSIS
106            
107             package MyNewParser;
108            
109             use parent 'Text::Parser';
110            
111             sub new {
112             my $pkg = shift;
113             $pkg->SUPER::new(
114             auto_split => 1,
115             FS => qr/\s+\(*|\s*\)/,
116             @_,
117             );
118             }
119            
120             sub save_record {
121             my $self = shift;
122             return $self->abort_reading if $self->NF > 0 and $self->field(0) eq 'STOP_READING';
123             $self->SUPER::save_record(@_) if $self->NF > 0 and $self->field(0) !~ /^[#]/;
124             }
125            
126             package main;
127            
128             my $parser = MyNewParser->new();
129             $parser->read(shift);
130             print $parser->get_records(), "\n";
131            
132             =head1 DESCRIPTION
133            
134             C<Text::Parser::AutoSplit> is a role that gets automatically composed into an object of L<Text::Parser> if the C<auto_split> attribute is set during object construction. It is useful for writing complex parsers as derived classes of L<Text::Parser>, because one has access to the fields. The field separator is controlled by another attribute C<FS>, which can be accessed via an accessor method of the same name. When the C<auto_split> attribute is set to a true value, the object of C<Text::Parser> will be able to use methods described in this role.
135            
136             =head1 METHODS AVAILABLE ON AUTO-SPLIT
137            
138             These methods become available when C<auto_split> attribute is true. A runtime error will be thrown if they are called without C<auto_split> being set. They can used inside the subclass implementation of C<L<save_record|Text::Parser/save_record>>.
139            
140             =head2 NF
141            
142             The name of this method comes from the C<NF> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>. Takes no arguments, and returns the number of fields.
143            
144             sub save_record {
145             my $self = shift;
146             $self->save_record(@_) if $self->NF > 0;
147             }
148            
149             =head2 fields
150            
151             Takes no argument and returns all the fields as an array.
152            
153             ## Inside your own save_record method ...
154             foreach my $fld ($self->fields) {
155             # do something ...
156             }
157            
158             =head2 field
159            
160             Takes an integer argument and returns the field whose index is passed as argument.
161            
162             sub save_record {
163             my $self = shift;
164             $self->abort if $self->field(0) eq 'END';
165             }
166            
167             You can specify negative elements to start counting from the end. For example index C<-1> is the last element, C<-2> is the penultimate one, etc. Let's say the following is the text on a line in a file:
168            
169             THIS IS SOME TEXT
170             field(0) field(1) field(2) field(3)
171             field(-4) field(-3) field(-2) field(-1)
172            
173             =head2 field_range
174            
175             Takes two optional integers C<$i> and C<$j> as arguments and returns an array, where the first element is C<field($i)>, the second C<field($i+1)>, and so on, till C<field($j)>.
176            
177             ## returns 4 elements starting with field(3) upto field(6)
178             my (@flds) = $self->field_range(3, 6);
179            
180             Both C<$i> and C<$j> can be negative, as is allowed by the C<field()> method. So, for example:
181            
182             $self->field_range(-2, -1); # Returns the last two elements
183            
184             If C<$j> argument is omitted or set to C<undef>, it will be treated as C<-1> and if C<$i> is omitted, it is treated as C<0>. For example:
185            
186             $self->field_range(1); # Returns all elements omitting the first
187             $self->field_range(); # same as fields()
188             $self->field_range(undef, -2); # Returns all elements omitting the last
189            
190             =head2 join_range
191            
192             This method essentially joins the return value of the C<field_range> method. It takes three arguments. The first argument is the joining string, and the other two are optional integer arguments C<$i> and C<$j> just like C<field_range> method.
193            
194             $self->join_range(); # Joins all fields with $" (see perlvar)
195             $self->join_range(0, -1, '#'); # Joins with # separator
196             $self->join_range(2); # Joins all elements starting with index 2 to the end
197             # with $"
198             $self->join_range(1, -2); # Joins all elements in specified range with $"
199            
200             =head2 find_field
201            
202             This method finds an element matching a given criterion. The match is done by a subroutine reference passed as argument to this method. The subroutine will be called against each field on the line, until one matches or all elements have been checked. Each field will be available in the subroutine as C<$_>. Its behavior is the same as the C<first> function of L<List::Util>.
203            
204             sub save_record {
205             my $self = shift;
206             my $param = $self->find_field(
207             sub { $_ =~ /[=]/ }
208             );
209             }
210            
211             =head2 find_field_index
212            
213             This is similar to the C<L<find_field|/find_field>> method above, except that it returns the index of the element instead of the element itself.
214            
215             sub save_record {
216             my $self = shift;
217             my $idx = $self->find_field_index(
218             sub { $_ =~ /[=]/ }
219             );
220             }
221            
222             =head2 splice_fields
223            
224             Just like Perl's built-in C<splice> function.
225            
226             ## Inside your own save_record method ...
227             my (@removed1) = $self->splice_fields($offset, $length, @values);
228             my (@removed2) = $self->splice_fields($offset, $length);
229             my (@removed3) = $self->splice_fields($offset);
230            
231             The offset above is a required argument and can be negative.
232            
233             B<WARNING:> This is a destructive function. It I<will> remove elements just like Perl's built-in C<splice> does, and the removed will be returned. If you only want to get the elements in a specific range of indices, try the C<L<field_range|/field_range>> method instead.
234            
235             =head1 SEE ALSO
236            
237             =over 4
238            
239             =item *
240            
241             L<List::Util>
242            
243             =item *
244            
245             L<List::SomeUtils>
246            
247             =item *
248            
249             L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>
250            
251             =back
252            
253             =head1 BUGS
254            
255             Please report any bugs or feature requests on the bugtracker website
256             L<http://github.com/balajirama/Text-Parser/issues>
257            
258             When submitting a bug or request, please include a test-file or a
259             patch to an existing test-file that illustrates the bug or desired
260             feature.
261            
262             =head1 AUTHOR
263            
264             Balaji Ramasubramanian <balajiram@cpan.org>
265            
266             =head1 COPYRIGHT AND LICENSE
267            
268             This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
269            
270             This is free software; you can redistribute it and/or modify it under
271             the same terms as the Perl 5 programming language system itself.
272            
273             =cut
274