line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
30
|
|
|
30
|
|
1893039
|
use warnings; |
|
30
|
|
|
|
|
150
|
|
|
30
|
|
|
|
|
937
|
|
2
|
30
|
|
|
30
|
|
149
|
use strict; |
|
30
|
|
|
|
|
55
|
|
|
30
|
|
|
|
|
604
|
|
3
|
30
|
|
|
30
|
|
143
|
use feature ':5.14'; |
|
30
|
|
|
|
|
49
|
|
|
30
|
|
|
|
|
4660
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package Text::Parser 0.926; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
|
10
|
30
|
|
|
30
|
|
11107
|
use Moose; |
|
30
|
|
|
|
|
9901451
|
|
|
30
|
|
|
|
|
229
|
|
11
|
30
|
|
|
30
|
|
230820
|
use MooseX::CoverableModifiers; |
|
30
|
|
|
|
|
157775
|
|
|
30
|
|
|
|
|
191
|
|
12
|
30
|
|
|
30
|
|
16343
|
use MooseX::StrictConstructor; |
|
30
|
|
|
|
|
860686
|
|
|
30
|
|
|
|
|
122
|
|
13
|
30
|
|
|
30
|
|
285444
|
use namespace::autoclean; |
|
30
|
|
|
|
|
75
|
|
|
30
|
|
|
|
|
158
|
|
14
|
30
|
|
|
30
|
|
2755
|
use Moose::Util 'apply_all_roles', 'ensure_all_roles'; |
|
30
|
|
|
|
|
82
|
|
|
30
|
|
|
|
|
265
|
|
15
|
30
|
|
|
30
|
|
10246
|
use Moose::Util::TypeConstraints; |
|
30
|
|
|
|
|
62
|
|
|
30
|
|
|
|
|
245
|
|
16
|
30
|
|
|
30
|
|
82168
|
use String::Util qw(trim ltrim rtrim eqq); |
|
30
|
|
|
|
|
93116
|
|
|
30
|
|
|
|
|
2362
|
|
17
|
30
|
|
|
30
|
|
12608
|
use Text::Parser::Errors; |
|
30
|
|
|
|
|
124
|
|
|
30
|
|
|
|
|
4200
|
|
18
|
30
|
|
|
30
|
|
15739
|
use Text::Parser::Rule; |
|
30
|
|
|
|
|
142
|
|
|
30
|
|
|
|
|
2785
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)]; |
21
|
|
|
|
|
|
|
enum 'Text::Parser::Types::TrimType' => [qw(l r b n)]; |
22
|
|
|
|
|
|
|
|
23
|
30
|
|
|
30
|
|
297
|
no Moose::Util::TypeConstraints; |
|
30
|
|
|
|
|
65
|
|
|
30
|
|
|
|
|
318
|
|
24
|
30
|
|
|
30
|
|
26575
|
use FileHandle; |
|
30
|
|
|
|
|
259625
|
|
|
30
|
|
|
|
|
177
|
|
25
|
30
|
|
|
30
|
|
10182
|
use Try::Tiny; |
|
30
|
|
|
|
|
73
|
|
|
30
|
|
|
|
|
4906
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub BUILD { |
29
|
47
|
|
|
47
|
0
|
151813
|
my $self = shift; |
30
|
47
|
100
|
|
|
|
273
|
ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split; |
31
|
47
|
100
|
|
|
|
106927
|
return if not defined $self->multiline_type; |
32
|
11
|
|
|
|
|
71
|
ensure_all_roles $self, 'Text::Parser::Multiline'; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
has auto_chomp => ( |
37
|
|
|
|
|
|
|
is => 'rw', |
38
|
|
|
|
|
|
|
isa => 'Bool', |
39
|
|
|
|
|
|
|
lazy => 1, |
40
|
|
|
|
|
|
|
default => 0, |
41
|
|
|
|
|
|
|
); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
has auto_split => ( |
45
|
|
|
|
|
|
|
is => 'rw', |
46
|
|
|
|
|
|
|
isa => 'Bool', |
47
|
|
|
|
|
|
|
lazy => 1, |
48
|
|
|
|
|
|
|
default => 0, |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
around auto_split => sub { |
52
|
995
|
|
|
995
|
|
12768
|
my ( $orig, $self ) = ( shift, shift ); |
53
|
995
|
|
|
|
|
2473
|
__newval_auto_split( $orig, $self, @_ ); |
54
|
995
|
|
|
|
|
156642
|
return $orig->($self); |
55
|
|
|
|
|
|
|
}; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub __newval_auto_split { |
58
|
995
|
|
|
995
|
|
1589
|
my ( $orig, $self, $newval ) = ( shift, shift, shift ); |
59
|
995
|
100
|
|
|
|
2171
|
return if not defined $newval; |
60
|
68
|
100
|
100
|
|
|
694
|
$self->_clear_all_fields if not $newval and $orig->($self); |
61
|
68
|
|
|
|
|
1548
|
$orig->( $self, $newval ); |
62
|
68
|
100
|
|
|
|
248
|
ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
has auto_trim => ( |
67
|
|
|
|
|
|
|
is => 'rw', |
68
|
|
|
|
|
|
|
isa => 'Text::Parser::Types::TrimType', |
69
|
|
|
|
|
|
|
lazy => 1, |
70
|
|
|
|
|
|
|
default => 'n', |
71
|
|
|
|
|
|
|
); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
has FS => ( |
75
|
|
|
|
|
|
|
is => 'rw', |
76
|
|
|
|
|
|
|
isa => 'RegexpRef', |
77
|
|
|
|
|
|
|
lazy => 1, |
78
|
|
|
|
|
|
|
default => sub {qr/\s+/}, |
79
|
|
|
|
|
|
|
); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
has multiline_type => ( |
83
|
|
|
|
|
|
|
is => 'rw', |
84
|
|
|
|
|
|
|
isa => 'Text::Parser::Types::MultilineType|Undef', |
85
|
|
|
|
|
|
|
lazy => 1, |
86
|
|
|
|
|
|
|
default => undef, |
87
|
|
|
|
|
|
|
); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
around multiline_type => sub { |
90
|
1209
|
|
|
1209
|
|
61784
|
my ( $orig, $self ) = ( shift, shift ); |
91
|
1209
|
|
|
|
|
27348
|
my $oldval = $orig->($self); |
92
|
1209
|
100
|
100
|
|
|
6129
|
return $oldval if not @_ or eqq( $_[0], $oldval ); |
93
|
8
|
|
|
|
|
102
|
return __newval_multi_line( $orig, $self, @_ ); |
94
|
|
|
|
|
|
|
}; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub __newval_multi_line { |
97
|
8
|
|
|
8
|
|
27
|
my ( $orig, $self, $newval ) = ( shift, shift, shift ); |
98
|
8
|
100
|
|
|
|
52
|
ensure_all_roles( $self, 'Text::Parser::Multiline' ) |
99
|
|
|
|
|
|
|
if defined $newval; |
100
|
8
|
|
|
|
|
29361
|
return $orig->( $self, $newval ); |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
has _obj_rules => ( |
105
|
|
|
|
|
|
|
is => 'rw', |
106
|
|
|
|
|
|
|
isa => 'ArrayRef[Text::Parser::Rule]', |
107
|
|
|
|
|
|
|
lazy => 1, |
108
|
|
|
|
|
|
|
default => sub { [] }, |
109
|
|
|
|
|
|
|
traits => ['Array'], |
110
|
|
|
|
|
|
|
handles => { |
111
|
|
|
|
|
|
|
_push_rule => 'push', |
112
|
|
|
|
|
|
|
_has_no_rules => 'is_empty', |
113
|
|
|
|
|
|
|
_get_rules => 'elements', |
114
|
|
|
|
|
|
|
}, |
115
|
|
|
|
|
|
|
); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub add_rule { |
118
|
18
|
|
|
18
|
1
|
120
|
my $self = shift; |
119
|
18
|
100
|
|
|
|
65
|
$self->auto_split(1) if not $self->auto_split; |
120
|
18
|
|
|
|
|
518
|
my $rule = Text::Parser::Rule->new(@_); |
121
|
18
|
|
|
|
|
574
|
$self->_push_rule($rule); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub clear_rules { |
126
|
2
|
|
|
2
|
1
|
6
|
my $self = shift; |
127
|
2
|
|
|
|
|
69
|
$self->_obj_rules( [] ); |
128
|
2
|
|
|
|
|
65
|
$self->_clear_begin_rule; |
129
|
2
|
|
|
|
|
63
|
$self->_clear_end_rule; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
has _begin_rule => ( |
134
|
|
|
|
|
|
|
is => 'rw', |
135
|
|
|
|
|
|
|
isa => 'Text::Parser::Rule', |
136
|
|
|
|
|
|
|
predicate => '_has_begin_rule', |
137
|
|
|
|
|
|
|
clearer => '_clear_begin_rule', |
138
|
|
|
|
|
|
|
); |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub BEGIN_rule { |
141
|
4
|
|
|
4
|
1
|
24
|
my $self = shift; |
142
|
4
|
100
|
|
|
|
11
|
$self->auto_split(1) if not $self->auto_split; |
143
|
4
|
|
|
|
|
14
|
my (%opt) = _defaults_for_begin_end(@_); |
144
|
4
|
|
|
|
|
17
|
$self->_modify_rule( '_begin_rule', %opt ); |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
sub _defaults_for_begin_end { |
148
|
7
|
|
|
7
|
|
28
|
my (%opt) = @_; |
149
|
7
|
100
|
|
|
|
25
|
$opt{dont_record} = 1 if not exists $opt{dont_record}; |
150
|
7
|
100
|
|
|
|
18
|
delete $opt{if} if exists $opt{if}; |
151
|
7
|
100
|
|
|
|
18
|
delete $opt{continue_to_next} if exists $opt{continue_to_next}; |
152
|
7
|
|
|
|
|
32
|
return (%opt); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub _modify_rule { |
156
|
7
|
|
|
7
|
|
23
|
my ( $self, $func, %opt ) = @_; |
157
|
7
|
|
|
|
|
25
|
my $pred = '_has' . $func; |
158
|
7
|
100
|
|
|
|
219
|
$self->_append_rule_lines( $func, \%opt ) if $self->$pred(); |
159
|
7
|
|
|
|
|
188
|
my $rule = Text::Parser::Rule->new(%opt); |
160
|
7
|
|
|
|
|
170
|
$self->$func($rule); |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub _append_rule_lines { |
164
|
3
|
|
|
3
|
|
7
|
my ( $self, $func, $opt ) = ( shift, shift, shift ); |
165
|
3
|
|
|
|
|
73
|
my $old = $self->$func(); |
166
|
3
|
|
|
|
|
61
|
$opt->{do} = $old->action . $opt->{do}; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
has _end_rule => ( |
171
|
|
|
|
|
|
|
is => 'rw', |
172
|
|
|
|
|
|
|
isa => 'Text::Parser::Rule', |
173
|
|
|
|
|
|
|
predicate => '_has_end_rule', |
174
|
|
|
|
|
|
|
clearer => '_clear_end_rule', |
175
|
|
|
|
|
|
|
); |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub END_rule { |
178
|
3
|
|
|
3
|
1
|
23
|
my $self = shift; |
179
|
3
|
100
|
|
|
|
21
|
$self->auto_split(1) if not $self->auto_split; |
180
|
3
|
|
|
|
|
12
|
my (%opt) = _defaults_for_begin_end(@_); |
181
|
3
|
|
|
|
|
15
|
$self->_modify_rule( '_end_rule', %opt ); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
sub read { |
186
|
74
|
|
|
74
|
1
|
68700
|
my $self = shift; |
187
|
74
|
100
|
|
|
|
317
|
return if not defined $self->_handle_read_inp(@_); |
188
|
68
|
|
|
|
|
365
|
$self->_run_begin_end_block('_begin_rule'); |
189
|
68
|
|
|
|
|
410
|
$self->__read_and_close_filehandle; |
190
|
59
|
|
|
|
|
291
|
$self->_run_begin_end_block('_end_rule'); |
191
|
59
|
|
|
|
|
1459
|
$self->_ExAWK_symbol_table( {} ); |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub _handle_read_inp { |
195
|
74
|
|
|
74
|
|
149
|
my $self = shift; |
196
|
74
|
100
|
|
|
|
242
|
return $self->filehandle if not @_; |
197
|
72
|
100
|
100
|
|
|
440
|
return if not ref( $_[0] ) and not $_[0]; |
198
|
71
|
100
|
|
|
|
2117
|
return $self->filename(@_) if not ref( $_[0] ); |
199
|
6
|
|
|
|
|
20
|
return $self->filehandle(@_); |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
has _ExAWK_symbol_table => ( |
203
|
|
|
|
|
|
|
is => 'rw', |
204
|
|
|
|
|
|
|
isa => 'HashRef[Any]', |
205
|
|
|
|
|
|
|
default => sub { {} }, |
206
|
|
|
|
|
|
|
lazy => 1, |
207
|
|
|
|
|
|
|
); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub _run_begin_end_block { |
210
|
127
|
|
|
127
|
|
311
|
my ( $self, $func ) = ( shift, shift ); |
211
|
127
|
|
|
|
|
310
|
my $pred = '_has' . $func; |
212
|
127
|
100
|
|
|
|
3975
|
return if not $self->$pred(); |
213
|
4
|
|
|
|
|
92
|
my $rule = $self->$func(); |
214
|
4
|
|
|
|
|
20
|
$rule->run( $self, 0 ); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub __read_and_close_filehandle { |
218
|
68
|
|
|
68
|
|
139
|
my $self = shift; |
219
|
68
|
|
|
|
|
375
|
$self->_prep_to_read_file; |
220
|
68
|
|
|
|
|
431
|
$self->__read_file_handle; |
221
|
59
|
100
|
|
|
|
1952
|
$self->_close_filehandles if $self->_has_filename; |
222
|
59
|
|
|
|
|
972
|
$self->_clear_this_line; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub _prep_to_read_file { |
226
|
68
|
|
|
68
|
|
143
|
my $self = shift; |
227
|
68
|
|
|
|
|
2735
|
$self->_reset_line_count; |
228
|
68
|
|
|
|
|
2324
|
$self->_empty_records; |
229
|
68
|
|
|
|
|
2180
|
$self->_clear_abort; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
sub __read_file_handle { |
233
|
68
|
|
|
68
|
|
747
|
my $self = shift; |
234
|
68
|
|
|
|
|
488
|
my $fh = $self->filehandle(); |
235
|
68
|
|
|
|
|
1364
|
while (<$fh>) { |
236
|
517
|
100
|
|
|
|
1303
|
last if not $self->__parse_line($_); |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub __parse_line { |
241
|
517
|
|
|
517
|
|
1033
|
my ( $self, $line ) = ( shift, shift ); |
242
|
517
|
|
|
|
|
15896
|
$self->_next_line_parsed(); |
243
|
517
|
|
|
|
|
1127
|
$line = $self->_def_line_manip($line); |
244
|
517
|
|
|
|
|
2584
|
$self->__try_to_parse($line); |
245
|
511
|
|
|
|
|
19267
|
return not $self->has_aborted; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub _def_line_manip { |
249
|
517
|
|
|
517
|
|
899
|
my ( $self, $line ) = ( shift, shift ); |
250
|
517
|
100
|
|
|
|
9815
|
chomp $line if $self->auto_chomp; |
251
|
517
|
|
|
|
|
1233
|
return $self->_trim_line($line); |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub _trim_line { |
255
|
517
|
|
|
517
|
|
960
|
my ( $self, $line ) = ( shift, shift ); |
256
|
517
|
100
|
|
|
|
9695
|
return $line if $self->auto_trim eq 'n'; |
257
|
69
|
100
|
|
|
|
493
|
return trim($line) if $self->auto_trim eq 'b'; |
258
|
8
|
100
|
|
|
|
147
|
return ltrim($line) if $self->auto_trim eq 'l'; |
259
|
4
|
|
|
|
|
11
|
return rtrim($line); |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub __try_to_parse { |
263
|
517
|
|
|
517
|
|
874
|
my ( $self, $line ) = @_; |
264
|
517
|
|
|
|
|
5398
|
$self->_set_this_line($line); |
265
|
517
|
|
|
517
|
|
33972
|
try { $self->save_record($line); } |
266
|
517
|
|
|
6
|
|
3522
|
catch { die $_; }; |
|
6
|
|
|
|
|
4720
|
|
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
has filename => ( |
271
|
|
|
|
|
|
|
is => 'rw', |
272
|
|
|
|
|
|
|
isa => 'Str|Undef', |
273
|
|
|
|
|
|
|
lazy => 1, |
274
|
|
|
|
|
|
|
init_arg => undef, |
275
|
|
|
|
|
|
|
default => undef, |
276
|
|
|
|
|
|
|
predicate => '_has_filename', |
277
|
|
|
|
|
|
|
clearer => '_clear_filename', |
278
|
|
|
|
|
|
|
trigger => \&_set_filehandle, |
279
|
|
|
|
|
|
|
); |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub _set_filehandle { |
282
|
71
|
|
|
71
|
|
13187
|
my $self = shift; |
283
|
71
|
100
|
|
|
|
1855
|
return $self->_clear_filename if not defined $self->filename; |
284
|
70
|
|
|
|
|
347
|
$self->_save_filehandle( $self->__get_valid_fh ); |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
sub __get_valid_fh { |
288
|
70
|
|
|
70
|
|
176
|
my $self = shift; |
289
|
70
|
|
|
|
|
273
|
my $fname = $self->_get_valid_text_filename; |
290
|
70
|
100
|
|
|
|
797
|
return FileHandle->new( $fname, 'r' ) if defined $fname; |
291
|
4
|
|
|
|
|
98
|
$fname = $self->filename; |
292
|
4
|
|
|
|
|
140
|
$self->_clear_filename; |
293
|
4
|
|
|
|
|
15
|
$self->_throw_invalid_file_exception($fname); |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub _get_valid_text_filename { |
298
|
70
|
|
|
70
|
|
224
|
my $self = shift; |
299
|
70
|
|
|
|
|
1554
|
my $fname = $self->filename; |
300
|
70
|
100
|
66
|
|
|
6731
|
return $fname if -f $fname and -r $fname and -T $fname; |
|
|
|
100
|
|
|
|
|
301
|
4
|
|
|
|
|
19
|
return; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub _throw_invalid_file_exception { |
306
|
4
|
|
|
4
|
|
14
|
my ( $self, $fname ) = ( shift, shift ); |
307
|
4
|
100
|
|
|
|
52
|
die invalid_filename( name => $fname ) if not -f $fname; |
308
|
1
|
50
|
|
|
|
15
|
die file_not_readable( name => $fname ) if not -r $fname; |
309
|
1
|
|
|
|
|
9
|
die file_not_plain_text( name => $fname ); |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
has filehandle => ( |
314
|
|
|
|
|
|
|
is => 'rw', |
315
|
|
|
|
|
|
|
isa => 'FileHandle|Undef', |
316
|
|
|
|
|
|
|
lazy => 1, |
317
|
|
|
|
|
|
|
init_arg => undef, |
318
|
|
|
|
|
|
|
default => undef, |
319
|
|
|
|
|
|
|
predicate => '_has_filehandle', |
320
|
|
|
|
|
|
|
writer => '_save_filehandle', |
321
|
|
|
|
|
|
|
reader => '_get_filehandle', |
322
|
|
|
|
|
|
|
clearer => '_close_filehandles', |
323
|
|
|
|
|
|
|
); |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub filehandle { |
326
|
84
|
|
|
84
|
1
|
1090
|
my $self = shift; |
327
|
84
|
100
|
100
|
|
|
2563
|
return if not @_ and not $self->_has_filehandle; |
328
|
81
|
100
|
|
|
|
563
|
$self->_save_filehandle(@_) if @_; |
329
|
77
|
100
|
|
|
|
382
|
$self->_clear_filename if @_; |
330
|
77
|
|
|
|
|
2719
|
return $self->_get_filehandle; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
has lines_parsed => ( |
335
|
|
|
|
|
|
|
is => 'ro', |
336
|
|
|
|
|
|
|
isa => 'Int', |
337
|
|
|
|
|
|
|
lazy => 1, |
338
|
|
|
|
|
|
|
init_arg => undef, |
339
|
|
|
|
|
|
|
default => 0, |
340
|
|
|
|
|
|
|
traits => ['Counter'], |
341
|
|
|
|
|
|
|
handles => { |
342
|
|
|
|
|
|
|
_next_line_parsed => 'inc', |
343
|
|
|
|
|
|
|
_reset_line_count => 'reset', |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
); |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
sub save_record { |
349
|
313
|
|
|
313
|
1
|
6294
|
my ( $self, $record ) = ( shift, shift ); |
350
|
313
|
100
|
|
|
|
10360
|
$self->_has_no_rules |
351
|
|
|
|
|
|
|
? $self->push_records($record) |
352
|
|
|
|
|
|
|
: $self->_run_through_rules; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
sub _run_through_rules { |
356
|
144
|
|
|
144
|
|
225
|
my $self = shift; |
357
|
144
|
|
|
|
|
4409
|
foreach my $rule ( $self->_get_rules ) { |
358
|
277
|
100
|
|
|
|
791
|
next if not $rule->test($self); |
359
|
97
|
|
|
|
|
330
|
$rule->run($self); |
360
|
97
|
100
|
|
|
|
2594
|
last if not $rule->continue_to_next; |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
has _current_line => ( |
366
|
|
|
|
|
|
|
is => 'ro', |
367
|
|
|
|
|
|
|
isa => 'Str|Undef', |
368
|
|
|
|
|
|
|
init_arg => undef, |
369
|
|
|
|
|
|
|
writer => '_set_this_line', |
370
|
|
|
|
|
|
|
reader => 'this_line', |
371
|
|
|
|
|
|
|
clearer => '_clear_this_line', |
372
|
|
|
|
|
|
|
default => undef, |
373
|
|
|
|
|
|
|
); |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
has abort => ( |
378
|
|
|
|
|
|
|
is => 'rw', |
379
|
|
|
|
|
|
|
isa => 'Bool', |
380
|
|
|
|
|
|
|
lazy => 1, |
381
|
|
|
|
|
|
|
default => 0, |
382
|
|
|
|
|
|
|
traits => ['Bool'], |
383
|
|
|
|
|
|
|
reader => 'has_aborted', |
384
|
|
|
|
|
|
|
handles => { |
385
|
|
|
|
|
|
|
abort_reading => 'set', |
386
|
|
|
|
|
|
|
_clear_abort => 'unset' |
387
|
|
|
|
|
|
|
}, |
388
|
|
|
|
|
|
|
); |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
has records => ( |
392
|
|
|
|
|
|
|
isa => 'ArrayRef[Any]', |
393
|
|
|
|
|
|
|
is => 'ro', |
394
|
|
|
|
|
|
|
lazy => 1, |
395
|
|
|
|
|
|
|
default => sub { return []; }, |
396
|
|
|
|
|
|
|
auto_deref => 1, |
397
|
|
|
|
|
|
|
init_arg => undef, |
398
|
|
|
|
|
|
|
traits => ['Array'], |
399
|
|
|
|
|
|
|
handles => { |
400
|
|
|
|
|
|
|
get_records => 'elements', |
401
|
|
|
|
|
|
|
push_records => 'push', |
402
|
|
|
|
|
|
|
pop_record => 'pop', |
403
|
|
|
|
|
|
|
_empty_records => 'clear', |
404
|
|
|
|
|
|
|
_num_records => 'count', |
405
|
|
|
|
|
|
|
_access_record => 'accessor', |
406
|
|
|
|
|
|
|
}, |
407
|
|
|
|
|
|
|
); |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
sub last_record { |
411
|
16
|
|
|
16
|
1
|
616
|
my $self = shift; |
412
|
16
|
|
|
|
|
591
|
my $count = $self->_num_records(); |
413
|
16
|
100
|
|
|
|
48
|
return if not $count; |
414
|
15
|
|
|
|
|
469
|
return $self->_access_record( $count - 1 ); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
sub is_line_continued { |
419
|
71
|
|
|
71
|
1
|
127
|
my $self = shift; |
420
|
71
|
100
|
|
|
|
141
|
return 0 if not defined $self->multiline_type; |
421
|
65
|
100
|
100
|
|
|
128
|
return 0 |
422
|
|
|
|
|
|
|
if $self->multiline_type eq 'join_last' |
423
|
|
|
|
|
|
|
and $self->lines_parsed() == 1; |
424
|
60
|
|
|
|
|
181
|
return 1; |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
sub join_last_line { |
429
|
57
|
|
|
57
|
1
|
95
|
my $self = shift; |
430
|
57
|
|
|
|
|
88
|
my ( $last, $line ) = ( shift, shift ); |
431
|
57
|
|
|
|
|
188
|
return $last . $line; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
436
|
|
|
|
|
|
|
|
437
|
30
|
|
|
30
|
|
79444
|
no Moose; |
|
30
|
|
|
|
|
105
|
|
|
30
|
|
|
|
|
277
|
|
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
1; |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
__END__ |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=pod |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=encoding UTF-8 |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=head1 NAME |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Text::Parser - Simplifies text parsing. Easily extensible to parse any text format. |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 VERSION |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
version 0.926 |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head1 SYNOPSIS |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
use Text::Parser; |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
my $parser = Text::Parser->new(); |
460
|
|
|
|
|
|
|
$parser->read(shift); |
461
|
|
|
|
|
|
|
print $parser->get_records, "\n"; |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
The above code prints the content of the file (named in the first argument) to C<STDOUT>. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
my $parser = Text::Parser->new(); |
466
|
|
|
|
|
|
|
$parser->add_rule(do => 'print'); |
467
|
|
|
|
|
|
|
$parser->read(shift); |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This example also dones the same as the earlier one. For more complex examples see the L<manual|Text::Parser::Manual>. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=head1 OVERVIEW |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
The L<need|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it efficiently. Most programmers still write boilerplate code with a C<while> loop. |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
Instead C<Text::Parser> allows programmers to parse text with terse, self-explanatory L<rules|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK. Incidentally, AWK is L<one of the ancestors of Perl|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to extend the capabilities of AWK, although that's not really the case. Command-line C<perl -lane> or even C<perl -lan script.pl> are L<very limited|Text::Parser::Manual::ComparingWithNativePerl> in what they can do. Programmers cannot use them for serious projects. And parsing text files in regular Perl involves writing the same C<while> loop again. L<This website|https://perl-begin.org/uses/text-parsing/> summarizes the options available in Perl so far. |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read|/read>> method automatically runs each rule collecting records from the text input into an array internally. And finally C<L<get_records|/get_records>> can retrieve the records. Thus the programmer now has the power of Perl to create complex data structures, along with the elegance of AWK to parse text files. The L<manuals|Text::Parser::Manual> illustrate this with L<examples|Text::Parser::Manual::ComparingWithNativePerl>. |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=head2 new |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
Takes optional attributes as in example below. See section L<ATTRIBUTES|/ATTRIBUTES> for a list of the attributes and their description. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
my $parser = Text::Parser->new( |
486
|
|
|
|
|
|
|
auto_chomp => 0, |
487
|
|
|
|
|
|
|
multiline_type => 'join_last', |
488
|
|
|
|
|
|
|
auto_trim => 'b', |
489
|
|
|
|
|
|
|
auto_split => 1, |
490
|
|
|
|
|
|
|
FS => qr/\s+/, |
491
|
|
|
|
|
|
|
); |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name. |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=head2 auto_chomp |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
print "Parser will chomp lines automatically\n" if $parser->auto_chomp; |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=head2 auto_split |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields. |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
If it is set to a true value, each line will be split into fields, and a set of methods (a quick list L<here|/"Other methods available on auto_split">) become accessible within the C<L<save_record|/save_record>> method. These methods are documented in L<Text::Parser::AutoSplit>. |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=head2 auto_trim |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Read-write attribute. The values this can take are shown under the C<L<new|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed). |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
$parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default) |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=head2 FS |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+|[,]/> which will split across either spaces or commas. The default value for this argument is C<qr/\s+/>. |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>. |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
$parser->FS( qr/\s+\(*|\s*\)/ ); |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
C<FS> I<can> be changed in your implementation of C<save_record>. But the changes would take effect only on the next line. |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=head2 multiline_type |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
If the target text format allows line-wrapping with a continuation character, the C<multiline_type> option tells the parser to join them into a single line. When setting this attribute, one must re-define L<two more methods|/"PARSING LINE-WRAPPED FILES">. |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
By default, the read-write C<multiline_type> attribute has a value of C<undef>, i.e., the target text format will not have wrapped lines. It can be set to either C<'join_next'> or C<'join_last'>. |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
$parser->multiline_type(undef); |
532
|
|
|
|
|
|
|
$parser->multiline_type('join_next'); |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
my $mult = $parser->multiline_type; |
535
|
|
|
|
|
|
|
print "Parser is a multi-line parser of type: $mult" if defined $mult; |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=over 4 |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=item * |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>. |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
=item * |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>. |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=item * |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
To "slurp" a file into a single string, set C<multiline_type> to C<join_last>. In this special case, you don't need to re-define the C<L<is_line_continued|/is_line_continued>> and C<L<join_last_line|/join_last_line>> methods. |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=back |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=head1 METHODS |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
These are meant to be called from the C<::main> program or within subclasses. In general, don't override them - just use them. |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=head2 add_rule |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor. |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
$parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing |
562
|
|
|
|
|
|
|
$parser->add_rule(if => 'm/li/, do => 'print', dont_record); # Prints lines with 'li' |
563
|
|
|
|
|
|
|
$parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute. |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=head2 clear_rules |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
Takes no arguments, returns nothing. Clears the rules that were added to the object. |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
$parser->clear_rules; |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule|/BEGIN_rule>> and C<L<END_rule|/END_rule>>. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=head2 BEGIN_rule |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
$parser->BEGIN_rule(do => '~count = 0;'); |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=over 4 |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=item * |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included. |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=item * |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
The C<BEGIN> block is mainly used to initialize some variables. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved. |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=back |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head2 END_rule |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method. |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
$parser->END_rule(do => 'print ~count, "\n";'); |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=over 4 |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
=item * |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included. |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=item * |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
The C<END> block is mainly used to do final processing of collected records. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved. |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
=back |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=head2 read |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class. |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
$parser->read($filename); # Read the file |
616
|
|
|
|
|
|
|
$parser->read(\*STDIN); # Read the filehandle |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
The above could also be done in two steps if the developer so chooses. |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
$parser->filename($filename); |
621
|
|
|
|
|
|
|
$parser->read(); # equiv: $parser->read($filename) |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
$parser->filehandle(\*STDIN); |
624
|
|
|
|
|
|
|
$parser->read(); # equiv: $parser->read(\*STDIN) |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading|/abort_reading>> method. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter. |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
$parser->read('myfile.txt'); # Will close file automatically |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at "; |
633
|
|
|
|
|
|
|
$parser->read(\*MYFH); # Will not close MYFH |
634
|
|
|
|
|
|
|
close MYFH; |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
B<Note:> To extend the class to other text formats, override C<L<save_record|/save_record>>. |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
=head2 filename |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened. |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
print "Last read ", $parser->filename, "\n"; |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read|/read>>. |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
$parser->read(shift @ARGV); |
647
|
|
|
|
|
|
|
print $parser->filename(), ":\n", |
648
|
|
|
|
|
|
|
"=" x (length($parser->filename())+1), |
649
|
|
|
|
|
|
|
"\n", |
650
|
|
|
|
|
|
|
$parser->get_records(), |
651
|
|
|
|
|
|
|
"\n"; |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
A C<read> call with a filehandle, will clear the last file name. |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
$parser->read(\*MYFH); |
656
|
|
|
|
|
|
|
print "Last file name is lost\n" if not defined $parser->filename(); |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head2 filehandle |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved. |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
my $fh = $parser->filehandle(); |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Like C<L<filename|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
$parser->read(\*STDOUT); |
667
|
|
|
|
|
|
|
my $lastfh = $parser->filehandle(); # Will return glob of STDOUT |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=head2 lines_parsed |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
Takes no arguments. Returns the number of lines last parsed. Every call to C<read>, causes the value to be auto-reset. |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
print $parser->lines_parsed, " lines were parsed\n"; |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=head2 has_aborted |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle. |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
print "Aborted\n" if $parser->has_aborted(); |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=head2 get_records |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
Takes no arguments. Returns an array containing all the records saved by the parser. |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
foreach my $record ( $parser->get_records ) { |
686
|
|
|
|
|
|
|
$i++; |
687
|
|
|
|
|
|
|
print "Record: $i: ", $record, "\n"; |
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=head2 pop_record |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
Takes no arguments and pops the last saved record. |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
my $last_rec = $parser->pop_record; |
695
|
|
|
|
|
|
|
$uc_last = uc $last_rec; |
696
|
|
|
|
|
|
|
$parser->save_record($uc_last); |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=head2 last_record |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
Takes no arguments and returns the last saved record. Leaves the saved records untouched. |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
my $last_rec = $parser->last_record; |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=head1 USE ONLY IN RULES AND SUBCLASS |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
Do NOT override these methods. They are valid only within a subclass, inside the user-implementation of methods described under L<OVERRIDE IN SUBCLASS|/"OVERRIDE IN SUBCLASS">. |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=head2 this_line |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
Takes no arguments, and returns the current line being parsed. For example: |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub save_record { |
713
|
|
|
|
|
|
|
# ... |
714
|
|
|
|
|
|
|
do_something($self->this_line); |
715
|
|
|
|
|
|
|
# ... |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=head2 abort_reading |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
Takes no arguments. Returns C<1>. To be used only in the derived class to abort C<read> in the middle. |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub save_record { |
723
|
|
|
|
|
|
|
# ... |
724
|
|
|
|
|
|
|
$self->abort_reading if some_condition($self->this_line); |
725
|
|
|
|
|
|
|
# ... |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=head2 push_records |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
This is useful if one needs to implement an C<include>-like command in some text format. The example below illustrates this. |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
package OneParser; |
733
|
|
|
|
|
|
|
use Moose; |
734
|
|
|
|
|
|
|
extends 'Text::Parser'; |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
my save_record { |
737
|
|
|
|
|
|
|
# ... |
738
|
|
|
|
|
|
|
# Under some condition: |
739
|
|
|
|
|
|
|
my $parser = AnotherParser->new(); |
740
|
|
|
|
|
|
|
$parser->read($some_file) |
741
|
|
|
|
|
|
|
$parser->push_records($parser->get_records); |
742
|
|
|
|
|
|
|
# ... |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
=head2 Other methods available on C<auto_split> |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
When the C<L<auto_split|/auto_split>> attribute is on, (or if it is turned on later), the following additional methods become available: |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=over 4 |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
=item * |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
L<NF|Text::Parser::AutoSplit/NF> |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
=item * |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
L<fields|Text::Parser::AutoSplit/fields> |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
=item * |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
L<field|Text::Parser::AutoSplit/field> |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=item * |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
L<field_range|Text::Parser::AutoSplit/field_range> |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
=item * |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
L<join_range|Text::Parser::AutoSplit/join_range> |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=item * |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
L<find_field|Text::Parser::AutoSplit/find_field> |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
=item * |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
L<find_field_index|Text::Parser::AutoSplit/find_field_index> |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
=item * |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
L<splice_fields|Text::Parser::AutoSplit/splice_fields> |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
=back |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=head1 OVERRIDE IN SUBCLASS |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass. |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
=head2 save_record |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
This method may be re-defined in a subclass to parse the target text format. The default implementation takes a single argument and stores it as a record. If no arguments are passed, C<undef> is stored as a record. Note that unlike earlier versions of C<Text::Parser> it is not required to override this method in your derived class. You can simply use the rules instead. |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
For a developer re-defining C<save_record>, in addition to C<L<this_line|/"this_line">>, six additional methods become available if the C<auto_split> attribute is set. These methods are described in greater detail in L<Text::Parser::AutoSplit>, and they are accessible only within C<save_record>. |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
B<Note:> Developers may store records in any form - string, array reference, hash reference, complex data structure, or an object of some class. The program that reads these records using C<L<get_records|/get_records>> has to interpret them. So developers should document the records created by their own implementation of C<save_record>. |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
=head2 PARSING LINE-WRAPPED FILES |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
These methods are useful when parsing line-wrapped files, i.e., if the target text format allows wrapping the content of one line into multiple lines. In such cases, you should C<extend> the C<Text::Parser> class and override the following methods. |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=head3 is_line_continued |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
If the target text format supports line-wrapping, the developer must override and implement this method. Your method should take a string argument and return a boolean indicating if the line is continued or not. |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
There is a default implementation shipped with this class with return values as follows: |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
multiline_type | Return value |
808
|
|
|
|
|
|
|
------------------+--------------------------------- |
809
|
|
|
|
|
|
|
undef | 0 |
810
|
|
|
|
|
|
|
join_last | 0 for first line, 1 otherwise |
811
|
|
|
|
|
|
|
join_next | 1 |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
=head3 join_last_line |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
Again, the developer should implement this method. This method should take two strings, join them while removing any continuation characters, and return the result. The default implementation just concatenates two strings and returns the result without removing anything (not even C<chomp>). See L<Text::Parser::Multiline> for more on this. |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
=head1 EXAMPLES |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
You can find example code in L<Text::Parser::Manual::ComparingWithNativePerl>. |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
=head1 THINGS TO BE DONE |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
This package is still a work in progress. Future versions are expected to include features to: |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=over 4 |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
=item * |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
read and parse from a buffer |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
=item * |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
automatically uncompress input |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
=item * |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
I<suggestions welcome ...> |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
=back |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
Contributions and suggestions are welcome and properly acknowledged. |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
=head1 SEE ALSO |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
=over 4 |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
=item * |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
L<Text::Parser::Manual> - Read this manual |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=item * |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
L<The AWK Programming Language|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberg, and B<K>ernighan. |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
=item * |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
L<Text::Parser::Errors> - documentation of the exceptions this class throws |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=item * |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
L<Text::Parser::Multiline> - how to read line-wrapped text input |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
=back |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=head1 BUGS |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
Please report any bugs or feature requests on the bugtracker website |
868
|
|
|
|
|
|
|
L<http://github.com/balajirama/Text-Parser/issues> |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
When submitting a bug or request, please include a test-file or a |
871
|
|
|
|
|
|
|
patch to an existing test-file that illustrates the bug or desired |
872
|
|
|
|
|
|
|
feature. |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
=head1 AUTHOR |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
Balaji Ramasubramanian <balajiram@cpan.org> |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
This software is copyright (c) 2018-2019 by Balaji Ramasubramanian. |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
883
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
=for stopwords H.Merijn Brand - Tux Mohammad S Anwar |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
=over 4 |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
=item * |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
H.Merijn Brand - Tux <h.m.brand@xs4all.nl> |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
=item * |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
Mohammad S Anwar <mohammad.anwar@yahoo.com> |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
=back |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
=cut |
902
|
|
|
|
|
|
|
|