line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
42
|
|
|
42
|
|
1720841
|
use warnings; |
|
42
|
|
|
|
|
339
|
|
|
42
|
|
|
|
|
1475
|
|
2
|
42
|
|
|
42
|
|
243
|
use strict; |
|
42
|
|
|
|
|
105
|
|
|
42
|
|
|
|
|
1041
|
|
3
|
42
|
|
|
42
|
|
219
|
use feature ':5.14'; |
|
42
|
|
|
|
|
88
|
|
|
42
|
|
|
|
|
7447
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package Text::Parser 1.000; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
|
10
|
42
|
|
|
42
|
|
14924
|
use Moose; |
|
42
|
|
|
|
|
12352250
|
|
|
42
|
|
|
|
|
326
|
|
11
|
42
|
|
|
42
|
|
354246
|
use MooseX::CoverableModifiers; |
|
42
|
|
|
|
|
250627
|
|
|
42
|
|
|
|
|
323
|
|
12
|
42
|
|
|
42
|
|
21346
|
use MooseX::StrictConstructor; |
|
42
|
|
|
|
|
1042105
|
|
|
42
|
|
|
|
|
229
|
|
13
|
42
|
|
|
42
|
|
365513
|
use namespace::autoclean; |
|
42
|
|
|
|
|
107
|
|
|
42
|
|
|
|
|
278
|
|
14
|
42
|
|
|
42
|
|
4228
|
use Moose::Util 'apply_all_roles', 'ensure_all_roles'; |
|
42
|
|
|
|
|
114
|
|
|
42
|
|
|
|
|
457
|
|
15
|
42
|
|
|
42
|
|
15937
|
use Moose::Util::TypeConstraints; |
|
42
|
|
|
|
|
100
|
|
|
42
|
|
|
|
|
400
|
|
16
|
42
|
|
|
42
|
|
123719
|
use String::Util qw(trim ltrim rtrim eqq); |
|
42
|
|
|
|
|
120725
|
|
|
42
|
|
|
|
|
3740
|
|
17
|
42
|
|
|
42
|
|
15481
|
use Text::Parser::Error; |
|
42
|
|
|
|
|
162
|
|
|
42
|
|
|
|
|
228
|
|
18
|
42
|
|
|
42
|
|
46073
|
use Text::Parser::Rule; |
|
42
|
|
|
|
|
196
|
|
|
42
|
|
|
|
|
2499
|
|
19
|
42
|
|
|
42
|
|
26544
|
use Text::Parser::RuleSpec; |
|
42
|
|
|
|
|
208
|
|
|
42
|
|
|
|
|
334
|
|
20
|
42
|
|
|
42
|
|
141544
|
use List::MoreUtils qw(natatime first_index); |
|
42
|
|
|
|
|
104
|
|
|
42
|
|
|
|
|
395
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)]; |
23
|
|
|
|
|
|
|
enum 'Text::Parser::Types::LineWrapStyle' => |
24
|
|
|
|
|
|
|
[qw(trailing_backslash spice just_next_line slurp custom)]; |
25
|
|
|
|
|
|
|
enum 'Text::Parser::Types::TrimType' => [qw(l r b n)]; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
subtype 'NonEmptyStr', as 'Str', where { length $_ > 0 }, |
28
|
|
|
|
|
|
|
message {"$_ is an empty string"}; |
29
|
|
|
|
|
|
|
|
30
|
42
|
|
|
42
|
|
41563
|
no Moose::Util::TypeConstraints; |
|
42
|
|
|
|
|
109
|
|
|
42
|
|
|
|
|
456
|
|
31
|
42
|
|
|
42
|
|
41692
|
use FileHandle; |
|
42
|
|
|
|
|
428211
|
|
|
42
|
|
|
|
|
271
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has _origclass => ( |
34
|
|
|
|
|
|
|
is => 'ro', |
35
|
|
|
|
|
|
|
isa => 'Str', |
36
|
|
|
|
|
|
|
lazy => 1, |
37
|
|
|
|
|
|
|
default => '', |
38
|
|
|
|
|
|
|
); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
around BUILDARGS => sub { |
42
|
55
|
|
|
55
|
|
1822
|
my ( $orig, $class ) = ( shift, shift ); |
43
|
55
|
100
|
100
|
|
|
613
|
return $class->$orig( @_, _origclass => $class ) if @_ > 1 or not @_; |
44
|
2
|
|
|
|
|
6
|
my $ptr = shift; |
45
|
2
|
100
|
|
|
|
13
|
parser_exception("Invalid parameters to Text::Parser constructor") |
46
|
|
|
|
|
|
|
if ref($ptr) ne 'HASH'; |
47
|
1
|
|
|
|
|
3
|
$class->$orig( %{$ptr}, _origclass => $class ); |
|
1
|
|
|
|
|
7
|
|
48
|
|
|
|
|
|
|
}; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub BUILD { |
51
|
64
|
|
|
64
|
0
|
154994
|
my $self = shift; |
52
|
64
|
100
|
|
|
|
2266
|
ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split; |
53
|
64
|
100
|
|
|
|
32915
|
return if not defined $self->multiline_type; |
54
|
17
|
|
|
|
|
103
|
ensure_all_roles $self, 'Text::Parser::Multiline'; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
has auto_chomp => ( |
59
|
|
|
|
|
|
|
is => 'rw', |
60
|
|
|
|
|
|
|
isa => 'Bool', |
61
|
|
|
|
|
|
|
lazy => 1, |
62
|
|
|
|
|
|
|
default => 0, |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
has auto_split => ( |
67
|
|
|
|
|
|
|
is => 'rw', |
68
|
|
|
|
|
|
|
isa => 'Bool', |
69
|
|
|
|
|
|
|
lazy => 1, |
70
|
|
|
|
|
|
|
default => 0, |
71
|
|
|
|
|
|
|
trigger => \&__newval_auto_split, |
72
|
|
|
|
|
|
|
); |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub __newval_auto_split { |
75
|
86
|
|
|
86
|
|
50070
|
my ( $self, $newval, $oldval ) = ( shift, shift, shift ); |
76
|
86
|
100
|
|
|
|
429
|
ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval; |
77
|
86
|
100
|
100
|
|
|
355651
|
$self->_clear_all_fields if not $newval and $oldval; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
has auto_trim => ( |
82
|
|
|
|
|
|
|
is => 'rw', |
83
|
|
|
|
|
|
|
isa => 'Text::Parser::Types::TrimType', |
84
|
|
|
|
|
|
|
lazy => 1, |
85
|
|
|
|
|
|
|
default => 'n', |
86
|
|
|
|
|
|
|
); |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
has custom_line_trimmer => ( |
90
|
|
|
|
|
|
|
is => 'rw', |
91
|
|
|
|
|
|
|
isa => 'CodeRef|Undef', |
92
|
|
|
|
|
|
|
lazy => 1, |
93
|
|
|
|
|
|
|
default => undef, |
94
|
|
|
|
|
|
|
); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
has FS => ( |
98
|
|
|
|
|
|
|
is => 'rw', |
99
|
|
|
|
|
|
|
isa => 'RegexpRef', |
100
|
|
|
|
|
|
|
lazy => 1, |
101
|
|
|
|
|
|
|
default => sub {qr/\s+/}, |
102
|
|
|
|
|
|
|
); |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
has indentation_str => ( |
106
|
|
|
|
|
|
|
is => 'rw', |
107
|
|
|
|
|
|
|
isa => 'NonEmptyStr', |
108
|
|
|
|
|
|
|
lazy => 1, |
109
|
|
|
|
|
|
|
default => ' ', |
110
|
|
|
|
|
|
|
); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
has line_wrap_style => ( |
114
|
|
|
|
|
|
|
is => 'rw', |
115
|
|
|
|
|
|
|
isa => 'Text::Parser::Types::LineWrapStyle|Undef', |
116
|
|
|
|
|
|
|
default => undef, |
117
|
|
|
|
|
|
|
lazy => 1, |
118
|
|
|
|
|
|
|
trigger => \&_on_line_unwrap, |
119
|
|
|
|
|
|
|
); |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
my %MULTILINE_VAL = ( |
122
|
|
|
|
|
|
|
default => undef, |
123
|
|
|
|
|
|
|
spice => 'join_last', |
124
|
|
|
|
|
|
|
trailing_backslash => 'join_next', |
125
|
|
|
|
|
|
|
just_next_line => 'join_last', |
126
|
|
|
|
|
|
|
slurp => 'join_last', |
127
|
|
|
|
|
|
|
custom => undef, |
128
|
|
|
|
|
|
|
); |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub _on_line_unwrap { |
131
|
16
|
|
|
16
|
|
44
|
my ( $self, $val, $oldval ) = (@_); |
132
|
16
|
100
|
100
|
|
|
75
|
return if not defined $val and not defined $oldval; |
133
|
15
|
100
|
|
|
|
46
|
$val = 'default' if not defined $val; |
134
|
15
|
100
|
100
|
|
|
69
|
return if $val eq 'custom' and defined $self->multiline_type; |
135
|
9
|
|
|
|
|
37
|
$self->multiline_type( $MULTILINE_VAL{$val} ); |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
has multiline_type => ( |
140
|
|
|
|
|
|
|
is => 'rw', |
141
|
|
|
|
|
|
|
isa => 'Text::Parser::Types::MultilineType|Undef', |
142
|
|
|
|
|
|
|
lazy => 1, |
143
|
|
|
|
|
|
|
default => undef, |
144
|
|
|
|
|
|
|
); |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
around multiline_type => sub { |
147
|
1783
|
|
|
1783
|
|
84484
|
my ( $orig, $self ) = ( shift, shift ); |
148
|
1783
|
|
|
|
|
44222
|
my $oldval = $orig->($self); |
149
|
1783
|
100
|
100
|
|
|
11104
|
return $oldval if not @_ or eqq( $_[0], $oldval ); |
150
|
17
|
|
|
|
|
224
|
return __newval_multi_line( $orig, $self, @_ ); |
151
|
|
|
|
|
|
|
}; |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub __newval_multi_line { |
154
|
17
|
|
|
17
|
|
51
|
my ( $orig, $self, $newval ) = ( shift, shift, shift ); |
155
|
17
|
|
|
|
|
52
|
delete $self->{records}; |
156
|
17
|
100
|
|
|
|
94
|
ensure_all_roles( $self, 'Text::Parser::Multiline' ) |
157
|
|
|
|
|
|
|
if defined $newval; |
158
|
17
|
|
|
|
|
88615
|
return $orig->( $self, $newval ); |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
has track_indentation => ( |
163
|
|
|
|
|
|
|
is => 'rw', |
164
|
|
|
|
|
|
|
isa => 'Bool', |
165
|
|
|
|
|
|
|
lazy => 1, |
166
|
|
|
|
|
|
|
default => 0, |
167
|
|
|
|
|
|
|
); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
has _obj_rules => ( |
171
|
|
|
|
|
|
|
is => 'rw', |
172
|
|
|
|
|
|
|
isa => 'ArrayRef[Text::Parser::Rule]', |
173
|
|
|
|
|
|
|
lazy => 1, |
174
|
|
|
|
|
|
|
default => sub { [] }, |
175
|
|
|
|
|
|
|
traits => ['Array'], |
176
|
|
|
|
|
|
|
handles => { |
177
|
|
|
|
|
|
|
_push_obj_rule => 'push', |
178
|
|
|
|
|
|
|
_has_no_obj_rules => 'is_empty', |
179
|
|
|
|
|
|
|
_get_obj_rules => 'elements', |
180
|
|
|
|
|
|
|
}, |
181
|
|
|
|
|
|
|
); |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub add_rule { |
184
|
26
|
|
|
26
|
1
|
83263
|
my $self = shift; |
185
|
26
|
100
|
|
|
|
832
|
$self->auto_split(1) if not $self->auto_split; |
186
|
26
|
|
|
|
|
847
|
my $rule = Text::Parser::Rule->new(@_); |
187
|
26
|
|
|
|
|
1020
|
$self->_push_obj_rule($rule); |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub clear_rules { |
192
|
2
|
|
|
2
|
1
|
6
|
my $self = shift; |
193
|
2
|
|
|
|
|
77
|
$self->_obj_rules( [] ); |
194
|
2
|
|
|
|
|
77
|
$self->_clear_begin_rule; |
195
|
2
|
|
|
|
|
72
|
$self->_clear_end_rule; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
has _begin_rule => ( |
200
|
|
|
|
|
|
|
is => 'rw', |
201
|
|
|
|
|
|
|
isa => 'Text::Parser::Rule', |
202
|
|
|
|
|
|
|
predicate => '_has_begin_rule', |
203
|
|
|
|
|
|
|
clearer => '_clear_begin_rule', |
204
|
|
|
|
|
|
|
); |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub BEGIN_rule { |
207
|
5
|
|
|
5
|
1
|
416
|
my $self = shift; |
208
|
5
|
100
|
|
|
|
147
|
$self->auto_split(1) if not $self->auto_split; |
209
|
5
|
|
|
|
|
24
|
my (%opt) = _defaults_for_begin_end(@_); |
210
|
5
|
|
|
|
|
33
|
$self->_modify_rule( '_begin_rule', %opt ); |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub _defaults_for_begin_end { |
214
|
8
|
|
|
8
|
|
35
|
my (%opt) = @_; |
215
|
8
|
100
|
|
|
|
34
|
$opt{dont_record} = 1 if not exists $opt{dont_record}; |
216
|
8
|
100
|
|
|
|
26
|
delete $opt{if} if exists $opt{if}; |
217
|
8
|
100
|
|
|
|
26
|
delete $opt{continue_to_next} if exists $opt{continue_to_next}; |
218
|
8
|
|
|
|
|
40
|
return (%opt); |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub _modify_rule { |
222
|
8
|
|
|
8
|
|
28
|
my ( $self, $func, %opt ) = @_; |
223
|
8
|
|
|
|
|
26
|
my $pred = '_has' . $func; |
224
|
8
|
100
|
|
|
|
284
|
$self->_append_rule_lines( $func, \%opt ) if $self->$pred(); |
225
|
8
|
|
|
|
|
255
|
my $rule = Text::Parser::Rule->new(%opt); |
226
|
8
|
|
|
|
|
244
|
$self->$func($rule); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub _append_rule_lines { |
230
|
3
|
|
|
3
|
|
19
|
my ( $self, $func, $opt ) = ( shift, shift, shift ); |
231
|
3
|
|
|
|
|
73
|
my $old = $self->$func(); |
232
|
3
|
|
|
|
|
90
|
$opt->{do} = $old->action . $opt->{do}; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
has _end_rule => ( |
237
|
|
|
|
|
|
|
is => 'rw', |
238
|
|
|
|
|
|
|
isa => 'Text::Parser::Rule', |
239
|
|
|
|
|
|
|
predicate => '_has_end_rule', |
240
|
|
|
|
|
|
|
clearer => '_clear_end_rule', |
241
|
|
|
|
|
|
|
); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub END_rule { |
244
|
3
|
|
|
3
|
1
|
28
|
my $self = shift; |
245
|
3
|
100
|
|
|
|
101
|
$self->auto_split(1) if not $self->auto_split; |
246
|
3
|
|
|
|
|
14
|
my (%opt) = _defaults_for_begin_end(@_); |
247
|
3
|
|
|
|
|
16
|
$self->_modify_rule( '_end_rule', %opt ); |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
has _indent_level => ( |
252
|
|
|
|
|
|
|
is => 'ro', |
253
|
|
|
|
|
|
|
isa => 'Int|Undef', |
254
|
|
|
|
|
|
|
lazy => 1, |
255
|
|
|
|
|
|
|
default => undef, |
256
|
|
|
|
|
|
|
writer => '_set_indent_level', |
257
|
|
|
|
|
|
|
reader => 'this_indent', |
258
|
|
|
|
|
|
|
); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
has _current_line => ( |
262
|
|
|
|
|
|
|
is => 'ro', |
263
|
|
|
|
|
|
|
isa => 'Str|Undef', |
264
|
|
|
|
|
|
|
init_arg => undef, |
265
|
|
|
|
|
|
|
writer => '_set_this_line', |
266
|
|
|
|
|
|
|
reader => 'this_line', |
267
|
|
|
|
|
|
|
clearer => '_clear_this_line', |
268
|
|
|
|
|
|
|
default => undef, |
269
|
|
|
|
|
|
|
); |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
has filename => ( |
273
|
|
|
|
|
|
|
is => 'rw', |
274
|
|
|
|
|
|
|
isa => 'Str|Undef', |
275
|
|
|
|
|
|
|
lazy => 1, |
276
|
|
|
|
|
|
|
init_arg => undef, |
277
|
|
|
|
|
|
|
default => undef, |
278
|
|
|
|
|
|
|
predicate => '_has_filename', |
279
|
|
|
|
|
|
|
clearer => '_clear_filename', |
280
|
|
|
|
|
|
|
trigger => \&_set_filehandle, |
281
|
|
|
|
|
|
|
); |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub _set_filehandle { |
284
|
93
|
|
|
93
|
|
22888
|
my $self = shift; |
285
|
93
|
100
|
|
|
|
2680
|
return $self->_clear_filename if not defined $self->filename; |
286
|
92
|
|
|
|
|
510
|
$self->_save_filehandle( $self->__get_valid_fh ); |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
sub __get_valid_fh { |
290
|
92
|
|
|
92
|
|
217
|
my $self = shift; |
291
|
92
|
|
|
|
|
385
|
my $fname = $self->_get_valid_text_filename; |
292
|
92
|
100
|
|
|
|
1333
|
return FileHandle->new( $fname, 'r' ) if defined $fname; |
293
|
4
|
|
|
|
|
128
|
$fname = $self->filename; |
294
|
4
|
|
|
|
|
118
|
$self->_clear_filename; |
295
|
4
|
|
|
|
|
11
|
$self->_throw_invalid_file_exception($fname); |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub _get_valid_text_filename { |
300
|
92
|
|
|
92
|
|
221
|
my $self = shift; |
301
|
92
|
|
|
|
|
2290
|
my $fname = $self->filename; |
302
|
92
|
100
|
66
|
|
|
30087
|
return $fname if -f $fname and -r $fname and -T $fname; |
|
|
|
100
|
|
|
|
|
303
|
4
|
|
|
|
|
22
|
return; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub _throw_invalid_file_exception { |
308
|
4
|
|
|
4
|
|
11
|
my ( $self, $fname ) = ( shift, shift ); |
309
|
4
|
100
|
|
|
|
67
|
parser_exception("Invalid filename $fname") if not -f $fname; |
310
|
1
|
50
|
|
|
|
17
|
parser_exception("Cannot read $fname") if not -r $fname; |
311
|
1
|
|
|
|
|
10
|
parser_exception("Not a plain text file $fname"); |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
has filehandle => ( |
316
|
|
|
|
|
|
|
is => 'rw', |
317
|
|
|
|
|
|
|
isa => 'FileHandle|Undef', |
318
|
|
|
|
|
|
|
lazy => 1, |
319
|
|
|
|
|
|
|
init_arg => undef, |
320
|
|
|
|
|
|
|
default => undef, |
321
|
|
|
|
|
|
|
predicate => '_has_filehandle', |
322
|
|
|
|
|
|
|
writer => '_save_filehandle', |
323
|
|
|
|
|
|
|
reader => '_get_filehandle', |
324
|
|
|
|
|
|
|
clearer => '_close_filehandles', |
325
|
|
|
|
|
|
|
); |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
sub filehandle { |
328
|
105
|
|
|
105
|
1
|
1358
|
my $self = shift; |
329
|
105
|
100
|
100
|
|
|
3744
|
return if not @_ and not $self->_has_filehandle; |
330
|
102
|
100
|
|
|
|
787
|
$self->_save_filehandle(@_) if @_; |
331
|
98
|
100
|
|
|
|
450
|
$self->_clear_filename if @_; |
332
|
98
|
|
|
|
|
3056
|
return $self->_get_filehandle; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
sub read { |
337
|
95
|
|
|
95
|
1
|
95797
|
my $self = shift; |
338
|
95
|
100
|
|
|
|
456
|
return if not defined $self->_handle_read_inp(@_); |
339
|
89
|
|
|
|
|
589
|
$self->_run_begin_end_block('_begin_rule'); |
340
|
89
|
|
|
|
|
491
|
$self->__read_and_close_filehandle; |
341
|
78
|
|
|
|
|
295
|
$self->_run_begin_end_block('_end_rule'); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
sub _handle_read_inp { |
345
|
95
|
|
|
95
|
|
249
|
my $self = shift; |
346
|
95
|
100
|
|
|
|
318
|
return $self->filehandle if not @_; |
347
|
93
|
100
|
100
|
|
|
555
|
return if not ref( $_[0] ) and not $_[0]; |
348
|
92
|
100
|
|
|
|
3063
|
return $self->filename(@_) if not ref( $_[0] ); |
349
|
6
|
|
|
|
|
21
|
return $self->filehandle(@_); |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub _before_begin { |
353
|
89
|
|
|
89
|
|
175
|
my $self = shift; |
354
|
89
|
|
|
|
|
437
|
$self->forget; |
355
|
89
|
100
|
|
|
|
3679
|
$self->_preset_vars( $self->_all_preset ) if not $self->_has_no_prestash; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub _after_end { |
359
|
78
|
|
|
78
|
|
181
|
my $self = shift; |
360
|
78
|
|
|
|
|
2199
|
my $h = $self->_hidden_stash; |
361
|
78
|
|
|
|
|
165
|
$h->{$_} = $self->stashed($_) for ( keys %{$h} ); |
|
78
|
|
|
|
|
530
|
|
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
sub _run_begin_end_block { |
365
|
167
|
|
|
167
|
|
492
|
my ( $self, $func ) = ( shift, shift ); |
366
|
167
|
100
|
|
|
|
791
|
$self->_before_begin if $func eq '_begin_rule'; |
367
|
167
|
|
|
|
|
754
|
$self->_run_beg_end__($func); |
368
|
167
|
100
|
|
|
|
923
|
$self->_after_end if $func eq '_end_rule'; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub _run_beg_end__ { |
372
|
167
|
|
|
167
|
|
415
|
my ( $self, $func ) = ( shift, shift ); |
373
|
167
|
|
|
|
|
481
|
my $pred = '_has' . $func; |
374
|
167
|
100
|
|
|
|
5634
|
return if not $self->$pred(); |
375
|
6
|
|
|
|
|
170
|
my $rule = $self->$func(); |
376
|
6
|
|
|
|
|
33
|
$rule->_run( $self, 0 ); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
sub __read_and_close_filehandle { |
380
|
89
|
|
|
89
|
|
206
|
my $self = shift; |
381
|
89
|
|
|
|
|
423
|
$self->_prep_to_read_file; |
382
|
89
|
|
|
|
|
520
|
$self->__read_file_handle; |
383
|
78
|
|
|
|
|
748
|
$self->_final_operations_after_read; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
sub _prep_to_read_file { |
387
|
89
|
|
|
89
|
|
248
|
my $self = shift; |
388
|
89
|
|
|
|
|
3581
|
$self->_reset_line_count; |
389
|
89
|
|
|
|
|
3442
|
$self->_empty_records; |
390
|
89
|
|
|
|
|
3297
|
$self->_clear_abort; |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub __read_file_handle { |
394
|
89
|
|
|
89
|
|
1221
|
my $self = shift; |
395
|
89
|
|
|
|
|
504
|
my $fh = $self->filehandle(); |
396
|
89
|
|
|
|
|
2079
|
while (<$fh>) { |
397
|
738
|
100
|
|
|
|
2068
|
last if not $self->__parse_line($_); |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
sub __parse_line { |
402
|
738
|
|
|
738
|
|
1648
|
my ( $self, $line ) = ( shift, shift ); |
403
|
738
|
|
|
|
|
1692
|
$line = $self->_prep_line_for_parsing($line); |
404
|
738
|
|
|
|
|
10687
|
$self->_set_this_line($line); |
405
|
738
|
|
|
|
|
3943
|
$self->save_record($line); |
406
|
730
|
|
|
|
|
19981
|
return not $self->has_aborted; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
sub _prep_line_for_parsing { |
410
|
738
|
|
|
738
|
|
1266
|
my ( $self, $line ) = ( shift, shift ); |
411
|
738
|
|
|
|
|
24667
|
$self->_next_line_parsed(); |
412
|
738
|
100
|
|
|
|
18410
|
$self->_find_indent_level($line) if $self->track_indentation; |
413
|
738
|
|
|
|
|
2046
|
$line = $self->_line_manip($line); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub _find_indent_level { |
417
|
18
|
|
|
18
|
|
31
|
my ( $self, $line ) = ( shift, shift ); |
418
|
18
|
|
|
|
|
34
|
chomp $line; |
419
|
18
|
100
|
|
|
|
439
|
length( $self->indentation_str ) >= 2 |
420
|
|
|
|
|
|
|
? $self->_find_long_indent_level($line) |
421
|
|
|
|
|
|
|
: $self->_singlechar_indent($line); |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
sub _find_long_indent_level { |
425
|
9
|
|
|
9
|
|
20
|
my ( $self, $line ) = ( shift, shift ); |
426
|
9
|
|
|
|
|
211
|
my $n = _num_matching( $self->indentation_str, $line ); |
427
|
9
|
|
|
|
|
284
|
$self->_set_indent_level($n); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub _num_matching { |
431
|
9
|
|
|
9
|
|
21
|
my ( $ch, $line ) = ( shift, shift ); |
432
|
9
|
|
|
|
|
94
|
my $it = natatime( length($ch), ( split //, $line ) ); |
433
|
9
|
|
|
|
|
36
|
my ( $i, @x ) = ( 0, $it->() ); |
434
|
9
|
|
|
|
|
32
|
while ( $ch eq join( '', @x ) ) { |
435
|
1
|
|
|
|
|
6
|
( $i, @x ) = ( $i + 1, $it->() ); |
436
|
|
|
|
|
|
|
} |
437
|
9
|
|
|
|
|
40
|
return $i; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
sub _singlechar_indent { |
441
|
9
|
|
|
9
|
|
20
|
my ( $self, $line ) = ( shift, shift ); |
442
|
9
|
|
|
10
|
|
94
|
my $n = first_index { $_ ne $self->indentation_str } ( split //, $line ); |
|
10
|
|
|
|
|
244
|
|
443
|
9
|
|
|
|
|
283
|
$self->_set_indent_level($n); |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub _line_manip { |
447
|
738
|
|
|
738
|
|
1576
|
my ( $self, $line ) = ( shift, shift ); |
448
|
738
|
100
|
|
|
|
18118
|
return $self->_def_line_manip($line) |
449
|
|
|
|
|
|
|
if not defined $self->custom_line_trimmer; |
450
|
1
|
|
|
|
|
26
|
my $cust = $self->custom_line_trimmer; |
451
|
1
|
|
|
|
|
6
|
return $cust->($line); |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub _def_line_manip { |
455
|
737
|
|
|
737
|
|
1562
|
my ( $self, $line ) = ( shift, shift ); |
456
|
737
|
100
|
|
|
|
15608
|
chomp $line if $self->auto_chomp; |
457
|
737
|
|
|
|
|
1889
|
return $self->_trim_line($line); |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
sub _trim_line { |
461
|
737
|
|
|
737
|
|
1434
|
my ( $self, $line ) = ( shift, shift ); |
462
|
737
|
100
|
|
|
|
15659
|
return $line if $self->auto_trim eq 'n'; |
463
|
69
|
100
|
|
|
|
563
|
return trim($line) if $self->auto_trim eq 'b'; |
464
|
8
|
100
|
|
|
|
179
|
return ltrim($line) if $self->auto_trim eq 'l'; |
465
|
4
|
|
|
|
|
15
|
return rtrim($line); |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
sub _final_operations_after_read { |
469
|
78
|
|
|
78
|
|
199
|
my $self = shift; |
470
|
78
|
100
|
|
|
|
2844
|
$self->_close_filehandles if $self->_has_filename; |
471
|
78
|
|
|
|
|
1687
|
$self->_clear_this_line; |
472
|
78
|
100
|
|
|
|
2144
|
$self->_set_indent_level(undef) if $self->track_indentation; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub last_record { |
477
|
16
|
|
|
16
|
1
|
712
|
my $self = shift; |
478
|
16
|
|
|
|
|
653
|
my $count = $self->_num_records(); |
479
|
16
|
100
|
|
|
|
62
|
return if not $count; |
480
|
15
|
|
|
|
|
540
|
return $self->_access_record( $count - 1 ); |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
has records => ( |
485
|
|
|
|
|
|
|
isa => 'ArrayRef[Any]', |
486
|
|
|
|
|
|
|
is => 'ro', |
487
|
|
|
|
|
|
|
lazy => 1, |
488
|
|
|
|
|
|
|
default => sub { return []; }, |
489
|
|
|
|
|
|
|
auto_deref => 1, |
490
|
|
|
|
|
|
|
init_arg => undef, |
491
|
|
|
|
|
|
|
traits => ['Array'], |
492
|
|
|
|
|
|
|
predicate => '_has_records_attrib', |
493
|
|
|
|
|
|
|
handles => { |
494
|
|
|
|
|
|
|
get_records => 'elements', |
495
|
|
|
|
|
|
|
push_records => 'push', |
496
|
|
|
|
|
|
|
pop_record => 'pop', |
497
|
|
|
|
|
|
|
_empty_records => 'clear', |
498
|
|
|
|
|
|
|
_num_records => 'count', |
499
|
|
|
|
|
|
|
_access_record => 'accessor', |
500
|
|
|
|
|
|
|
}, |
501
|
|
|
|
|
|
|
); |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
has _stashed_vars => ( |
505
|
|
|
|
|
|
|
is => 'ro', |
506
|
|
|
|
|
|
|
isa => 'HashRef[Any]', |
507
|
|
|
|
|
|
|
default => sub { {} }, |
508
|
|
|
|
|
|
|
lazy => 1, |
509
|
|
|
|
|
|
|
traits => ['Hash'], |
510
|
|
|
|
|
|
|
handles => { |
511
|
|
|
|
|
|
|
_clear_stash => 'clear', |
512
|
|
|
|
|
|
|
_stashed => 'elements', |
513
|
|
|
|
|
|
|
_has_stashed => 'exists', |
514
|
|
|
|
|
|
|
_forget => 'delete', |
515
|
|
|
|
|
|
|
has_empty_stash => 'is_empty', |
516
|
|
|
|
|
|
|
_get_vars => 'get', |
517
|
|
|
|
|
|
|
_preset_vars => 'set', |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
); |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
sub forget { |
522
|
93
|
|
|
93
|
1
|
235
|
my $self = shift; |
523
|
93
|
100
|
|
|
|
333
|
return $self->_forget_stashed(@_) if @_; |
524
|
90
|
|
|
|
|
3430
|
$self->_clear_stash; |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
sub _forget_stashed { |
528
|
3
|
|
|
3
|
|
6
|
my $self = shift; |
529
|
3
|
|
|
|
|
7
|
foreach my $s (@_) { |
530
|
3
|
100
|
|
|
|
141
|
$self->_forget($s) if $self->_has_stashed($s); |
531
|
3
|
100
|
|
|
|
109
|
$self->_del_prestash($s) if $self->_has_prestash($s); |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
sub stashed { |
536
|
11
|
|
|
11
|
1
|
1224
|
my $self = shift; |
537
|
11
|
100
|
|
|
|
392
|
return $self->_get_vars(@_) if @_; |
538
|
1
|
|
|
|
|
37
|
return $self->_stashed; |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
sub has_stashed { |
542
|
10
|
|
|
10
|
1
|
396
|
my $self = shift; |
543
|
10
|
100
|
|
|
|
391
|
return 1 if $self->_has_stashed(@_); |
544
|
8
|
|
|
|
|
276
|
return $self->_has_prestash(@_); |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
has _hidden_stash => ( |
548
|
|
|
|
|
|
|
is => 'ro', |
549
|
|
|
|
|
|
|
isa => 'HashRef[Any]', |
550
|
|
|
|
|
|
|
default => sub { {} }, |
551
|
|
|
|
|
|
|
lazy => 1, |
552
|
|
|
|
|
|
|
traits => ['Hash'], |
553
|
|
|
|
|
|
|
handles => { |
554
|
|
|
|
|
|
|
prestash => 'set', |
555
|
|
|
|
|
|
|
_all_preset => 'elements', |
556
|
|
|
|
|
|
|
_has_prestash => 'exists', |
557
|
|
|
|
|
|
|
_has_no_prestash => 'is_empty', |
558
|
|
|
|
|
|
|
_del_prestash => 'delete', |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
); |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
has lines_parsed => ( |
564
|
|
|
|
|
|
|
is => 'ro', |
565
|
|
|
|
|
|
|
isa => 'Int', |
566
|
|
|
|
|
|
|
lazy => 1, |
567
|
|
|
|
|
|
|
init_arg => undef, |
568
|
|
|
|
|
|
|
default => 0, |
569
|
|
|
|
|
|
|
traits => ['Counter'], |
570
|
|
|
|
|
|
|
handles => { |
571
|
|
|
|
|
|
|
_next_line_parsed => 'inc', |
572
|
|
|
|
|
|
|
_reset_line_count => 'reset', |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
); |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
has abort => ( |
578
|
|
|
|
|
|
|
is => 'rw', |
579
|
|
|
|
|
|
|
isa => 'Bool', |
580
|
|
|
|
|
|
|
lazy => 1, |
581
|
|
|
|
|
|
|
default => 0, |
582
|
|
|
|
|
|
|
traits => ['Bool'], |
583
|
|
|
|
|
|
|
reader => 'has_aborted', |
584
|
|
|
|
|
|
|
handles => { |
585
|
|
|
|
|
|
|
abort_reading => 'set', |
586
|
|
|
|
|
|
|
_clear_abort => 'unset' |
587
|
|
|
|
|
|
|
}, |
588
|
|
|
|
|
|
|
); |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
has _is_wrapped => ( |
592
|
|
|
|
|
|
|
is => 'rw', |
593
|
|
|
|
|
|
|
isa => 'CodeRef|Undef', |
594
|
|
|
|
|
|
|
default => undef, |
595
|
|
|
|
|
|
|
lazy => 1, |
596
|
|
|
|
|
|
|
); |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
has _unwrap_routine => ( |
599
|
|
|
|
|
|
|
is => 'rw', |
600
|
|
|
|
|
|
|
isa => 'CodeRef|Undef', |
601
|
|
|
|
|
|
|
default => undef, |
602
|
|
|
|
|
|
|
lazy => 1, |
603
|
|
|
|
|
|
|
); |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
sub custom_line_unwrap_routines { |
606
|
8
|
|
|
8
|
1
|
479
|
my $self = shift; |
607
|
8
|
|
|
|
|
34
|
$self->_prep_for_custom_unwrap_routines; |
608
|
7
|
|
|
|
|
28
|
my ( $is_wr, $un_wr ) = _check_custom_unwrap_args(@_); |
609
|
2
|
|
|
|
|
96
|
$self->_is_wrapped($is_wr); |
610
|
2
|
|
|
|
|
56
|
$self->_unwrap_routine($un_wr); |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
sub _prep_for_custom_unwrap_routines { |
614
|
8
|
|
|
8
|
|
11
|
my $self = shift; |
615
|
8
|
|
|
|
|
267
|
my $s = $self->line_wrap_style(); |
616
|
8
|
100
|
100
|
|
|
44
|
parser_exception("Line wrap style already set to $s") |
617
|
|
|
|
|
|
|
if defined $s and 'custom' ne $s; |
618
|
7
|
|
|
|
|
220
|
$self->line_wrap_style('custom'); |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
my $unwrap_prefix = "Bad call to custom_line_unwrap_routines: "; |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
sub save_record { |
627
|
454
|
|
|
454
|
1
|
8655
|
my ( $self, $record ) = ( shift, shift ); |
628
|
454
|
100
|
|
|
|
1118
|
( $self->_has_no_rules ) |
629
|
|
|
|
|
|
|
? $self->push_records($record) |
630
|
|
|
|
|
|
|
: $self->_run_through_rules; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
sub _has_no_rules { |
634
|
454
|
|
|
454
|
|
710
|
my $self = shift; |
635
|
454
|
100
|
|
|
|
11816
|
return 0 if Text::Parser::RuleSpec->class_has_rules( $self->_origclass ); |
636
|
388
|
|
|
|
|
13450
|
return $self->_has_no_obj_rules; |
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
sub _run_through_rules { |
640
|
265
|
|
|
265
|
|
445
|
my $self = shift; |
641
|
265
|
|
|
|
|
6263
|
my (@crules) = Text::Parser::RuleSpec->class_rules( $self->_origclass ); |
642
|
265
|
|
|
|
|
8827
|
foreach my $rule ( @crules, $self->_get_obj_rules ) { |
643
|
630
|
100
|
|
|
|
1793
|
next if not $rule->_test($self); |
644
|
174
|
|
|
|
|
619
|
$rule->_run( $self, 0 ); |
645
|
174
|
100
|
|
|
|
4866
|
last if not $rule->continue_to_next; |
646
|
|
|
|
|
|
|
} |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
my %IS_LINE_CONTINUED = ( |
651
|
|
|
|
|
|
|
default => \&_def_is_line_continued, |
652
|
|
|
|
|
|
|
spice => \&_spice_is_line_contd, |
653
|
|
|
|
|
|
|
trailing_backslash => \&_tbs_is_line_contd, |
654
|
|
|
|
|
|
|
just_next_line => \&_jnl_is_line_contd, |
655
|
|
|
|
|
|
|
slurp => \&_def_is_line_continued, |
656
|
|
|
|
|
|
|
custom => undef, |
657
|
|
|
|
|
|
|
); |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
my %JOIN_LAST_LINE = ( |
660
|
|
|
|
|
|
|
default => \&_def_join_last_line, |
661
|
|
|
|
|
|
|
spice => \&_spice_join_last_line, |
662
|
|
|
|
|
|
|
trailing_backslash => \&_tbs_join_last_line, |
663
|
|
|
|
|
|
|
just_next_line => \&_jnl_join_last_line, |
664
|
|
|
|
|
|
|
slurp => \&_def_join_last_line, |
665
|
|
|
|
|
|
|
custom => undef, |
666
|
|
|
|
|
|
|
); |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
sub is_line_continued { |
669
|
162
|
|
|
162
|
1
|
291
|
my $self = shift; |
670
|
162
|
100
|
|
|
|
345
|
return 0 if not defined $self->multiline_type; |
671
|
156
|
|
|
|
|
412
|
my $routine = $self->_get_is_line_contd_routine; |
672
|
156
|
100
|
|
|
|
327
|
parser_exception("is_wrapped routine not defined") |
673
|
|
|
|
|
|
|
if not defined $routine; |
674
|
155
|
|
|
|
|
328
|
$routine->( $self, @_ ); |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
sub _val_of_line_wrap_style { |
678
|
291
|
|
|
291
|
|
385
|
my $self = shift; |
679
|
291
|
100
|
|
|
|
7050
|
defined $self->line_wrap_style ? $self->line_wrap_style : 'default'; |
680
|
|
|
|
|
|
|
} |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
sub _get_is_line_contd_routine { |
683
|
156
|
|
|
156
|
|
232
|
my $self = shift; |
684
|
156
|
|
|
|
|
300
|
my $val = $self->_val_of_line_wrap_style; |
685
|
|
|
|
|
|
|
( $val ne 'custom' ) |
686
|
156
|
100
|
|
|
|
712
|
? $IS_LINE_CONTINUED{$val} |
687
|
|
|
|
|
|
|
: $self->_is_wrapped; |
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
sub join_last_line { |
691
|
135
|
|
|
135
|
1
|
218
|
my $self = shift; |
692
|
135
|
|
|
|
|
293
|
my $routine = $self->_get_join_last_line_routine; |
693
|
135
|
100
|
|
|
|
282
|
parser_exception("unwrap_routine not defined") |
694
|
|
|
|
|
|
|
if not defined $routine; |
695
|
134
|
|
|
|
|
276
|
$routine->( $self, @_ ); |
696
|
|
|
|
|
|
|
} |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
sub _get_join_last_line_routine { |
699
|
135
|
|
|
135
|
|
563
|
my $self = shift; |
700
|
135
|
|
|
|
|
246
|
my $val = $self->_val_of_line_wrap_style; |
701
|
|
|
|
|
|
|
( $val ne 'custom' ) |
702
|
135
|
100
|
|
|
|
489
|
? $JOIN_LAST_LINE{$val} |
703
|
|
|
|
|
|
|
: $self->_unwrap_routine; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub _def_is_line_continued { |
707
|
116
|
|
|
116
|
|
168
|
my $self = shift; |
708
|
116
|
100
|
100
|
|
|
257
|
return 0 |
709
|
|
|
|
|
|
|
if $self->multiline_type eq 'join_last' |
710
|
|
|
|
|
|
|
and $self->lines_parsed() == 1; |
711
|
110
|
|
|
|
|
377
|
return 1; |
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
sub _spice_is_line_contd { |
715
|
7
|
|
|
7
|
|
9
|
my $self = shift; |
716
|
7
|
|
|
|
|
158
|
substr( shift, 0, 1 ) eq '+'; |
717
|
|
|
|
|
|
|
} |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
sub _tbs_is_line_contd { |
720
|
3
|
|
|
3
|
|
6
|
my $self = shift; |
721
|
3
|
|
|
|
|
10
|
substr( trim(shift), -1, 1 ) eq "\\"; |
722
|
|
|
|
|
|
|
} |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
sub _jnl_is_line_contd { |
725
|
21
|
|
|
21
|
|
31
|
my $self = shift; |
726
|
21
|
100
|
|
|
|
497
|
return 0 if $self->lines_parsed == 1; |
727
|
20
|
|
|
|
|
65
|
return length( trim(shift) ) > 0; |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
sub _def_join_last_line { |
731
|
107
|
|
|
107
|
|
220
|
my ( $self, $last, $line ) = ( shift, shift, shift ); |
732
|
107
|
|
|
|
|
378
|
return $last . $line; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
sub _spice_join_last_line { |
736
|
3
|
|
|
3
|
|
7
|
my ( $self, $last, $line ) = ( shift, shift, shift ); |
737
|
3
|
|
|
|
|
8
|
chomp $last; |
738
|
3
|
|
|
|
|
21
|
$line =~ s/^[+]\s*/ /; |
739
|
3
|
|
|
|
|
14
|
$last . $line; |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
sub _tbs_join_last_line { |
743
|
2
|
|
|
2
|
|
5
|
my ( $self, $last, $line ) = ( shift, shift, shift ); |
744
|
2
|
|
|
|
|
6
|
chomp $last; |
745
|
2
|
|
|
|
|
10
|
$last =~ s/\\\s*$//; |
746
|
2
|
|
|
|
|
10
|
rtrim($last) . ' ' . ltrim($line); |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
sub _jnl_join_last_line { |
750
|
18
|
|
|
18
|
|
38
|
my ( $self, $last, $line ) = ( shift, shift, shift ); |
751
|
18
|
|
|
|
|
44
|
chomp $last; |
752
|
18
|
|
|
|
|
59
|
return $last . $line; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
757
|
|
|
|
|
|
|
|
758
|
42
|
|
|
42
|
|
235822
|
no Moose; |
|
42
|
|
|
|
|
131
|
|
|
42
|
|
|
|
|
403
|
|
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
1; |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
__END__ |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=pod |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
=encoding UTF-8 |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=head1 NAME |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
Text::Parser - Simplifies text parsing. Easily extensible to parse any text format. |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=head1 VERSION |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
version 1.000 |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=head1 SYNOPSIS |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
The following prints the content of the file (named in the first argument) to C<STDOUT>. |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
use Text::Parser; |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
my $parser = Text::Parser->new(); |
783
|
|
|
|
|
|
|
$parser->read(shift); |
784
|
|
|
|
|
|
|
print $parser->get_records, "\n"; |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
The above code prints after reading the whole file, which can be slow if you have large fules. This following prints contents immediately. |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
my $parser = Text::Parser->new(); |
789
|
|
|
|
|
|
|
$parser->add_rule(do => 'print', dont_record => 1); |
790
|
|
|
|
|
|
|
($#ARGV > 0) ? $parser->filename(shift) : $parser->filehandle(\*STDIN); |
791
|
|
|
|
|
|
|
$parser->read(); # Runs the rule for each line of input file |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
Also, the third line there allows this program to read from a file name specified on command-line, or C<STDIN>. In effect, this makes this Perl code a good replica of the UNIX C<cat>. |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
Here is an example with a simple rule that extracts the first error in the logfile and aborts reading further: |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
my $parser = Text::Parser->new(); |
798
|
|
|
|
|
|
|
$parser->add_rule( |
799
|
|
|
|
|
|
|
if => '$1 eq "ERROR:"', |
800
|
|
|
|
|
|
|
# $1 is a positional identifier for first 'field' on the line |
801
|
|
|
|
|
|
|
do => '$this->abort_reading; return $_;' |
802
|
|
|
|
|
|
|
# $this is copy of $parser accessible from within the rule |
803
|
|
|
|
|
|
|
# abort_reading() tells parser to stop reading further |
804
|
|
|
|
|
|
|
# Returned values are saved as records. Any data structure can be saved. |
805
|
|
|
|
|
|
|
# $_ contains the full line as string, including any whitespaces |
806
|
|
|
|
|
|
|
); |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
# Returns the first line starting with "ERROR:" |
809
|
|
|
|
|
|
|
$parser->read('/path/to/logfile'); |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
print "Some errors were found:\n" if $parser->get_records(); |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
See L<this important note|Text::Parser::Manual::ExtendedAWKSyntax/"Important note about quotes"> about using single quotes instead of double quotes. |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
Here is an example that parses a table with field separators indicated by C<|> character: |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
use Data::Dumper 'Dumper'; |
818
|
|
|
|
|
|
|
my $table_parser = Text::Parser->new( FS => qr/\s*[|]\s*/ ); |
819
|
|
|
|
|
|
|
$table_parser->add_rule( |
820
|
|
|
|
|
|
|
if => '$this->NF == 0', |
821
|
|
|
|
|
|
|
dont_record => 1 |
822
|
|
|
|
|
|
|
); |
823
|
|
|
|
|
|
|
$table_parser->add_rule( |
824
|
|
|
|
|
|
|
if => '$this->lines_parsed == 1', |
825
|
|
|
|
|
|
|
do => '~columns = [$this->fields()];' |
826
|
|
|
|
|
|
|
); |
827
|
|
|
|
|
|
|
$table_parser->add_rule( |
828
|
|
|
|
|
|
|
if => '$this->lines_parsed > 1', |
829
|
|
|
|
|
|
|
do => 'my %rec = (); |
830
|
|
|
|
|
|
|
foreach my $i (0..$#{~columns}) { |
831
|
|
|
|
|
|
|
my $k = ~columns->[$i]; |
832
|
|
|
|
|
|
|
$rec{$k} = $this->field($i); |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
return \%rec;', |
835
|
|
|
|
|
|
|
); |
836
|
|
|
|
|
|
|
$table_parser->read('table.txt'); |
837
|
|
|
|
|
|
|
print Dumper($table_parser->get_records()), "\n"; |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
In the above example you see the use of a L<stashed variable|/"METHODS FOR ACCESSING STASHED VARIABLES"> named C<~columns>. Note that the sigil used here is not a Perl sigil, but is converted to native Perl code. In the above case, each record is a hash with fixed number of fields. |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
More complex file-formats can be read and contents stored in a data-structure or an object. Here is an example: |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
use strict; |
844
|
|
|
|
|
|
|
use warnings; |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
package ComplexFormatParser; |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
use Text::Parser::RuleSpec; ## provides applies_rule + other sugar, imports Moose |
849
|
|
|
|
|
|
|
extends 'Text::Parser'; |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
# This rule ignores all comments |
852
|
|
|
|
|
|
|
applies_rule ignore_comments => ( |
853
|
|
|
|
|
|
|
if => 'substr($1, 0, 1) eq "#"', |
854
|
|
|
|
|
|
|
dont_record => 1, |
855
|
|
|
|
|
|
|
); |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
# An attribute of the parser class. |
858
|
|
|
|
|
|
|
has current_section => ( |
859
|
|
|
|
|
|
|
is => 'rw', |
860
|
|
|
|
|
|
|
isa => 'Str', |
861
|
|
|
|
|
|
|
default => undef, |
862
|
|
|
|
|
|
|
); |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
applies_rule get_header => ( |
865
|
|
|
|
|
|
|
if => '$1 eq "SECTION"', |
866
|
|
|
|
|
|
|
do => '$this->current_section($2);', # $this : this parser object |
867
|
|
|
|
|
|
|
dont_record => 1, |
868
|
|
|
|
|
|
|
); |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
# ... More can be done |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
package main; |
873
|
|
|
|
|
|
|
use ComplexFormatParser; |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
my $p = ComplexFormatParser->new(); |
876
|
|
|
|
|
|
|
$p->read('myfile.complex.fmt'); |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=head1 RATIONALE |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
The L<need|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it in Perl. Most programmers still write boilerplate code with a C<while> loop. |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
Instead C<Text::Parser> allows programmers to parse text with simple, self-explanatory L<rules|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK. |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
I<B<Sidenote:>> Incidentally, AWK is L<one of the ancestors of Perl|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to do way better than AWK. But while you can use Perl to do what AWK already does, that is usually limited to one-liners like C<perl -lane>. Even C<perl -lan script.pl> is not meant for serious projects. And it seems that L<some people still prefer AWK to Perl|https://aplawrence.com/Unixart/awk-vs.perl.html>. This is not looking good. |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=head1 OVERVIEW |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read|/read>> method automatically runs each rule collecting records from the text input into an internal array. Finally, C<L<get_records|/get_records>> can retrieve the records. |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
Since C<Text::Parser> is a class, a programmer can subclass it to parse very complex file formats. L<Text::Parser::RuleSpec> provides intuitive rule sugar. Use of L<Moose> is encouraged. And data from parsed files can be turned into very complex data-structures or even objects. In this case, you wouldn't need to use C<get_records>. |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
With B<L<Text::Parser>> programmers have the elegance and simplicity of AWK combined with the power of Perl at their disposal. |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
=head2 new |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
Takes optional attributes as in example below. See section L<ATTRIBUTES|/ATTRIBUTES> for a list of the attributes and their description. |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
my $parser = Text::Parser->new(); |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
my $parser2 = Text::Parser->new( line_wrap_style => 'trailing_backslash' ); |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name. |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
=head2 auto_chomp |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>. |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
print "Parser will chomp lines automatically\n" if $parser->auto_chomp; |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
=head2 auto_split |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields. |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
If it is set to a true value, each line will be split into fields, and L<a set of methods|/"METHODS USED ONLY IN RULES AND SUBCLASSES"> become accessible to C<L<save_record|/save_record>> or the rules. |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head2 auto_trim |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
Read-write attribute. The values this can take are shown under the C<L<new|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed). |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
$parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default) |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
=head2 custom_line_trimmer |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
Read-write attribute which can be set to a custom subroutine that trims each line before applying any rules or saving any records. The function is expected to take a single argument containing the complete un-trimmed line, and is expected to return a manipulated line. |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
sub _cust_trimmer { |
931
|
|
|
|
|
|
|
my $line = shift; |
932
|
|
|
|
|
|
|
chomp $line; |
933
|
|
|
|
|
|
|
return $line; |
934
|
|
|
|
|
|
|
} |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
$parser->custom_line_trimmer(\&_cust_trimmer); |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
B<Note:> If you set this attribute, you are entirely responsible for the trimming. Poorly written routines could causing the C<auto_split> operation to misbehave. |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
By default it is undefined. |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
=head2 FS |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+|[,]/> which will split across either spaces or commas. The default value for this attribute is C<qr/\s+/>. |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>. The ability to use a regular expression is an upgrade from AWK. |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
$parser->FS( qr/\s+\(*|\s*\)/ ); |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
C<FS> I<can> be changed from within a rule. Changes made even within a rule would take effect on the immediately next line read. |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
=head2 indentation_str |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
This can be used to set the indentation character or string. By default it is a single space C< >. But you may want to set it to be a tab (C<\t>) or perhaps some other character like a hyphen (C<->) or even a string (C< -E<gt>>). This attribute is used only if C<L<track_indentation|/track_indentation>> is set. |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=head2 line_wrap_style |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
Read-write attribute used as a quick way to select from commonly known line-wrapping styles. If the target text format allows line-wrapping this attribute allows the programmer to write rules as if they were on a single line. |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
$parser->line_wrap_style('trailing_backslash'); |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
Allowed values are: |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
trailing_backslash - very common style ending lines with \ |
965
|
|
|
|
|
|
|
and continuing on the next line |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
spice - used for SPICE syntax, where the (+) |
968
|
|
|
|
|
|
|
+ symbol continues content of last line |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
just_next_line - used in simple text files written to be |
971
|
|
|
|
|
|
|
humanly-readable. New paragraphs start |
972
|
|
|
|
|
|
|
on a new line after a blank line. |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
slurp - used to "slurp" the whole file into |
975
|
|
|
|
|
|
|
a single line. |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
custom - user-defined style. User must specify |
978
|
|
|
|
|
|
|
value of multiline_type and define |
979
|
|
|
|
|
|
|
two custom unwrap routines using the |
980
|
|
|
|
|
|
|
custom_line_unwrap_routines method |
981
|
|
|
|
|
|
|
when custom is chosen. |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
When C<line_wrap_style> is set to one of these values, the value of C<multiline_type> is automatically set to an appropriate value. Read more about L<handling the common line-wrapping styles|/"Common line-wrapping styles">. |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
=head2 multiline_type |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
Read-write attribute used mainly if the programmer wishes to specify custom line-unwrapping methods. By default, this attribute is C<undef>, i.e., the target text format will not have wrapped lines. |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
$parser->line_wrap_style('custom'); |
990
|
|
|
|
|
|
|
$parser->multiline_type('join_next'); |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
my $mult = $parser->multiline_type; |
993
|
|
|
|
|
|
|
print "Parser is a multi-line parser of type: $mult" if defined $mult; |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
Allowed values for C<multiline_type> are described below, but it can also be set back to C<undef>. |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=over 4 |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
=item * |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>. |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
=item * |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>. |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
=back |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
To know more about how to use this, read about L<specifying custom line-unwrap routines|/"Specifying custom line-unwrap routines">. |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
=head2 track_indentation |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
This boolean attribute enables tracking of the number of indentation characters are there at the beginning of each line. In some text formats, this is a very important information that can indicate the depth of some data. By default, this is false. When set to a true value, you can get the number of indentation characters on a given line with the C<L<this_indent|/this_indent>> method. |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
$parser->track_indentation(1); |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
Now you can use C<this_indent> method in the rules: |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
$parser->add_rule(if => '$this->this_indent > 0', do => '~num_indented ++;') |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
=head1 METHODS FOR SPECIFYING RULES |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
These are meant to be called from the C<::main> program or within subclasses. |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
=head2 add_rule |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor. |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
$parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing |
1030
|
|
|
|
|
|
|
$parser->add_rule(if => 'm/li/, do => 'print', dont_record); # Prints lines with 'li' |
1031
|
|
|
|
|
|
|
$parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute. |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
=head2 clear_rules |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
Takes no arguments, returns nothing. Clears the rules that were added to the object. |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
$parser->clear_rules; |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule|/BEGIN_rule>> and C<L<END_rule|/END_rule>>. |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
=head2 BEGIN_rule |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
$parser->BEGIN_rule(do => '~count = 0;'); |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
=over 4 |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
=item * |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
Since any C<if> key is ignored, the C<do> key is required. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included. |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=item * |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
The C<BEGIN_rule> is mainly used to initialize some variables. |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
=item * |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
By default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved even before reading the first line of text. |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=back |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
=head2 END_rule |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method. |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
$parser->END_rule(do => 'print ~count, "\n";'); |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
=over 4 |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
=item * |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
Since any C<if> key is ignored, the C<do> key is required. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included. |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
=item * |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
The C<END_rule> is mainly used to do final processing of collected records. |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
=item * |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
By default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved after the end rule is processed. |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
=back |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
=head1 METHODS USED ONLY IN RULES AND SUBCLASSES |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
These methods can be used only inside rules, or methods of a subclass. Some of these methods are available only when C<auto_split> is on. They are listed as follows: |
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
=over 4 |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
=item * |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
L<NF|Text::Parser::AutoSplit/NF> - number of fields on this line |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
=item * |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
L<fields|Text::Parser::AutoSplit/fields> - all the fields as an array of strings ; trailing C<\n> removed |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
=item * |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
L<field|Text::Parser::AutoSplit/field> - access individual elements of the array above ; negative arguments count from back |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
=item * |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
L<field_range|Text::Parser::AutoSplit/field_range> - array of fields in the given range of indices ; negative arguments allowed |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=item * |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
L<join_range|Text::Parser::AutoSplit/join_range> - join the fields in the range of indices ; negative arguments allowed |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
=item * |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
L<find_field|Text::Parser::AutoSplit/find_field> - returns field for which a given subroutine is true ; each field is passed to the subroutine in C<$_> |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
=item * |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
L<find_field_index|Text::Parser::AutoSplit/find_field_index> - similar to above, except it returns the index of the field instead of the field itself |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
=item * |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
L<splice_fields|Text::Parser::AutoSplit/splice_fields> - like the native Perl C<splice> |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
=back |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
Other methods described below are also to be used only inside a rule, or inside methods called by the rules. |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
=head2 abort_reading |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
Takes no arguments. Returns C<1>. Aborts C<read>ing any more lines, and C<read> method exits gracefully as if nothing unusual happened. |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
$parser->add_rule( |
1134
|
|
|
|
|
|
|
do => '$this->abort_reading;', |
1135
|
|
|
|
|
|
|
if => '$1 eq "EOF"', |
1136
|
|
|
|
|
|
|
dont_record => 1, |
1137
|
|
|
|
|
|
|
); |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
=head2 this_indent |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
Takes no arguments, and returns the number of indentation characters found at the front of the current line. This can be called from within a rule: |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
$parser->add_rule( if => '$this->this_indent > 0', ); |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
=head2 this_line |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
Takes no arguments, and returns the current line being parsed. For example: |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
$parser->add_rule( |
1150
|
|
|
|
|
|
|
if => 'length($this->this_line) > 256', |
1151
|
|
|
|
|
|
|
); |
1152
|
|
|
|
|
|
|
## Saves all lines longer than 256 characters |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
Inside rules, instead of using this method, one may also use C<$_>: |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
$parser->add_rule( |
1157
|
|
|
|
|
|
|
if => 'length($_) > 256', |
1158
|
|
|
|
|
|
|
); |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
=head1 METHODS FOR READING INPUT |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
=head2 filename |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened. |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
print "Last read ", $parser->filename, "\n"; |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read|/read>>. |
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
$parser->read(shift @ARGV); |
1171
|
|
|
|
|
|
|
print $parser->filename(), ":\n", |
1172
|
|
|
|
|
|
|
"=" x (length($parser->filename())+1), |
1173
|
|
|
|
|
|
|
"\n", |
1174
|
|
|
|
|
|
|
$parser->get_records(), |
1175
|
|
|
|
|
|
|
"\n"; |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
A C<read> call with a filehandle, will clear the last file name. |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
$parser->read(\*MYFH); |
1180
|
|
|
|
|
|
|
print "Last file name is lost\n" if not defined $parser->filename(); |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
=head2 filehandle |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved. |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
my $fh = $parser->filehandle(); |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
Like C<L<filename|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename. |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
$parser->read(\*STDOUT); |
1191
|
|
|
|
|
|
|
my $lastfh = $parser->filehandle(); # Will return glob of STDOUT |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
=head2 read |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class. |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
$parser->read($filename); # Read the file |
1198
|
|
|
|
|
|
|
$parser->read(\*STDIN); # Read the filehandle |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
The above could also be done in two steps if the developer so chooses. |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
$parser->filename($filename); |
1203
|
|
|
|
|
|
|
$parser->read(); # equiv: $parser->read($filename) |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
$parser->filehandle(\*STDIN); |
1206
|
|
|
|
|
|
|
$parser->read(); # equiv: $parser->read(\*STDIN) |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading|/abort_reading>> method. |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter. |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
$parser->read('myfile.txt'); # Will close file automatically |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at "; |
1215
|
|
|
|
|
|
|
$parser->read(\*MYFH); # Will not close MYFH |
1216
|
|
|
|
|
|
|
close MYFH; |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=head1 METHODS FOR HANDLING RECORDS |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
=head2 get_records |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
Takes no arguments. Returns an array containing all the records saved by the parser. |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
foreach my $record ( $parser->get_records ) { |
1225
|
|
|
|
|
|
|
$i++; |
1226
|
|
|
|
|
|
|
print "Record: $i: ", $record, "\n"; |
1227
|
|
|
|
|
|
|
} |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
=head2 last_record |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
Takes no arguments and returns the last saved record. Leaves the saved records untouched. |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
my $last_rec = $parser->last_record; |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
=head2 pop_record |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
Takes no arguments and pops the last saved record. |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
my $last_rec = $parser->pop_record; |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
=head2 push_records |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
Takes an array as input, and stores each element as a separate record. Returns the number of elements in the new array. |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
$parser->push_records(qw(insert these as separate records)); |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
=head1 METHODS FOR ACCESSING STASHED VARIABLES |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
Stashed variables can be data structures or simple scalar variables stored as elements in the parser object. Hence they are accessible across different rules. Stashed variables start with a tilde (~). So you could set up rules like these: |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
$parser->BEGIN_rule( do => '~count=0;' ); |
1252
|
|
|
|
|
|
|
$parser->add_rule( if => '$1 eq "SECTION"', do => '~count++;' ); |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
In the above rule C<~count> is a stashed variable. Internally this is just a hash element with key named C<count>. After the C<read> call is over, this variable can be accessed. |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
$parser->read('some_text_file.txt'); |
1257
|
|
|
|
|
|
|
print "Found ", $parser->stashed('count'), " sections in file.\n"; |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
Stashed variables that are created entirely within the rules are forgotten at the beginning of the next C<read> call. This means, you can C<read> another text file and don't have to bother to clear out the stashed variable C<~count>. |
1260
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
$parser->read('another_text_file.txt'); |
1262
|
|
|
|
|
|
|
print "Found ", $parser->stashed('count'), " sections in file.\n"; |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
In contrast, stashed variables created by calling C<prestash> continue to persist for subsequent calls of C<read>, unless an explicit call to C<forget> names these pre-stashed variables. |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
$parser->prestash( max_err => 100 ); |
1267
|
|
|
|
|
|
|
$parser->BEGIN_rule( do => '~err_count = 0;' ); |
1268
|
|
|
|
|
|
|
$parser->add_rule( |
1269
|
|
|
|
|
|
|
if => '$1 eq "ERROR:" && ~err_count < ~max_err', |
1270
|
|
|
|
|
|
|
do => '~err_count++;', |
1271
|
|
|
|
|
|
|
continue_to_next => 1, |
1272
|
|
|
|
|
|
|
); |
1273
|
|
|
|
|
|
|
$parser->add_rule( |
1274
|
|
|
|
|
|
|
if => '$1 eq "ERROR:" && ~err_count == ~max_err', |
1275
|
|
|
|
|
|
|
do => '$this->abort_reading;', |
1276
|
|
|
|
|
|
|
); |
1277
|
|
|
|
|
|
|
$parser->read('first.log'); |
1278
|
|
|
|
|
|
|
print "Top 100 errors:\n", $parser->get_records, "\n"; |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
$parser->read('another.log'); # max_err is still set to 100, but err_count is forgotten and reset to 0 by the BEGIN_rule |
1281
|
|
|
|
|
|
|
print "Top 100 errors:\n", $parser->get_records, "\n"; |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
=head2 forget |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
Takes an optional list of string arguments which must be the names of stashed variables. This method forgets those stashed variables for ever. So be sure you really intend to do this. In list context, this method returns the values of the variables whose names were passed to the method. In scalar context, it returns the last value of the last stashed variable passed. |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
my $pop_and_forget_me = $parser->forget('forget_me_totally', 'pop_and_forget_me'); |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
Inside rules, you could simply C<delete> the stashed variable like this: |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
$parser->add_rule( do => 'delete ~forget_me;' ); |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
The above C<delete> statement works because the stashed variable C<~forget_me> is just a hash key named C<forget_me> internally. Using this on pre-stashed variables, will only temporarily delete the variable. It will be present in subsequent calls to C<read>. If you want to delete it completely call C<forget> with the pre-stashed variable name as an argument. |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
When no arguments are passed, it clears all stashed variables (not pre-stashed). |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
$parser->forget; |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
Note that when C<forget> is called with no arguments, pre-stashed variables are not deleted and are still accessible in subsequent calls to C<read>. To forget a pre-stashed variable, it needs to be explicitly named in a call to forget. Then it is forgotten. |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
A call to C<forget> method is done without any arguments, right before C<read> starts reading a new text input. That is how we can reset the values of stashed variables, but still retain pre-stashed variables. |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=head2 has_empty_stash |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
Takes no arguments and returns a true value if the stash of variables is empty (i.e., no stashed variables are present). If not, it returns a boolean false. |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
if ( not $parser->has_empty_stash ) { |
1308
|
|
|
|
|
|
|
my $myvar = $parser->stashed('myvar'); |
1309
|
|
|
|
|
|
|
print "myvar = $myvar\n"; |
1310
|
|
|
|
|
|
|
} |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
=head2 has_stashed |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
Takes a single string argument and returns a boolean indicating if there is a stashed variable with that name or not: |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
if ( $parser->has_stashed('stashed_var') ) { |
1317
|
|
|
|
|
|
|
print "Here is what stashed_var contains: ", $parser->stashed('stashed_var'); |
1318
|
|
|
|
|
|
|
} |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
Inside rules you could check this with the C<exists> keyword: |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
$parser->add_rule( if => 'exists ~stashed_var' ); |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
=head2 prestash |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
Takes an even number of arguments, or a hash, with variable name and value as pairs. This is useful to preset some stash variables before C<read> is called so that the rules have some variables accessible inside them. The main difference between pre-stashed variables created via C<prestash> and those created in the rules or using C<stashed> is that the pre-stashed ones are static. |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
$parser->prestash(pattern => 'string'); |
1329
|
|
|
|
|
|
|
$parser->add_rule( if => 'my $patt = ~pattern; m/$patt/;' ); |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
You may change the value of a C<prestash>ed variable inside any of the rules. |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
=head2 stashed |
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
Takes an optional list of string arguments each with the name of a stashed variable you want to query, i.e., get the value of. In list context, it returns their values in the same order as the queried variables, and in scalar context it returns the value of the last variable queried. |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
my (%var_vals) = $parser->stashed; |
1338
|
|
|
|
|
|
|
my (@vars) = $parser->stashed( qw(first second third) ); |
1339
|
|
|
|
|
|
|
my $third = $parser->stashed( qw(first second third) ); # returns value of last variable listed |
1340
|
|
|
|
|
|
|
my $myvar = $parser->stashed('myvar'); |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
Or you could do this: |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
use Data::Dumper 'Dumper'; |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
if ( $parser->has_empty_stash ) { |
1347
|
|
|
|
|
|
|
print "Nothing on my stash\n"; |
1348
|
|
|
|
|
|
|
} else { |
1349
|
|
|
|
|
|
|
my %stash = $parser->stashed; |
1350
|
|
|
|
|
|
|
print Dumper(\%stash), "\n"; |
1351
|
|
|
|
|
|
|
} |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
=head1 MISCELLANEOUS METHODS |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
=head2 lines_parsed |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
Takes no arguments. Returns the number of lines last parsed. Every call to C<read>, causes the value to be auto-reset. |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
print $parser->lines_parsed, " lines were parsed\n"; |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
=head2 has_aborted |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle. |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
print "Aborted\n" if $parser->has_aborted(); |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
=head2 custom_line_unwrap_routines |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
This method should be used only when the line-wrapping supported by the text format is not already among the L<known line-wrapping styles supported|/"Common line-wrapping styles">. |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
Takes a hash argument with required keys C<is_wrapped> and C<unwrap_routine>. Used in setting up L<custom line-unwrapping routines|/"Specifying custom line-unwrap routines">. |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
Here is an example of setting custom line-unwrapping routines: |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
$parser->multiline_type('join_last'); |
1376
|
|
|
|
|
|
|
$parser->custom_line_unwrap_routines( |
1377
|
|
|
|
|
|
|
is_wrapped => sub { # A method that detects if this line is wrapped or not |
1378
|
|
|
|
|
|
|
my ($self, $this_line) = @_; |
1379
|
|
|
|
|
|
|
$this_line =~ /^[~]/; |
1380
|
|
|
|
|
|
|
}, |
1381
|
|
|
|
|
|
|
unwrap_routine => sub { # A method to unwrap the line by joining it with the last line |
1382
|
|
|
|
|
|
|
my ($self, $last_line, $this_line) = @_; |
1383
|
|
|
|
|
|
|
chomp $last_line; |
1384
|
|
|
|
|
|
|
$last_line =~ s/\s*$//g; |
1385
|
|
|
|
|
|
|
$this_line =~ s/^[~]\s*//g; |
1386
|
|
|
|
|
|
|
"$last_line $this_line"; |
1387
|
|
|
|
|
|
|
}, |
1388
|
|
|
|
|
|
|
); |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
Now you can parse a file with the following content: |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
This is a long line that is wrapped around with a custom |
1393
|
|
|
|
|
|
|
~ character - the tilde. It is unusual, but hey, we're |
1394
|
|
|
|
|
|
|
~ showing an example. |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
When C<$parser> gets to C<read> this, these three lines get unwrapped and processed by the rules, as if it were a single line. |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
L<Text::Parser::Multiline> shows another example with C<join_next> type. |
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=head1 METHODS THAT MAY BE OVERRIDDEN IN SUBCLASSES |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass. |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
Starting version 0.925, users should never need to override any of these methods to make their own parser. |
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=head2 save_record |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
The default implementation takes a single argument, runs any rules, and saves the returned value as a record in an internal array. If nothing is returned from the rule, C<undef> is stored as a record. |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
B<Note>: Starting C<0.925> version of C<Text::Parser> it is not required to override this method in your derived class. In most cases, you should use the rules. |
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
B<Importnant Note:> Starting version C<1.0> of C<Text::Parser> this method will be deprecated to improve performance. So avoid inheriting this method. |
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
=head2 is_line_continued |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
The default implementation of this routine: |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
multiline_type | Return value |
1419
|
|
|
|
|
|
|
------------------+--------------------------------- |
1420
|
|
|
|
|
|
|
undef | 0 |
1421
|
|
|
|
|
|
|
join_last | 0 for first line, 1 otherwise |
1422
|
|
|
|
|
|
|
join_next | 1 |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
In earlier versions of L<Text::Parser> you had no way but to subclass L<Text::Parser> to change the routine that detects if a line is wrapped. Now you can instead select from a list of known C<line_wrap_style>s, or even set custom methods for this. |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
=head2 join_last_line |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
The default implementation of this routine takes two string arguments, joins them without any C<chomp> or any other operation, and returns that result. |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
In earlier versions of L<Text::Parser> you had no way but to subclass L<Text::Parser> to select a line-unwrapping routine. Now you can instead select from a list of known C<line_wrap_style>s, or even set custom methods for this. |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
=head1 THINGS TO DO FURTHER |
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
Future versions are expected to include features to: |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
=over 4 |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
=item * |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
read and parse from a buffer |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
=item * |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
automatically uncompress input |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
=item * |
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
I<suggestions welcome ...> |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=back |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
Contributions and suggestions are welcome and properly acknowledged. |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=head1 HANDLING LINE-WRAPPING |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
Different text formats sometimes allow line-wrapping to make their content more human-readable. Handling this can be rather complicated if you use native Perl, but extremely easy with L<Text::Parser>. |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
=head2 Common line-wrapping styles |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
L<Text::Parser> supports a range of commonly-used line-unwrapping routines which can be selected using the C<L<line_wrap_style|Text::Parser/"line_wrap_style">> attribute. The attribute automatically sets up the parser to handle line-unwrapping for that specific text format. |
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
$parser->line_wrap_style('trailing_backslash'); |
1463
|
|
|
|
|
|
|
# Now when read runs the rules, all the back-slash |
1464
|
|
|
|
|
|
|
# line-wrapped lines are auto-unwrapped to a single |
1465
|
|
|
|
|
|
|
# line, and rules are applied on that single line |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
When C<read> reads each line of text, it looks for any trailing backslash and unwraps the line. The next line may have a trailing back-slash too, and that too is unwrapped. Once the fully-unwrapped line has been identified, the rules are run on that unwrapped line, as if the file had no line-wrapping at all. So say the content of a line is like this: |
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
This is a long line wrapped into multiple lines \ |
1470
|
|
|
|
|
|
|
with a back-slash character. This is a very common \ |
1471
|
|
|
|
|
|
|
way to wrap long lines. In general, line-wrapping \ |
1472
|
|
|
|
|
|
|
can be much easier on the reader's eyes. |
1473
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
When C<read> runs any rules in C<$parser>, the text above appears as a single line to the rules. |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=head2 Specifying custom line-unwrap routines |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
I have included the common types of line-wrapping styles known to me. But obviously there can be more. To specify a custom line-unwrapping style follow these steps: |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
=over 4 |
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
=item * |
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
Set the C<L<multiline_type|/"multiline_type">> attribute appropriately. If you do not set this, your custom unwrapping routines won't have any effect. |
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
=item * |
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
Call C<L<custom_line_unwrap_routines|/"custom_line_unwrap_routines">> method. If you forget to call this method, or if you don't provide appropriate arguments, then an exception is thrown. |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
=back |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
L<Here|/"custom_line_unwrap_routines"> is an example with C<join_last> value for C<multiline_type>. And L<here|Text::Parser::Multiline/"SYNOPSIS"> is an example using C<join_next>. You'll notice that in both examples, you need to specify both routines. In fact, if you don't |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
=head2 Line-unwrapping in a subclass |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
You may subclass C<Text::Paser> to parse your specific text format. And that format may support some line-wrapping. To handle the known common line-wrapping styles, set a default value for C<line_wrap_style>. For example: |
1497
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
=over 4 |
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
=item * |
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
Set a default value for C<line_wrap_style>. For example, the following uses one of the supported common line-unwrap methods. has '+line_wrap_style' => ( default => 'spice', ); |
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
=back |
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
* Setup custom line-unwrap routines with C<unwraps_lines> from L<Text::Parser::RuleSpec>. |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
use Text::Parser::RuleSpec; |
1509
|
|
|
|
|
|
|
extends 'Text::Parser'; |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
has '+line_wrap_style' => ( default => 'slurp', is => 'ro'); |
1512
|
|
|
|
|
|
|
has '+multiline_type' => ( is => 'ro' ); |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
Of course, you don't I<have> to make them read-only. |
1515
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
To setup custom line-unwrapping routines in a subclass, you can use the C<L<unwraps_lines_using|Text::Parser::RuleSpec/"unwraps_lines_using">> syntax sugar from L<Text::Parser::RuleSpec>. For example: |
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
package MyParser; |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
use Text::Parser::RuleSpec; |
1521
|
|
|
|
|
|
|
extends 'Text::Parser'; |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
has '+multiline_type' => ( |
1524
|
|
|
|
|
|
|
default => 'join_next', |
1525
|
|
|
|
|
|
|
is => 'ro', |
1526
|
|
|
|
|
|
|
); |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
unwraps_lines_using( |
1529
|
|
|
|
|
|
|
is_wrapped => \&_my_is_wrapped_routine, |
1530
|
|
|
|
|
|
|
unwrap_routine => \&_my_unwrap_routine, |
1531
|
|
|
|
|
|
|
); |
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
=head1 SEE ALSO |
1534
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
=over 4 |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
=item * |
1538
|
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
|
L<Text::Parser::Manual> - Read this manual to learn how to do cool things with this class |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
=item * |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
L<Text::Parser::Error> - there is a change in how exceptions are thrown by this class. Read this page for more information. |
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
=item * |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
L<The AWK Programming Language|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberg, and B<K>ernighan. |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=item * |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
L<Text::Parser::Multiline> - how to read line-wrapped text input |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
=back |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
=head1 BUGS |
1556
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
Please report any bugs or feature requests on the bugtracker website |
1558
|
|
|
|
|
|
|
L<http://github.com/balajirama/Text-Parser/issues> |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
When submitting a bug or request, please include a test-file or a |
1561
|
|
|
|
|
|
|
patch to an existing test-file that illustrates the bug or desired |
1562
|
|
|
|
|
|
|
feature. |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
=head1 AUTHOR |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
Balaji Ramasubramanian <balajiram@cpan.org> |
1567
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
This software is copyright (c) 2018-2019 by Balaji Ramasubramanian. |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
1573
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
=for stopwords H.Merijn Brand - Tux Mohammad S Anwar |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=over 4 |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
=item * |
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
H.Merijn Brand - Tux <h.m.brand@xs4all.nl> |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
=item * |
1586
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
Mohammad S Anwar <mohammad.anwar@yahoo.com> |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
=back |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=cut |
1592
|
|
|
|
|
|
|
|