line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::FixedLengthMultiline; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
143474
|
use utf8; |
|
4
|
|
|
|
|
42
|
|
|
4
|
|
|
|
|
24
|
|
4
|
4
|
|
|
4
|
|
137
|
use strict; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
124
|
|
5
|
4
|
|
|
4
|
|
22
|
use warnings; |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
113
|
|
6
|
|
|
|
|
|
|
|
7
|
4
|
|
|
4
|
|
21
|
use Carp; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
356
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
BEGIN { |
10
|
4
|
|
|
4
|
|
83
|
our $VERSION = '0.071'; |
11
|
|
|
|
|
|
|
} |
12
|
|
|
|
|
|
|
|
13
|
4
|
|
|
4
|
|
21
|
use constant FIRST => 1; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
344
|
|
14
|
4
|
|
|
4
|
|
29
|
use constant LAST => 2; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
166
|
|
15
|
4
|
|
|
4
|
|
22
|
use constant ANY => 3; # FIRST | LAST |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
15600
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my %continue_styles = ( |
18
|
|
|
|
|
|
|
'first' => FIRST, |
19
|
|
|
|
|
|
|
'last' => LAST, |
20
|
|
|
|
|
|
|
'any' => ANY |
21
|
|
|
|
|
|
|
); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=encoding utf8 |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 NAME |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Text::FixedLengthMultiline - Parse text data formatted in space separated columns optionnaly on multiple lines |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
use Text::FixedLengthMultiline; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#234567890 12345678901234567890 12 |
35
|
|
|
|
|
|
|
my $text = <
|
36
|
|
|
|
|
|
|
Alice Pretty girl! |
37
|
|
|
|
|
|
|
Bob Good old uncle Bob, |
38
|
|
|
|
|
|
|
very old. 92 |
39
|
|
|
|
|
|
|
Charlie Best known as Waldo 14 |
40
|
|
|
|
|
|
|
or Wally. Where's |
41
|
|
|
|
|
|
|
he? |
42
|
|
|
|
|
|
|
EOT |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $fmt = Text::FixedLengthMultiline->new(format => ['!name' => 10, 1, 'comment~' => 20, 1, 'age' => -2 ]); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Compute the RegExp that matches the first line |
47
|
|
|
|
|
|
|
my $first_line_re = $fmt->get_first_line_re(); |
48
|
|
|
|
|
|
|
# Compute the RegExp that matches a continuation line |
49
|
|
|
|
|
|
|
my $continue_line_re = $fmt->get_continue_line_re(); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
my @data; |
52
|
|
|
|
|
|
|
my $err; |
53
|
|
|
|
|
|
|
while ($text =~ /^([^\n]+)$/gm) { |
54
|
|
|
|
|
|
|
my $line = $1; |
55
|
|
|
|
|
|
|
push @data, {} if $line =~ $first_line_re; |
56
|
|
|
|
|
|
|
if (($err = $fmt->parse_line($line, $data[$#data])) > 0) { |
57
|
|
|
|
|
|
|
warn "Parse error at column $err"; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head1 DESCRIPTION |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
A row of data can be splitted on multiple lines of text with cell content |
64
|
|
|
|
|
|
|
flowing in the same column space. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 FORMAT SPECIFICATION |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
The format is given at the contruction time as an array ref. Modifying the |
69
|
|
|
|
|
|
|
array content after the construction call is done at your own risks. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
The array contains the ordered sequence of columns. Each colmun can either be: |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item * |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
a positive integer representing the size of a separating column which is |
78
|
|
|
|
|
|
|
expected to always be filled with spaces. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item * |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
a string that matches this regexp: /^(?#mandatory)!?(?#name)[:alnum:]\w*(?:(?#multi)~(?#cont).?)?$/ |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=over |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=item * |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
C means the column is mandatory |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item * |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
C is the column name. This will be the key for the hash after parsing. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item * |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
C<~> means the column data can be on multiple lines. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=back |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=back |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head1 METHODS |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 new() |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Arguments: |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=over |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item * |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
C: an array reference following the L. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item * |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
C |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=back |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Example: |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
my $format = Text::FixedLengthMultiline->new(format => [ 2, col1 => 4, 1, '!col2' => 4 ]); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# TODO add 'continue-style': first/last/any |
127
|
|
|
|
|
|
|
sub new |
128
|
|
|
|
|
|
|
{ |
129
|
41
|
|
|
41
|
1
|
23045
|
my $class = shift; |
130
|
41
|
|
|
|
|
133
|
my %params = @_; |
131
|
41
|
100
|
66
|
|
|
479
|
(%params && exists $params{'format'}) or croak('['.__PACKAGE__."] Missing format"); |
132
|
40
|
100
|
|
|
|
488
|
ref $params{'format'} eq 'ARRAY' or croak('['.__PACKAGE__."] Invalid format: array ref expected"); |
133
|
37
|
|
|
|
|
56
|
my $continue_style = ANY; |
134
|
37
|
100
|
|
|
|
108
|
if (exists $params{'continue_style'}) { |
135
|
23
|
|
|
|
|
33
|
my $style = $params{'continue_style'}; |
136
|
23
|
100
|
|
|
|
575
|
croak('['.__PACKAGE__."] Invalid continue_style: first/last/any expected") unless exists $continue_styles{$style}; |
137
|
19
|
|
|
|
|
527
|
$continue_style = $continue_styles{$style}; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
# TODO Check the format, and report errors |
140
|
33
|
|
33
|
|
|
200
|
my $self = { |
141
|
|
|
|
|
|
|
FORMAT => $params{'format'}, |
142
|
|
|
|
|
|
|
# Maybe doing a copy would be a good idea... |
143
|
|
|
|
|
|
|
# But we trust the user even if we all know |
144
|
|
|
|
|
|
|
# he's a crazy programmer |
145
|
|
|
|
|
|
|
DEBUG => exists $params{'debug'} && $params{'debug'}, |
146
|
|
|
|
|
|
|
CONTINUE_STYLE => $continue_style |
147
|
|
|
|
|
|
|
}; |
148
|
33
|
|
|
|
|
88
|
bless $self, $class; |
149
|
33
|
|
|
|
|
109
|
return $self; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 C |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Parse a table. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
my @table = $fmt->parse_table($text); |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Returns an array of hashes. Each hash is a row of data. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub parse_table |
164
|
|
|
|
|
|
|
{ |
165
|
1
|
|
|
1
|
1
|
10
|
my ($self, $text) = @_; |
166
|
1
|
|
|
|
|
3
|
my $first_re = $self->get_first_line_re(); |
167
|
1
|
|
|
|
|
2
|
my @table; |
168
|
|
|
|
|
|
|
my $err; |
169
|
1
|
|
|
|
|
1
|
my $linenum = 1; |
170
|
1
|
|
|
|
|
19
|
(pos $text) = 0; |
171
|
1
|
|
|
|
|
8
|
while ($text =~ /^([^\n]+)$/gm) { |
172
|
6
|
|
|
|
|
11
|
my $line = $1; |
173
|
6
|
100
|
|
|
|
62
|
push @table, {} if $line =~ $first_re; |
174
|
6
|
50
|
|
|
|
16
|
if (($err = $self->parse_line($line, $table[$#table])) > 0) { |
175
|
0
|
|
|
|
|
0
|
croak "Parse error at line $linenum, column $err"; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
} |
178
|
1
|
|
|
|
|
4
|
return @table; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head2 C |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Parse a line of text and add parsed data to the hash. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
my $error = $fmt->parse_line($line, \%row_data); |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Multiple calls to C with the same hashref may be needed to fully |
191
|
|
|
|
|
|
|
read a "logical line" in case some columns are multiline. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Returns: |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=over |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item * |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
C<-col>: Parse error. The value is a negative integer indicating the |
200
|
|
|
|
|
|
|
character position in the line where the parse error occured. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item * |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
C<0>: OK |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=item * |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
C: Missing data: need to feed next line to fill remining columns. |
209
|
|
|
|
|
|
|
The value is the character position of the column where data is expected. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=back |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=cut |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# TODO: return a RE in case of missing data |
216
|
|
|
|
|
|
|
sub parse_line |
217
|
|
|
|
|
|
|
{ |
218
|
40
|
|
|
40
|
1
|
16571
|
my ($self, $line, $data) = @_; |
219
|
40
|
|
|
|
|
56
|
my @fmt = @{$self->{FORMAT}}; |
|
40
|
|
|
|
|
116
|
|
220
|
40
|
|
|
|
|
52
|
my $col = 1; |
221
|
40
|
|
|
|
|
45
|
my $ret = 0; |
222
|
40
|
100
|
|
|
|
99
|
$line = '' unless defined $line; |
223
|
40
|
|
|
|
|
92
|
while ($#fmt >= 0) { |
224
|
86
|
|
|
|
|
114
|
my $f = shift @fmt; |
225
|
86
|
|
|
|
|
88
|
my $data_len; |
226
|
86
|
100
|
|
|
|
390
|
if ($f =~ /^\d+$/) { |
|
|
50
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Spaces to skip |
228
|
36
|
50
|
|
|
|
69
|
next if $f == 0; |
229
|
36
|
|
|
|
|
140
|
$line =~ /^( {0,$f})/; |
230
|
36
|
|
|
|
|
54
|
$data_len = length $1; |
231
|
36
|
100
|
|
|
|
127
|
return -($col+$data_len) if $data_len < $f; |
232
|
|
|
|
|
|
|
} elsif ($f =~ /^(!?)([A-Za-z_]\w*)(?:(~)(.?))?$/) { |
233
|
50
|
|
|
|
|
141
|
my ($mandatory, $field, $multi, $cont) = ($1, $2, $3, $4); |
234
|
50
|
100
|
|
|
|
110
|
$multi = 0 unless defined $multi; |
235
|
50
|
50
|
66
|
|
|
137
|
$cont = ' ' unless defined $cont && $cont ne ''; |
236
|
50
|
|
|
|
|
58
|
my $len = shift @fmt; |
237
|
50
|
50
|
|
|
|
90
|
next if $len == 0; |
238
|
50
|
|
|
|
|
91
|
my $d = substr($line, 0, abs $len); |
239
|
50
|
|
|
|
|
49
|
$data_len = length $d; |
240
|
50
|
100
|
|
|
|
79
|
if ($len > 0) { |
241
|
38
|
|
|
|
|
128
|
$d =~ s/ +$//; |
242
|
|
|
|
|
|
|
} else { |
243
|
12
|
|
|
|
|
27
|
$d .= ' ' x -($data_len+$len); |
244
|
12
|
|
|
|
|
27
|
$d =~ s/^ +//; |
245
|
|
|
|
|
|
|
} |
246
|
50
|
100
|
|
|
|
104
|
if ($d ne '') { |
247
|
37
|
100
|
100
|
|
|
157
|
return -$col if !$multi && exists $data->{$field}; |
248
|
34
|
100
|
100
|
|
|
103
|
if ($multi && exists $data->{$field}) { |
249
|
|
|
|
|
|
|
# Multilines => concat |
250
|
6
|
|
|
|
|
13
|
$data->{$field} .= "\n" . $d; |
251
|
6
|
50
|
33
|
|
|
43
|
$ret = $col if $ret == 0 && $d =~ /\Q$cont\E$/; |
252
|
|
|
|
|
|
|
} else { |
253
|
28
|
|
|
|
|
57
|
$data->{$field} = $d; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
} |
256
|
47
|
100
|
100
|
|
|
163
|
$ret = $col if $mandatory && !exists $data->{$field} && $ret == 0; |
|
|
|
66
|
|
|
|
|
257
|
|
|
|
|
|
|
} else { |
258
|
0
|
|
|
|
|
0
|
warn "Bad format!\n"; |
259
|
0
|
|
|
|
|
0
|
return -$col; |
260
|
|
|
|
|
|
|
} |
261
|
69
|
|
|
|
|
78
|
$col += $data_len; |
262
|
69
|
|
|
|
|
122
|
$line = substr($line, $data_len); |
263
|
69
|
100
|
66
|
|
|
229
|
last if $ret != 0 && $line eq ''; |
264
|
|
|
|
|
|
|
} |
265
|
23
|
100
|
|
|
|
95
|
return -$col unless $line =~ /^ *$/; |
266
|
20
|
|
|
|
|
107
|
return $ret; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
sub _dump_line_re() |
273
|
|
|
|
|
|
|
{ |
274
|
0
|
|
|
0
|
|
0
|
while ($#_ >= 0) { |
275
|
0
|
|
|
|
|
0
|
print "> [" . (shift @_) ."]\n"; |
276
|
0
|
|
|
|
|
0
|
print ' [' . join('] :: [', @{ (shift @_) }) . "]\n"; |
|
0
|
|
|
|
|
0
|
|
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub _serialize_line_re() |
281
|
|
|
|
|
|
|
{ |
282
|
|
|
|
|
|
|
#&_dump_line_re(@_); |
283
|
76
|
|
|
76
|
|
117
|
my $re = ''; |
284
|
76
|
|
|
|
|
186
|
while ($#_ > -1) { |
285
|
|
|
|
|
|
|
# Pop the alternatives for the end of the line |
286
|
69
|
|
|
|
|
77
|
my @b = grep(!/^$/, @{ (pop @_) }); |
|
69
|
|
|
|
|
250
|
|
287
|
|
|
|
|
|
|
# TODO remove duplicates |
288
|
69
|
100
|
|
|
|
162
|
push @b, $re if $re ne ''; |
289
|
69
|
100
|
|
|
|
155
|
if ($#b > 0) { |
|
|
100
|
|
|
|
|
|
290
|
23
|
|
|
|
|
73
|
$re = "(?:" . join('|', @b) . ")"; |
291
|
|
|
|
|
|
|
} elsif ($#b > -1) { |
292
|
42
|
|
|
|
|
63
|
$re = $b[0]; |
293
|
|
|
|
|
|
|
} else { |
294
|
4
|
|
|
|
|
6
|
$re = ''; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
# Pop |
297
|
69
|
|
|
|
|
256
|
$re = (pop @_) . $re; |
298
|
|
|
|
|
|
|
#print "$re\n"; |
299
|
|
|
|
|
|
|
} |
300
|
76
|
|
|
|
|
236
|
return $re; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub _parse_column_format($;$) |
304
|
|
|
|
|
|
|
{ |
305
|
104
|
|
|
104
|
|
148
|
my ($format, $width) = @_; |
306
|
104
|
50
|
|
|
|
449
|
if ($format =~ /^(!?)([A-Za-z_]\w*)(?:(~)(.?))?$/) { |
307
|
104
|
|
|
|
|
683
|
my %def = ( |
308
|
|
|
|
|
|
|
mandatory => $1, |
309
|
|
|
|
|
|
|
name => $2, |
310
|
|
|
|
|
|
|
multi => $3, |
311
|
|
|
|
|
|
|
cont => $4, |
312
|
|
|
|
|
|
|
width => abs $width |
313
|
|
|
|
|
|
|
); |
314
|
104
|
100
|
|
|
|
273
|
$def{multi} = '' unless defined $def{multi}; |
315
|
104
|
100
|
|
|
|
239
|
$def{align} = $width > 0 ? 'L' : 'R'; |
316
|
104
|
|
|
|
|
751
|
return %def; |
317
|
|
|
|
|
|
|
} else { |
318
|
0
|
|
|
|
|
0
|
return undef; |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub _build_repetition_re($;$;$) |
323
|
|
|
|
|
|
|
{ |
324
|
101
|
|
|
101
|
|
136
|
my ($c, $min, $max) = @_; |
325
|
101
|
50
|
|
|
|
193
|
return '' if $max <= 0; |
326
|
101
|
50
|
|
|
|
156
|
if ($max == 1) { |
327
|
0
|
0
|
|
|
|
0
|
$c .= '?' if $min <= 0; |
328
|
|
|
|
|
|
|
} else { |
329
|
101
|
100
|
|
|
|
162
|
if ($min < $max) { |
330
|
45
|
|
|
|
|
91
|
$c .= "{$min,$max}"; |
331
|
|
|
|
|
|
|
} else { |
332
|
56
|
|
|
|
|
222
|
$c .= "{$max}"; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
} |
335
|
101
|
|
|
|
|
202
|
return $c; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
sub _build_column_re |
339
|
|
|
|
|
|
|
{ |
340
|
78
|
|
|
78
|
|
95
|
my $self = shift; |
341
|
78
|
|
|
|
|
391
|
my %def = @_; |
342
|
78
|
|
100
|
|
|
407
|
my $branch_multi = $def{multi} && exists $def{branch_multi} && $def{branch_multi}; |
343
|
78
|
50
|
|
|
|
184
|
my $re_label = $self->{DEBUG} ? "(?#_$def{mandatory}$def{name}$def{multi}_)" : ''; |
344
|
78
|
100
|
|
|
|
311
|
my $re_spaces = $def{spaces} > 0 ? ' '.($def{spaces} > 1 ? "{$def{spaces}}":'') : ''; |
|
|
100
|
|
|
|
|
|
345
|
78
|
|
|
|
|
94
|
my $width = $def{width}; |
346
|
78
|
|
|
|
|
85
|
my ($re_col_mand, $re_col_end, $re_col); |
347
|
78
|
100
|
100
|
|
|
227
|
if ($def{mandatory} || $branch_multi) { |
348
|
56
|
|
|
|
|
67
|
$re_col_mand = $re_spaces . $re_label; |
349
|
56
|
100
|
|
|
|
129
|
if ($def{align} eq 'L') { # Left aligned |
350
|
45
|
|
|
|
|
100
|
$re_col_end = &_build_repetition_re('.', 0, $width-1); |
351
|
45
|
100
|
|
|
|
89
|
unless ($branch_multi) { |
352
|
40
|
|
|
|
|
164
|
$re_col_mand .= '\S'; |
353
|
40
|
|
|
|
|
89
|
$re_col = &_build_repetition_re('.', $width-1, $width-1); |
354
|
|
|
|
|
|
|
} else { |
355
|
5
|
|
|
|
|
11
|
$re_col = &_build_repetition_re('.', $width, $width); |
356
|
5
|
|
|
|
|
10
|
$re_col_end = '\S' . $re_col_end; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
} else { |
359
|
11
|
|
|
|
|
28
|
$re_col_mand .= &_build_repetition_re('.', $width-1, $width-1); |
360
|
11
|
50
|
|
|
|
25
|
unless ($branch_multi) { |
361
|
11
|
|
|
|
|
16
|
$re_col_end = $re_col = ''; |
362
|
11
|
|
|
|
|
16
|
$re_col_mand .= '\S'; |
363
|
|
|
|
|
|
|
} else { |
364
|
0
|
|
|
|
|
0
|
$re_col_end = '\S'; |
365
|
0
|
|
|
|
|
0
|
$re_col = '.'; |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
} else { |
369
|
22
|
|
|
|
|
30
|
$re_col_mand = ''; |
370
|
22
|
50
|
|
|
|
75
|
$re_col_end = $re_spaces . $re_label . '.' . ($width > 1 ? "{0,$width}" : '?'); |
371
|
22
|
50
|
|
|
|
96
|
$re_col_end = "(?:$re_col_end)?" if $def{spaces}; |
372
|
22
|
50
|
|
|
|
57
|
$re_col = $re_spaces . $re_label . '.' . ($width > 1 ? "{$width}" : '' ); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
#print "$def{name} => /$re_col_mand/ /$re_col_end/ /$re_col/ (spaces = $def{spaces})\n"; |
375
|
78
|
|
|
|
|
411
|
return ($re_col_mand, $re_col_end, $re_col); |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
sub _has_multi(@) |
379
|
|
|
|
|
|
|
{ |
380
|
17
|
|
|
17
|
|
35
|
foreach (@_) { |
381
|
43
|
100
|
|
|
|
129
|
return 1 if /!?[_[:alpha:]]\w+~/; |
382
|
|
|
|
|
|
|
} |
383
|
12
|
|
|
|
|
38
|
return 0; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# @_ is the format |
388
|
|
|
|
|
|
|
# TODO handle the case where all columns are optionnal |
389
|
|
|
|
|
|
|
# The RE is then the union of the cases where one of the colmuns, up to the first multi, is mandatory |
390
|
|
|
|
|
|
|
sub _build_first_line_re |
391
|
|
|
|
|
|
|
{ |
392
|
37
|
|
|
37
|
|
46
|
my $self = shift; |
393
|
37
|
|
|
|
|
48
|
my $branch_multi = shift; |
394
|
37
|
|
|
|
|
40
|
my $spaces = 0; |
395
|
37
|
|
|
|
|
612
|
my @re = (); |
396
|
37
|
|
|
|
|
49
|
my $re_acc = ''; # Accumulator |
397
|
37
|
|
|
|
|
101
|
my $multi = '~'; # Force the initialisation of @re |
398
|
37
|
|
|
|
|
99
|
while ($#_ >= 0) { |
399
|
121
|
|
|
|
|
155
|
my $f = shift; |
400
|
121
|
100
|
|
|
|
437
|
if ($f =~ /^\d+$/) { |
401
|
60
|
|
|
|
|
152
|
$spaces += $f; |
402
|
|
|
|
|
|
|
} else { |
403
|
61
|
|
|
|
|
114
|
my %def = &_parse_column_format($f, shift); |
404
|
61
|
100
|
100
|
|
|
279
|
if ($multi && ($branch_multi || $#re == -1)) { |
|
|
|
66
|
|
|
|
|
405
|
|
|
|
|
|
|
# The previous column was a multi. The following fields may not be |
406
|
|
|
|
|
|
|
# on this line but on one of the next ones. |
407
|
|
|
|
|
|
|
# So the end of the line is optionnal. |
408
|
|
|
|
|
|
|
# We are starting a new altenative in the RE. |
409
|
42
|
|
|
|
|
80
|
push @re, $re_acc, [ ]; |
410
|
42
|
|
|
|
|
70
|
$re_acc = ''; |
411
|
|
|
|
|
|
|
} |
412
|
61
|
|
|
|
|
223
|
my ($re_col_mand, $re_col_end, $re_col) = $self->_build_column_re(%def, spaces => $spaces); |
413
|
61
|
100
|
|
|
|
156
|
if ($def{mandatory}) { |
414
|
|
|
|
|
|
|
# Flush optional columns and append this column |
415
|
40
|
|
|
|
|
91
|
$re[$#re-1] .= $re_acc . $re_col_mand; |
416
|
40
|
100
|
|
|
|
83
|
if ($re_col_end eq '') { |
417
|
8
|
|
|
|
|
16
|
$re[$#re] = [ ]; |
418
|
|
|
|
|
|
|
} else { |
419
|
32
|
|
|
|
|
72
|
$re[$#re] = [ $re_col_end ]; |
420
|
|
|
|
|
|
|
} |
421
|
40
|
|
|
|
|
64
|
$re_acc = $re_col; |
422
|
|
|
|
|
|
|
} else { |
423
|
|
|
|
|
|
|
# Save column format for later |
424
|
21
|
|
|
|
|
28
|
push @{$re[$#re]}, $re_acc . $re_col_mand . $re_col_end; |
|
21
|
|
|
|
|
57
|
|
425
|
21
|
|
|
|
|
38
|
$re_acc .= $re_col_mand . $re_col; |
426
|
|
|
|
|
|
|
} |
427
|
61
|
|
|
|
|
67
|
$spaces = 0; |
428
|
61
|
|
|
|
|
270
|
$multi = $def{multi}; |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
} |
431
|
37
|
|
|
|
|
133
|
return @re; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
sub _build_continue_line_re |
435
|
|
|
|
|
|
|
{ |
436
|
39
|
|
|
39
|
|
51
|
my $self = shift; |
437
|
39
|
|
|
|
|
46
|
my $spaces = 0; |
438
|
39
|
|
|
|
|
45
|
my $multi = '~'; # Force the initialisation of @re |
439
|
39
|
|
|
|
|
102
|
while ($#_ >= 0) { |
440
|
85
|
|
|
|
|
120
|
my $f = shift; |
441
|
85
|
100
|
|
|
|
326
|
if ($f =~ /^\d+$/) { |
442
|
42
|
|
|
|
|
105
|
$spaces += $f; |
443
|
|
|
|
|
|
|
} else { |
444
|
43
|
|
|
|
|
84
|
my %def = &_parse_column_format($f, shift); |
445
|
43
|
100
|
|
|
|
131
|
unless ($def{multi}) { |
446
|
26
|
|
|
|
|
35
|
$spaces += $def{width}; |
447
|
26
|
|
|
|
|
109
|
next; |
448
|
|
|
|
|
|
|
} |
449
|
17
|
|
|
|
|
23
|
my @re; |
450
|
17
|
|
|
|
|
20
|
my ($re_col_end, $re_col); |
451
|
17
|
|
|
|
|
63
|
($re[0], $re_col_end, $re_col) = $self->_build_column_re(%def, spaces => $spaces, branch_multi => &_has_multi(@_)); |
452
|
17
|
|
|
|
|
86
|
push @re, [ $re_col_end ]; |
453
|
17
|
|
|
|
|
21
|
my @re_end; |
454
|
17
|
100
|
|
|
|
100
|
push @re_end, &_serialize_line_re($self->_build_continue_line_re(@_)) if $self->{CONTINUE_STYLE} & FIRST; |
455
|
17
|
100
|
|
|
|
69
|
push @re_end, &_serialize_line_re($self->_build_first_line_re(1, @_)) if $self->{CONTINUE_STYLE} & LAST; |
456
|
17
|
|
|
|
|
79
|
@re_end = grep !/^$/, @re_end; |
457
|
|
|
|
|
|
|
#pop @re_end if $#re_end == 1 && $re_end[1] eq $re_end[0]; |
458
|
17
|
100
|
|
|
|
50
|
push @re, $re_col, [ @re_end ] if (@re_end); |
459
|
17
|
|
|
|
|
97
|
return @re; |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} |
462
|
22
|
|
|
|
|
56
|
return (); |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=head2 C |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
Returns a regular expression that matches the first line of a "logical line" |
468
|
|
|
|
|
|
|
of data. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
my $re = $fmt->get_first_line_re(); |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=cut |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
sub get_first_line_re |
475
|
|
|
|
|
|
|
{ |
476
|
27
|
|
|
27
|
1
|
291
|
my $self = shift; |
477
|
27
|
100
|
|
|
|
87
|
if (!exists $self->{FIRST_LINE_RE}) { |
478
|
26
|
|
|
|
|
54
|
my @re; |
479
|
26
|
100
|
|
|
|
68
|
if ($self->{CONTINUE_STYLE} == FIRST) { |
480
|
6
|
|
|
|
|
10
|
@re = $self->_build_first_line_re(0, @{$self->{FORMAT}}); |
|
6
|
|
|
|
|
24
|
|
481
|
|
|
|
|
|
|
} else { |
482
|
20
|
|
|
|
|
35
|
@re = $self->_build_first_line_re(1, @{$self->{FORMAT}}); |
|
20
|
|
|
|
|
73
|
|
483
|
|
|
|
|
|
|
} |
484
|
26
|
|
|
|
|
63
|
my $re = &_serialize_line_re(@re); |
485
|
26
|
100
|
|
|
|
668
|
$self->{FIRST_LINE_RE} = ($re eq '' ? undef : qr/^$re *$/); |
486
|
|
|
|
|
|
|
} |
487
|
27
|
|
|
|
|
185
|
return $self->{FIRST_LINE_RE}; |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 C |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Returns a regular expression that matches the 2nd line and the following |
493
|
|
|
|
|
|
|
lines of a "logical line". |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
my $re = $fmt->get_continue_line_re(); |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Returns undef if the format specification does not contains any column that |
498
|
|
|
|
|
|
|
can be splitted on multiples lines. |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# continue-style: first (only cont columns can appear on a continue line) |
503
|
|
|
|
|
|
|
sub get_continue_line_re |
504
|
|
|
|
|
|
|
{ |
505
|
26
|
|
|
26
|
1
|
54
|
my $self = shift; |
506
|
26
|
50
|
|
|
|
127
|
if (!exists $self->{CONTINUE_LINE_RE}) { |
507
|
26
|
|
|
|
|
35
|
my @re = $self->_build_continue_line_re(@{$self->{FORMAT}}); |
|
26
|
|
|
|
|
93
|
|
508
|
|
|
|
|
|
|
#&_dump_line_re(@re); |
509
|
26
|
|
|
|
|
57
|
my $re = &_serialize_line_re(@re); |
510
|
26
|
100
|
|
|
|
599
|
$self->{CONTINUE_LINE_RE} = ($re eq '' ? undef : qr/^$re *$/); |
511
|
|
|
|
|
|
|
} |
512
|
26
|
|
|
|
|
169
|
return $self->{CONTINUE_LINE_RE}; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
1; # Magic for module end |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
__END__ |