| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package App::RecordStream::Operation::frommultire; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = "4.0.25"; |
|
4
|
|
|
|
|
|
|
|
|
5
|
3
|
|
|
3
|
|
1153
|
use strict; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
70
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
3
|
|
|
3
|
|
11
|
use base qw(App::RecordStream::Operation); |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
3073
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
sub init { |
|
10
|
8
|
|
|
8
|
0
|
12
|
my $this = shift; |
|
11
|
8
|
|
|
|
|
13
|
my $args = shift; |
|
12
|
|
|
|
|
|
|
my %options = ( |
|
13
|
13
|
|
|
13
|
|
5417
|
"no-flush-regex|regex|re=s" => sub { $this->add_regex($_[1], 0, 0); }, |
|
14
|
0
|
|
|
0
|
|
0
|
"pre-flush-regex|pre=s" => sub { $this->add_regex($_[1], 1, 0); }, |
|
15
|
2
|
|
|
2
|
|
212
|
"post-flush-regex|post=s" => sub { $this->add_regex($_[1], 0, 1); }, |
|
16
|
0
|
|
|
0
|
|
0
|
"double-flush-regex|double=s" => sub { $this->add_regex($_[1], 1, 1); }, |
|
17
|
2
|
|
|
2
|
|
167
|
"clobber" => sub { $this->_set_clobber(1); }, |
|
18
|
1
|
|
|
1
|
|
80
|
"keep-all" => sub { $this->_set_keep_all(1); }, |
|
19
|
1
|
|
|
1
|
|
87
|
"keep=s" => sub { $this->add_keep(split(/,/, $_[1])); }, |
|
20
|
8
|
|
|
|
|
72
|
); |
|
21
|
|
|
|
|
|
|
|
|
22
|
8
|
|
|
|
|
28
|
$this->parse_options($args, \%options); |
|
23
|
|
|
|
|
|
|
|
|
24
|
8
|
|
|
|
|
29
|
$this->{'RECORD'} = App::RecordStream::Record->new(); |
|
25
|
|
|
|
|
|
|
} |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub add_regex { |
|
28
|
15
|
|
|
15
|
0
|
35
|
my ($this, $string, $pre_flush, $post_flush) = @_; |
|
29
|
|
|
|
|
|
|
|
|
30
|
15
|
|
100
|
|
|
48
|
$this->{'REGEXES'} ||= []; |
|
31
|
|
|
|
|
|
|
|
|
32
|
15
|
|
|
|
|
15
|
my $fields = []; |
|
33
|
15
|
100
|
|
|
|
56
|
if($string =~ /^([^=]*)=(.*)$/) { |
|
34
|
14
|
|
|
|
|
38
|
$fields = [split(/,/, $1)]; |
|
35
|
14
|
|
|
|
|
23
|
$string = $2; |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
15
|
|
|
|
|
15
|
push @{$this->{'REGEXES'}}, [$string, $fields, $pre_flush, $post_flush] |
|
|
15
|
|
|
|
|
42
|
|
|
39
|
|
|
|
|
|
|
} |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub _get_regexes { |
|
42
|
42
|
|
|
42
|
|
64
|
my ($this) = @_; |
|
43
|
42
|
|
50
|
|
|
86
|
return $this->{'REGEXES'} || []; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub _set_clobber { |
|
47
|
2
|
|
|
2
|
|
4
|
my ($this, $value) = @_; |
|
48
|
2
|
|
|
|
|
4
|
$this->{'CLOBBER'} = $value; |
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub get_clobber { |
|
52
|
46
|
|
|
46
|
0
|
65
|
my ($this) = @_; |
|
53
|
46
|
|
100
|
|
|
125
|
return $this->{'CLOBBER'} || 0; |
|
54
|
|
|
|
|
|
|
} |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub _set_keep_all { |
|
57
|
1
|
|
|
1
|
|
3
|
my ($this, $value) = @_; |
|
58
|
1
|
|
|
|
|
2
|
$this->{'KEEP_ALL'} = $value; |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub get_keep_all { |
|
62
|
63
|
|
|
63
|
0
|
67
|
my ($this) = @_; |
|
63
|
63
|
|
100
|
|
|
202
|
return $this->{'KEEP_ALL'} || 0; |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub add_keep { |
|
67
|
1
|
|
|
1
|
0
|
2
|
my $this = shift; |
|
68
|
1
|
|
50
|
|
|
5
|
$this->{'KEEP'} ||= {}; |
|
69
|
1
|
|
|
|
|
3
|
for my $field (@_) { |
|
70
|
1
|
|
|
|
|
3
|
$this->{'KEEP'}->{$field} = 1; |
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
} |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub check_keep { |
|
75
|
63
|
|
|
63
|
0
|
84
|
my ($this, $field) = @_; |
|
76
|
|
|
|
|
|
|
|
|
77
|
63
|
|
100
|
|
|
93
|
$this->{'KEEP'} ||= {}; |
|
78
|
63
|
|
66
|
|
|
71
|
return $this->get_keep_all() || exists($this->{'KEEP'}->{$field}); |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub accept_line { |
|
82
|
42
|
|
|
42
|
0
|
47
|
my $this = shift; |
|
83
|
42
|
|
|
|
|
45
|
my $line = shift; |
|
84
|
|
|
|
|
|
|
|
|
85
|
42
|
|
|
|
|
42
|
my $regex_index = 0; |
|
86
|
42
|
|
|
|
|
53
|
for my $regex (@{$this->_get_regexes()}) { |
|
|
42
|
|
|
|
|
55
|
|
|
87
|
84
|
|
|
|
|
114
|
my ($string, $fields, $pre_flush, $post_flush) = @$regex; |
|
88
|
84
|
|
|
|
|
105
|
my $field_prefix = "$regex_index-"; |
|
89
|
|
|
|
|
|
|
|
|
90
|
84
|
100
|
|
|
|
660
|
if(my @groups = ($line =~ $string)) { |
|
91
|
38
|
|
|
|
|
72
|
my $pairs = $this->get_field_value_pairs(\@groups, $fields, $field_prefix); |
|
92
|
38
|
100
|
|
|
|
63
|
if(!$this->get_clobber()) { |
|
93
|
22
|
|
|
|
|
32
|
foreach my $pair ( @$pairs ) { |
|
94
|
33
|
|
|
|
|
52
|
my ($name, $value) = @$pair; |
|
95
|
33
|
100
|
|
|
|
32
|
if(defined ${$this->{'RECORD'}->guess_key_from_spec($name)}) { |
|
|
33
|
|
|
|
|
64
|
|
|
96
|
12
|
|
|
|
|
18
|
$pre_flush = 1; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
38
|
100
|
|
|
|
57
|
if($pre_flush) { |
|
102
|
7
|
|
|
|
|
13
|
$this->flush_record(); |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
38
|
|
|
|
|
51
|
foreach my $pair ( @$pairs ) { |
|
106
|
57
|
|
|
|
|
82
|
my ($name, $value) = @$pair; |
|
107
|
57
|
|
|
|
|
54
|
${$this->{'RECORD'}->guess_key_from_spec($name)} = $value; |
|
|
57
|
|
|
|
|
107
|
|
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
38
|
100
|
|
|
|
75
|
if($post_flush) { |
|
111
|
8
|
|
|
|
|
14
|
$this->flush_record(); |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
} |
|
114
|
84
|
|
|
|
|
132
|
++$regex_index; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
42
|
|
|
|
|
125
|
return 1; |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub stream_done { |
|
121
|
8
|
|
|
8
|
0
|
8
|
my $this = shift; |
|
122
|
8
|
|
|
|
|
12
|
my $record = $this->{'RECORD'}; |
|
123
|
|
|
|
|
|
|
|
|
124
|
8
|
50
|
100
|
|
|
12
|
if(!$this->get_clobber() && scalar($record->keys())) { |
|
125
|
6
|
|
|
|
|
10
|
$this->flush_record(); |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub get_field_value_pairs { |
|
130
|
38
|
|
|
38
|
0
|
53
|
my ($this, $groups, $fields, $prefix) = @_; |
|
131
|
|
|
|
|
|
|
|
|
132
|
38
|
|
|
|
|
41
|
my @field_names; |
|
133
|
|
|
|
|
|
|
my %groups_used; |
|
134
|
|
|
|
|
|
|
|
|
135
|
38
|
|
|
|
|
66
|
for(my $i = 0; $i < @$fields; ++$i) { |
|
136
|
51
|
|
|
|
|
63
|
my $field = $fields->[$i]; |
|
137
|
51
|
|
|
|
|
48
|
my $field_name; |
|
138
|
51
|
100
|
|
|
|
98
|
if($field =~ /^\$(\d+)$/) { |
|
139
|
10
|
|
|
|
|
19
|
my $n = $1 - 1; |
|
140
|
10
|
|
|
|
|
13
|
$field_name = $groups->[$n]; |
|
141
|
10
|
|
|
|
|
12
|
$groups_used{$n} = 1; |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
else { |
|
144
|
41
|
|
|
|
|
45
|
$field_name = $field; |
|
145
|
|
|
|
|
|
|
} |
|
146
|
51
|
|
|
|
|
95
|
push @field_names, $field_name; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
38
|
|
|
|
|
39
|
my @pairs; |
|
150
|
38
|
|
|
|
|
38
|
my $pair_index = 0; |
|
151
|
38
|
|
|
|
|
51
|
for(my $i = 0; $i < @$groups; ++$i) { |
|
152
|
67
|
100
|
|
|
|
95
|
if($groups_used{$i}) { |
|
153
|
10
|
|
|
|
|
21
|
next; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
57
|
100
|
|
|
|
96
|
my $field_name = ($pair_index < @field_names) ? $field_names[$pair_index] : ($prefix . $pair_index); |
|
156
|
57
|
|
|
|
|
120
|
push @pairs, [$field_name, $groups->[$i]]; |
|
157
|
57
|
|
|
|
|
88
|
$pair_index++; |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
38
|
|
|
|
|
69
|
return \@pairs; |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub flush_record { |
|
164
|
21
|
|
|
21
|
0
|
21
|
my $this = shift; |
|
165
|
21
|
|
|
|
|
25
|
my $record = $this->{'RECORD'}; |
|
166
|
21
|
|
|
|
|
54
|
my $record2 = App::RecordStream::Record->new(); |
|
167
|
21
|
|
|
|
|
40
|
for my $field ($record->keys()) { |
|
168
|
63
|
100
|
|
|
|
87
|
if($this->check_keep($field)) { |
|
169
|
20
|
|
|
|
|
32
|
$record2->set($field, $record->get($field)); |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
} |
|
172
|
21
|
|
|
|
|
75
|
$this->push_record($record); |
|
173
|
21
|
|
|
|
|
41
|
$this->{'RECORD'} = $record2; |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub add_help_types { |
|
177
|
8
|
|
|
8
|
0
|
10
|
my $this = shift; |
|
178
|
8
|
|
|
|
|
20
|
$this->use_help_type('keyspecs'); |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub usage { |
|
182
|
0
|
|
|
0
|
0
|
|
my $this = shift; |
|
183
|
|
|
|
|
|
|
|
|
184
|
0
|
|
|
|
|
|
my $options = [ |
|
185
|
|
|
|
|
|
|
[ 'no-flush-regex|--regex|--re ', 'Add a normal regex.'], |
|
186
|
|
|
|
|
|
|
[ 'pre-flush-regex|--pre ', 'Add a regex that flushes before interpretting fields when matched.'], |
|
187
|
|
|
|
|
|
|
[ 'post-flush-regex|--post ', 'Add a regex that flushes after interpretting fields when matched.'], |
|
188
|
|
|
|
|
|
|
[ 'double-flush-regex|--double ', 'Add a regex that flushes both before and after interprettying fields when matched.'], |
|
189
|
|
|
|
|
|
|
[ 'clobber', 'Do not flush records when a field from a match would clobber an already existing field and do not flush at EOF.'], |
|
190
|
|
|
|
|
|
|
[ 'keep-all', 'Do not clear any fields on a flush.'], |
|
191
|
|
|
|
|
|
|
[ 'keep ', 'Do not clear this comma separated list of fields on a flush.'], |
|
192
|
|
|
|
|
|
|
]; |
|
193
|
|
|
|
|
|
|
|
|
194
|
0
|
|
|
|
|
|
my $args_string = $this->options_string($options); |
|
195
|
|
|
|
|
|
|
|
|
196
|
0
|
|
|
|
|
|
return <
|
|
197
|
|
|
|
|
|
|
Usage: recs-frommultire [] |
|
198
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
199
|
|
|
|
|
|
|
Match multiple regexes against each line of input (or lines of ). |
|
200
|
|
|
|
|
|
|
Various parameters control when the accumulated fields are flushed to output |
|
201
|
|
|
|
|
|
|
as a record and which, if any, fields are cleared when the record is |
|
202
|
|
|
|
|
|
|
flushed. |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
By default regexes do not necessarily flush on either side, would-be field |
|
205
|
|
|
|
|
|
|
collisions cause a flush, EOF causes a flush if any fields are set, and all |
|
206
|
|
|
|
|
|
|
fields are cleared on a flush. |
|
207
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
Arguments: |
|
210
|
|
|
|
|
|
|
$args_string |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
213
|
|
|
|
|
|
|
- Syntax is: ',=REGEX'. KEY field names are optional. |
|
214
|
|
|
|
|
|
|
The key names may be key specs, see '--help-keyspecs' for more. Field |
|
215
|
|
|
|
|
|
|
names may not be keygroups. If field matches \$NUM, then that match number |
|
216
|
|
|
|
|
|
|
in the regex will be used as the field name |
|
217
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Examples: |
|
220
|
|
|
|
|
|
|
Typical use case one: parse several fields on separate lines |
|
221
|
|
|
|
|
|
|
recs-frommultire --re 'fname,lname=^Name: (.*) (.*)\$' --re 'addr=^Address: (.*)\$' |
|
222
|
|
|
|
|
|
|
Typical use case two: some fields apply to multiple records ("department" here) |
|
223
|
|
|
|
|
|
|
recs-frommultire --post 'fname,lname=^Name: (.*) (.*)\$' --re 'department=^Department: (.*)\$' --clobber --keep team |
|
224
|
|
|
|
|
|
|
USAGE |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
1; |