File Coverage

blib/lib/App/RecordStream/Operation/frommultire.pm
Criterion Covered Total %
statement 100 106 94.3
branch 21 22 95.4
condition 14 17 82.3
subroutine 21 24 87.5
pod 0 12 0.0
total 156 181 86.1


line stmt bran cond sub pod time code
1             package App::RecordStream::Operation::frommultire;
2              
3             our $VERSION = "4.0.23";
4              
5 3     3   1099 use strict;
  3         7  
  3         88  
6              
7 3     3   14 use base qw(App::RecordStream::Operation);
  3         5  
  3         3371  
8              
9             sub init {
10 8     8 0 15 my $this = shift;
11 8         15 my $args = shift;
12             my %options = (
13 13     13   7895 "no-flush-regex|regex|re=s" => sub { $this->add_regex($_[1], 0, 0); },
14 0     0   0 "pre-flush-regex|pre=s" => sub { $this->add_regex($_[1], 1, 0); },
15 2     2   314 "post-flush-regex|post=s" => sub { $this->add_regex($_[1], 0, 1); },
16 0     0   0 "double-flush-regex|double=s" => sub { $this->add_regex($_[1], 1, 1); },
17 2     2   243 "clobber" => sub { $this->_set_clobber(1); },
18 1     1   155 "keep-all" => sub { $this->_set_keep_all(1); },
19 1     1   125 "keep=s" => sub { $this->add_keep(split(/,/, $_[1])); },
20 8         102 );
21              
22 8         41 $this->parse_options($args, \%options);
23              
24 8         45 $this->{'RECORD'} = App::RecordStream::Record->new();
25             }
26              
27             sub add_regex {
28 15     15 0 38 my ($this, $string, $pre_flush, $post_flush) = @_;
29              
30 15   100     128 $this->{'REGEXES'} ||= [];
31              
32 15         30 my $fields = [];
33 15 100       81 if($string =~ /^([^=]*)=(.*)$/) {
34 14         53 $fields = [split(/,/, $1)];
35 14         40 $string = $2;
36             }
37              
38 15         23 push @{$this->{'REGEXES'}}, [$string, $fields, $pre_flush, $post_flush]
  15         60  
39             }
40              
41             sub _get_regexes {
42 42     42   79 my ($this) = @_;
43 42   50     143 return $this->{'REGEXES'} || [];
44             }
45              
46             sub _set_clobber {
47 2     2   5 my ($this, $value) = @_;
48 2         6 $this->{'CLOBBER'} = $value;
49             }
50              
51             sub get_clobber {
52 46     46 0 90 my ($this) = @_;
53 46   100     219 return $this->{'CLOBBER'} || 0;
54             }
55              
56             sub _set_keep_all {
57 1     1   5 my ($this, $value) = @_;
58 1         4 $this->{'KEEP_ALL'} = $value;
59             }
60              
61             sub get_keep_all {
62 63     63 0 106 my ($this) = @_;
63 63   100     322 return $this->{'KEEP_ALL'} || 0;
64             }
65              
66             sub add_keep {
67 1     1 0 3 my $this = shift;
68 1   50     8 $this->{'KEEP'} ||= {};
69 1         2 for my $field (@_) {
70 1         5 $this->{'KEEP'}->{$field} = 1;
71             }
72             }
73              
74             sub check_keep {
75 63     63 0 106 my ($this, $field) = @_;
76              
77 63   100     164 $this->{'KEEP'} ||= {};
78 63   66     116 return $this->get_keep_all() || exists($this->{'KEEP'}->{$field});
79             }
80              
81             sub accept_line {
82 42     42 0 76 my $this = shift;
83 42         68 my $line = shift;
84              
85 42         74 my $regex_index = 0;
86 42         74 for my $regex (@{$this->_get_regexes()}) {
  42         90  
87 84         177 my ($string, $fields, $pre_flush, $post_flush) = @$regex;
88 84         155 my $field_prefix = "$regex_index-";
89              
90 84 100       962 if(my @groups = ($line =~ $string)) {
91 38         134 my $pairs = $this->get_field_value_pairs(\@groups, $fields, $field_prefix);
92 38 100       89 if(!$this->get_clobber()) {
93 22         46 foreach my $pair ( @$pairs ) {
94 33         72 my ($name, $value) = @$pair;
95 33 100       50 if(defined ${$this->{'RECORD'}->guess_key_from_spec($name)}) {
  33         111  
96 12         28 $pre_flush = 1;
97             }
98             }
99             }
100              
101 38 100       92 if($pre_flush) {
102 7         19 $this->flush_record();
103             }
104              
105 38         76 foreach my $pair ( @$pairs ) {
106 57         120 my ($name, $value) = @$pair;
107 57         93 ${$this->{'RECORD'}->guess_key_from_spec($name)} = $value;
  57         164  
108             }
109              
110 38 100       116 if($post_flush) {
111 8         20 $this->flush_record();
112             }
113             }
114 84         213 ++$regex_index;
115             }
116              
117 42         185 return 1;
118             }
119              
120             sub stream_done {
121 8     8 0 15 my $this = shift;
122 8         17 my $record = $this->{'RECORD'};
123              
124 8 50 100     20 if(!$this->get_clobber() && scalar($record->keys())) {
125 6         48 $this->flush_record();
126             }
127             }
128              
129             sub get_field_value_pairs {
130 38     38 0 82 my ($this, $groups, $fields, $prefix) = @_;
131              
132 38         63 my @field_names;
133             my %groups_used;
134              
135 38         114 for(my $i = 0; $i < @$fields; ++$i) {
136 51         111 my $field = $fields->[$i];
137 51         83 my $field_name;
138 51 100       153 if($field =~ /^\$(\d+)$/) {
139 10         31 my $n = $1 - 1;
140 10         20 $field_name = $groups->[$n];
141 10         22 $groups_used{$n} = 1;
142             }
143             else {
144 41         72 $field_name = $field;
145             }
146 51         146 push @field_names, $field_name;
147             }
148              
149 38         80 my @pairs;
150 38         63 my $pair_index = 0;
151 38         103 for(my $i = 0; $i < @$groups; ++$i) {
152 67 100       155 if($groups_used{$i}) {
153 10         24 next;
154             }
155 57 100       135 my $field_name = ($pair_index < @field_names) ? $field_names[$pair_index] : ($prefix . $pair_index);
156 57         146 push @pairs, [$field_name, $groups->[$i]];
157 57         147 $pair_index++;
158             }
159              
160 38         104 return \@pairs;
161             }
162              
163             sub flush_record {
164 21     21 0 35 my $this = shift;
165 21         37 my $record = $this->{'RECORD'};
166 21         81 my $record2 = App::RecordStream::Record->new();
167 21         54 for my $field ($record->keys()) {
168 63 100       138 if($this->check_keep($field)) {
169 20         51 $record2->set($field, $record->get($field));
170             }
171             }
172 21         90 $this->push_record($record);
173 21         55 $this->{'RECORD'} = $record2;
174             }
175              
176             sub add_help_types {
177 8     8 0 15 my $this = shift;
178 8         30 $this->use_help_type('keyspecs');
179             }
180              
181             sub usage {
182 0     0 0   my $this = shift;
183              
184 0           my $options = [
185             [ 'no-flush-regex|--regex|--re ', 'Add a normal regex.'],
186             [ 'pre-flush-regex|--pre ', 'Add a regex that flushes before interpretting fields when matched.'],
187             [ 'post-flush-regex|--post ', 'Add a regex that flushes after interpretting fields when matched.'],
188             [ 'double-flush-regex|--double ', 'Add a regex that flushes both before and after interprettying fields when matched.'],
189             [ 'clobber', 'Do not flush records when a field from a match would clobber an already existing field and do not flush at EOF.'],
190             [ 'keep-all', 'Do not clear any fields on a flush.'],
191             [ 'keep ', 'Do not clear this comma separated list of fields on a flush.'],
192             ];
193              
194 0           my $args_string = $this->options_string($options);
195              
196 0           return <
197             Usage: recs-frommultire []
198             __FORMAT_TEXT__
199             Match multiple regexes against each line of input (or lines of ).
200             Various parameters control when the accumulated fields are flushed to output
201             as a record and which, if any, fields are cleared when the record is
202             flushed.
203              
204             By default regexes do not necessarily flush on either side, would-be field
205             collisions cause a flush, EOF causes a flush if any fields are set, and all
206             fields are cleared on a flush.
207             __FORMAT_TEXT__
208              
209             Arguments:
210             $args_string
211              
212             __FORMAT_TEXT__
213             - Syntax is: ',=REGEX'. KEY field names are optional.
214             The key names may be key specs, see '--help-keyspecs' for more. Field
215             names may not be keygroups. If field matches \$NUM, then that match number
216             in the regex will be used as the field name
217             __FORMAT_TEXT__
218              
219             Examples:
220             Typical use case one: parse several fields on separate lines
221             recs-frommultire --re 'fname,lname=^Name: (.*) (.*)\$' --re 'addr=^Address: (.*)\$'
222             Typical use case two: some fields apply to multiple records ("department" here)
223             recs-frommultire --post 'fname,lname=^Name: (.*) (.*)\$' --re 'department=^Department: (.*)\$' --clobber --keep team
224             USAGE
225             }
226              
227             1;