File Coverage

blib/lib/App/RecordStream/Operation/frommultire.pm
Criterion Covered Total %
statement 100 106 94.3
branch 21 22 95.4
condition 14 17 82.3
subroutine 21 24 87.5
pod 0 12 0.0
total 156 181 86.1


line stmt bran cond sub pod time code
1             package App::RecordStream::Operation::frommultire;
2              
3             our $VERSION = "4.0.24";
4              
5 3     3   1305 use strict;
  3         7  
  3         99  
6              
7 3     3   18 use base qw(App::RecordStream::Operation);
  3         7  
  3         3291  
8              
9             sub init {
10 8     8 0 17 my $this = shift;
11 8         18 my $args = shift;
12             my %options = (
13 13     13   8374 "no-flush-regex|regex|re=s" => sub { $this->add_regex($_[1], 0, 0); },
14 0     0   0 "pre-flush-regex|pre=s" => sub { $this->add_regex($_[1], 1, 0); },
15 2     2   307 "post-flush-regex|post=s" => sub { $this->add_regex($_[1], 0, 1); },
16 0     0   0 "double-flush-regex|double=s" => sub { $this->add_regex($_[1], 1, 1); },
17 2     2   282 "clobber" => sub { $this->_set_clobber(1); },
18 1     1   106 "keep-all" => sub { $this->_set_keep_all(1); },
19 1     1   157 "keep=s" => sub { $this->add_keep(split(/,/, $_[1])); },
20 8         152 );
21              
22 8         47 $this->parse_options($args, \%options);
23              
24 8         49 $this->{'RECORD'} = App::RecordStream::Record->new();
25             }
26              
27             sub add_regex {
28 15     15 0 47 my ($this, $string, $pre_flush, $post_flush) = @_;
29              
30 15   100     77 $this->{'REGEXES'} ||= [];
31              
32 15         34 my $fields = [];
33 15 100       84 if($string =~ /^([^=]*)=(.*)$/) {
34 14         64 $fields = [split(/,/, $1)];
35 14         41 $string = $2;
36             }
37              
38 15         25 push @{$this->{'REGEXES'}}, [$string, $fields, $pre_flush, $post_flush]
  15         68  
39             }
40              
41             sub _get_regexes {
42 42     42   101 my ($this) = @_;
43 42   50     146 return $this->{'REGEXES'} || [];
44             }
45              
46             sub _set_clobber {
47 2     2   7 my ($this, $value) = @_;
48 2         7 $this->{'CLOBBER'} = $value;
49             }
50              
51             sub get_clobber {
52 46     46 0 91 my ($this) = @_;
53 46   100     191 return $this->{'CLOBBER'} || 0;
54             }
55              
56             sub _set_keep_all {
57 1     1   3 my ($this, $value) = @_;
58 1         3 $this->{'KEEP_ALL'} = $value;
59             }
60              
61             sub get_keep_all {
62 63     63 0 113 my ($this) = @_;
63 63   100     312 return $this->{'KEEP_ALL'} || 0;
64             }
65              
66             sub add_keep {
67 1     1 0 4 my $this = shift;
68 1   50     10 $this->{'KEEP'} ||= {};
69 1         4 for my $field (@_) {
70 1         5 $this->{'KEEP'}->{$field} = 1;
71             }
72             }
73              
74             sub check_keep {
75 63     63 0 125 my ($this, $field) = @_;
76              
77 63   100     165 $this->{'KEEP'} ||= {};
78 63   66     131 return $this->get_keep_all() || exists($this->{'KEEP'}->{$field});
79             }
80              
81             sub accept_line {
82 42     42 0 79 my $this = shift;
83 42         67 my $line = shift;
84              
85 42         67 my $regex_index = 0;
86 42         66 for my $regex (@{$this->_get_regexes()}) {
  42         91  
87 84         201 my ($string, $fields, $pre_flush, $post_flush) = @$regex;
88 84         185 my $field_prefix = "$regex_index-";
89              
90 84 100       1074 if(my @groups = ($line =~ $string)) {
91 38         138 my $pairs = $this->get_field_value_pairs(\@groups, $fields, $field_prefix);
92 38 100       96 if(!$this->get_clobber()) {
93 22         44 foreach my $pair ( @$pairs ) {
94 33         72 my ($name, $value) = @$pair;
95 33 100       48 if(defined ${$this->{'RECORD'}->guess_key_from_spec($name)}) {
  33         93  
96 12         23 $pre_flush = 1;
97             }
98             }
99             }
100              
101 38 100       85 if($pre_flush) {
102 7         20 $this->flush_record();
103             }
104              
105 38         107 foreach my $pair ( @$pairs ) {
106 57         136 my ($name, $value) = @$pair;
107 57         90 ${$this->{'RECORD'}->guess_key_from_spec($name)} = $value;
  57         172  
108             }
109              
110 38 100       117 if($post_flush) {
111 8         27 $this->flush_record();
112             }
113             }
114 84         267 ++$regex_index;
115             }
116              
117 42         168 return 1;
118             }
119              
120             sub stream_done {
121 8     8 0 14 my $this = shift;
122 8         17 my $record = $this->{'RECORD'};
123              
124 8 50 100     22 if(!$this->get_clobber() && scalar($record->keys())) {
125 6         19 $this->flush_record();
126             }
127             }
128              
129             sub get_field_value_pairs {
130 38     38 0 93 my ($this, $groups, $fields, $prefix) = @_;
131              
132 38         81 my @field_names;
133             my %groups_used;
134              
135 38         98 for(my $i = 0; $i < @$fields; ++$i) {
136 51         110 my $field = $fields->[$i];
137 51         68 my $field_name;
138 51 100       173 if($field =~ /^\$(\d+)$/) {
139 10         27 my $n = $1 - 1;
140 10         12 $field_name = $groups->[$n];
141 10         22 $groups_used{$n} = 1;
142             }
143             else {
144 41         71 $field_name = $field;
145             }
146 51         145 push @field_names, $field_name;
147             }
148              
149 38         83 my @pairs;
150 38         59 my $pair_index = 0;
151 38         100 for(my $i = 0; $i < @$groups; ++$i) {
152 67 100       142 if($groups_used{$i}) {
153 10         21 next;
154             }
155 57 100       136 my $field_name = ($pair_index < @field_names) ? $field_names[$pair_index] : ($prefix . $pair_index);
156 57         151 push @pairs, [$field_name, $groups->[$i]];
157 57         140 $pair_index++;
158             }
159              
160 38         110 return \@pairs;
161             }
162              
163             sub flush_record {
164 21     21 0 42 my $this = shift;
165 21         40 my $record = $this->{'RECORD'};
166 21         79 my $record2 = App::RecordStream::Record->new();
167 21         76 for my $field ($record->keys()) {
168 63 100       156 if($this->check_keep($field)) {
169 20         56 $record2->set($field, $record->get($field));
170             }
171             }
172 21         117 $this->push_record($record);
173 21         62 $this->{'RECORD'} = $record2;
174             }
175              
176             sub add_help_types {
177 8     8 0 18 my $this = shift;
178 8         36 $this->use_help_type('keyspecs');
179             }
180              
181             sub usage {
182 0     0 0   my $this = shift;
183              
184 0           my $options = [
185             [ 'no-flush-regex|--regex|--re ', 'Add a normal regex.'],
186             [ 'pre-flush-regex|--pre ', 'Add a regex that flushes before interpretting fields when matched.'],
187             [ 'post-flush-regex|--post ', 'Add a regex that flushes after interpretting fields when matched.'],
188             [ 'double-flush-regex|--double ', 'Add a regex that flushes both before and after interprettying fields when matched.'],
189             [ 'clobber', 'Do not flush records when a field from a match would clobber an already existing field and do not flush at EOF.'],
190             [ 'keep-all', 'Do not clear any fields on a flush.'],
191             [ 'keep ', 'Do not clear this comma separated list of fields on a flush.'],
192             ];
193              
194 0           my $args_string = $this->options_string($options);
195              
196 0           return <
197             Usage: recs-frommultire []
198             __FORMAT_TEXT__
199             Match multiple regexes against each line of input (or lines of ).
200             Various parameters control when the accumulated fields are flushed to output
201             as a record and which, if any, fields are cleared when the record is
202             flushed.
203              
204             By default regexes do not necessarily flush on either side, would-be field
205             collisions cause a flush, EOF causes a flush if any fields are set, and all
206             fields are cleared on a flush.
207             __FORMAT_TEXT__
208              
209             Arguments:
210             $args_string
211              
212             __FORMAT_TEXT__
213             - Syntax is: ',=REGEX'. KEY field names are optional.
214             The key names may be key specs, see '--help-keyspecs' for more. Field
215             names may not be keygroups. If field matches \$NUM, then that match number
216             in the regex will be used as the field name
217             __FORMAT_TEXT__
218              
219             Examples:
220             Typical use case one: parse several fields on separate lines
221             recs-frommultire --re 'fname,lname=^Name: (.*) (.*)\$' --re 'addr=^Address: (.*)\$'
222             Typical use case two: some fields apply to multiple records ("department" here)
223             recs-frommultire --post 'fname,lname=^Name: (.*) (.*)\$' --re 'department=^Department: (.*)\$' --clobber --keep team
224             USAGE
225             }
226              
227             1;