File Coverage

blib/lib/App/RecordStream/Operation/frommultire.pm
Criterion Covered Total %
statement 100 106 94.3
branch 21 22 95.4
condition 14 17 82.3
subroutine 21 24 87.5
pod 0 12 0.0
total 156 181 86.1


line stmt bran cond sub pod time code
1             package App::RecordStream::Operation::frommultire;
2              
3             our $VERSION = "4.0.25";
4              
5 3     3   1153 use strict;
  3         6  
  3         70  
6              
7 3     3   11 use base qw(App::RecordStream::Operation);
  3         4  
  3         3073  
8              
9             sub init {
10 8     8 0 12 my $this = shift;
11 8         13 my $args = shift;
12             my %options = (
13 13     13   5417 "no-flush-regex|regex|re=s" => sub { $this->add_regex($_[1], 0, 0); },
14 0     0   0 "pre-flush-regex|pre=s" => sub { $this->add_regex($_[1], 1, 0); },
15 2     2   212 "post-flush-regex|post=s" => sub { $this->add_regex($_[1], 0, 1); },
16 0     0   0 "double-flush-regex|double=s" => sub { $this->add_regex($_[1], 1, 1); },
17 2     2   167 "clobber" => sub { $this->_set_clobber(1); },
18 1     1   80 "keep-all" => sub { $this->_set_keep_all(1); },
19 1     1   87 "keep=s" => sub { $this->add_keep(split(/,/, $_[1])); },
20 8         72 );
21              
22 8         28 $this->parse_options($args, \%options);
23              
24 8         29 $this->{'RECORD'} = App::RecordStream::Record->new();
25             }
26              
27             sub add_regex {
28 15     15 0 35 my ($this, $string, $pre_flush, $post_flush) = @_;
29              
30 15   100     48 $this->{'REGEXES'} ||= [];
31              
32 15         15 my $fields = [];
33 15 100       56 if($string =~ /^([^=]*)=(.*)$/) {
34 14         38 $fields = [split(/,/, $1)];
35 14         23 $string = $2;
36             }
37              
38 15         15 push @{$this->{'REGEXES'}}, [$string, $fields, $pre_flush, $post_flush]
  15         42  
39             }
40              
41             sub _get_regexes {
42 42     42   64 my ($this) = @_;
43 42   50     86 return $this->{'REGEXES'} || [];
44             }
45              
46             sub _set_clobber {
47 2     2   4 my ($this, $value) = @_;
48 2         4 $this->{'CLOBBER'} = $value;
49             }
50              
51             sub get_clobber {
52 46     46 0 65 my ($this) = @_;
53 46   100     125 return $this->{'CLOBBER'} || 0;
54             }
55              
56             sub _set_keep_all {
57 1     1   3 my ($this, $value) = @_;
58 1         2 $this->{'KEEP_ALL'} = $value;
59             }
60              
61             sub get_keep_all {
62 63     63 0 67 my ($this) = @_;
63 63   100     202 return $this->{'KEEP_ALL'} || 0;
64             }
65              
66             sub add_keep {
67 1     1 0 2 my $this = shift;
68 1   50     5 $this->{'KEEP'} ||= {};
69 1         3 for my $field (@_) {
70 1         3 $this->{'KEEP'}->{$field} = 1;
71             }
72             }
73              
74             sub check_keep {
75 63     63 0 84 my ($this, $field) = @_;
76              
77 63   100     93 $this->{'KEEP'} ||= {};
78 63   66     71 return $this->get_keep_all() || exists($this->{'KEEP'}->{$field});
79             }
80              
81             sub accept_line {
82 42     42 0 47 my $this = shift;
83 42         45 my $line = shift;
84              
85 42         42 my $regex_index = 0;
86 42         53 for my $regex (@{$this->_get_regexes()}) {
  42         55  
87 84         114 my ($string, $fields, $pre_flush, $post_flush) = @$regex;
88 84         105 my $field_prefix = "$regex_index-";
89              
90 84 100       660 if(my @groups = ($line =~ $string)) {
91 38         72 my $pairs = $this->get_field_value_pairs(\@groups, $fields, $field_prefix);
92 38 100       63 if(!$this->get_clobber()) {
93 22         32 foreach my $pair ( @$pairs ) {
94 33         52 my ($name, $value) = @$pair;
95 33 100       32 if(defined ${$this->{'RECORD'}->guess_key_from_spec($name)}) {
  33         64  
96 12         18 $pre_flush = 1;
97             }
98             }
99             }
100              
101 38 100       57 if($pre_flush) {
102 7         13 $this->flush_record();
103             }
104              
105 38         51 foreach my $pair ( @$pairs ) {
106 57         82 my ($name, $value) = @$pair;
107 57         54 ${$this->{'RECORD'}->guess_key_from_spec($name)} = $value;
  57         107  
108             }
109              
110 38 100       75 if($post_flush) {
111 8         14 $this->flush_record();
112             }
113             }
114 84         132 ++$regex_index;
115             }
116              
117 42         125 return 1;
118             }
119              
120             sub stream_done {
121 8     8 0 8 my $this = shift;
122 8         12 my $record = $this->{'RECORD'};
123              
124 8 50 100     12 if(!$this->get_clobber() && scalar($record->keys())) {
125 6         10 $this->flush_record();
126             }
127             }
128              
129             sub get_field_value_pairs {
130 38     38 0 53 my ($this, $groups, $fields, $prefix) = @_;
131              
132 38         41 my @field_names;
133             my %groups_used;
134              
135 38         66 for(my $i = 0; $i < @$fields; ++$i) {
136 51         63 my $field = $fields->[$i];
137 51         48 my $field_name;
138 51 100       98 if($field =~ /^\$(\d+)$/) {
139 10         19 my $n = $1 - 1;
140 10         13 $field_name = $groups->[$n];
141 10         12 $groups_used{$n} = 1;
142             }
143             else {
144 41         45 $field_name = $field;
145             }
146 51         95 push @field_names, $field_name;
147             }
148              
149 38         39 my @pairs;
150 38         38 my $pair_index = 0;
151 38         51 for(my $i = 0; $i < @$groups; ++$i) {
152 67 100       95 if($groups_used{$i}) {
153 10         21 next;
154             }
155 57 100       96 my $field_name = ($pair_index < @field_names) ? $field_names[$pair_index] : ($prefix . $pair_index);
156 57         120 push @pairs, [$field_name, $groups->[$i]];
157 57         88 $pair_index++;
158             }
159              
160 38         69 return \@pairs;
161             }
162              
163             sub flush_record {
164 21     21 0 21 my $this = shift;
165 21         25 my $record = $this->{'RECORD'};
166 21         54 my $record2 = App::RecordStream::Record->new();
167 21         40 for my $field ($record->keys()) {
168 63 100       87 if($this->check_keep($field)) {
169 20         32 $record2->set($field, $record->get($field));
170             }
171             }
172 21         75 $this->push_record($record);
173 21         41 $this->{'RECORD'} = $record2;
174             }
175              
176             sub add_help_types {
177 8     8 0 10 my $this = shift;
178 8         20 $this->use_help_type('keyspecs');
179             }
180              
181             sub usage {
182 0     0 0   my $this = shift;
183              
184 0           my $options = [
185             [ 'no-flush-regex|--regex|--re ', 'Add a normal regex.'],
186             [ 'pre-flush-regex|--pre ', 'Add a regex that flushes before interpretting fields when matched.'],
187             [ 'post-flush-regex|--post ', 'Add a regex that flushes after interpretting fields when matched.'],
188             [ 'double-flush-regex|--double ', 'Add a regex that flushes both before and after interprettying fields when matched.'],
189             [ 'clobber', 'Do not flush records when a field from a match would clobber an already existing field and do not flush at EOF.'],
190             [ 'keep-all', 'Do not clear any fields on a flush.'],
191             [ 'keep ', 'Do not clear this comma separated list of fields on a flush.'],
192             ];
193              
194 0           my $args_string = $this->options_string($options);
195              
196 0           return <
197             Usage: recs-frommultire []
198             __FORMAT_TEXT__
199             Match multiple regexes against each line of input (or lines of ).
200             Various parameters control when the accumulated fields are flushed to output
201             as a record and which, if any, fields are cleared when the record is
202             flushed.
203              
204             By default regexes do not necessarily flush on either side, would-be field
205             collisions cause a flush, EOF causes a flush if any fields are set, and all
206             fields are cleared on a flush.
207             __FORMAT_TEXT__
208              
209             Arguments:
210             $args_string
211              
212             __FORMAT_TEXT__
213             - Syntax is: ',=REGEX'. KEY field names are optional.
214             The key names may be key specs, see '--help-keyspecs' for more. Field
215             names may not be keygroups. If field matches \$NUM, then that match number
216             in the regex will be used as the field name
217             __FORMAT_TEXT__
218              
219             Examples:
220             Typical use case one: parse several fields on separate lines
221             recs-frommultire --re 'fname,lname=^Name: (.*) (.*)\$' --re 'addr=^Address: (.*)\$'
222             Typical use case two: some fields apply to multiple records ("department" here)
223             recs-frommultire --post 'fname,lname=^Name: (.*) (.*)\$' --re 'department=^Department: (.*)\$' --clobber --keep team
224             USAGE
225             }
226              
227             1;