| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Data::Tubes::Plugin::Parser; | 
| 2 | 14 |  |  | 14 |  | 1297 | use strict; | 
|  | 14 |  |  |  |  | 31 |  | 
|  | 14 |  |  |  |  | 453 |  | 
| 3 | 14 |  |  | 14 |  | 73 | use warnings; | 
|  | 14 |  |  |  |  | 33 |  | 
|  | 14 |  |  |  |  | 505 |  | 
| 4 | 14 |  |  | 14 |  | 76 | use English qw< -no_match_vars >; | 
|  | 14 |  |  |  |  | 27 |  | 
|  | 14 |  |  |  |  | 143 |  | 
| 5 | 14 |  |  | 14 |  | 5608 | use Data::Dumper; | 
|  | 14 |  |  |  |  | 40 |  | 
|  | 14 |  |  |  |  | 1152 |  | 
| 6 |  |  |  |  |  |  | our $VERSION = '0.738'; | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 14 |  |  | 14 |  | 110 | use Log::Log4perl::Tiny qw< :easy :dead_if_first >; | 
|  | 14 |  |  |  |  | 33 |  | 
|  | 14 |  |  |  |  | 142 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 14 |  |  |  |  | 1370 | use Data::Tubes::Util qw< | 
| 11 |  |  |  |  |  |  | assert_all_different | 
| 12 |  |  |  |  |  |  | generalized_hashy | 
| 13 |  |  |  |  |  |  | metadata | 
| 14 |  |  |  |  |  |  | normalize_args | 
| 15 |  |  |  |  |  |  | shorter_sub_names | 
| 16 |  |  |  |  |  |  | test_all_equal | 
| 17 |  |  |  |  |  |  | trim | 
| 18 |  |  |  |  |  |  | unzip | 
| 19 | 14 |  |  | 14 |  | 5618 | >; | 
|  | 14 |  |  |  |  | 32 |  | 
| 20 | 14 |  |  | 14 |  | 4247 | use Data::Tubes::Plugin::Util qw< identify >; | 
|  | 14 |  |  |  |  | 39 |  | 
|  | 14 |  |  |  |  | 49556 |  | 
| 21 |  |  |  |  |  |  | my %global_defaults = ( | 
| 22 |  |  |  |  |  |  | input  => 'raw', | 
| 23 |  |  |  |  |  |  | output => 'structured', | 
| 24 |  |  |  |  |  |  | ); | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | sub parse_by_format { | 
| 27 | 24 |  |  | 24 | 1 | 19009 | my %args = normalize_args(@_, | 
| 28 |  |  |  |  |  |  | [{%global_defaults, name => 'parse by format'}, 'format']); | 
| 29 | 24 |  |  |  |  | 156 | identify(\%args); | 
| 30 |  |  |  |  |  |  |  | 
| 31 | 24 |  |  |  |  | 68 | my $format = $args{format}; | 
| 32 | 24 | 50 |  |  |  | 89 | LOGDIE "parser of type 'format' needs a definition" | 
| 33 |  |  |  |  |  |  | unless defined $format; | 
| 34 |  |  |  |  |  |  |  | 
| 35 | 24 |  |  |  |  | 178 | my @items = split m{(\W+)}, $format; | 
| 36 | 24 | 50 |  |  |  | 94 | return parse_single(key => $items[0]) if @items == 1; | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 24 |  |  |  |  | 125 | my ($keys, $separators) = unzip(\@items); | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | # all keys MUST be different, otherwise some fields are just trumping | 
| 41 |  |  |  |  |  |  | # on each other | 
| 42 | 24 | 50 |  |  |  | 54 | eval { assert_all_different($keys); } | 
|  | 24 |  |  |  |  | 83 |  | 
| 43 |  |  |  |  |  |  | or LOGDIE "'format' parser [$format] " | 
| 44 |  |  |  |  |  |  | . "has duplicate key $EVAL_ERROR->{message}"; | 
| 45 |  |  |  |  |  |  |  | 
| 46 | 24 |  | 100 |  |  | 119 | my $value = $args{value} //= ['whatever']; | 
| 47 | 24 | 100 |  |  |  | 70 | $value = [$value] unless ref $value; | 
| 48 | 24 |  | 100 |  |  | 197 | my $multiple = | 
| 49 |  |  |  |  |  |  | (ref($value) ne 'ARRAY') | 
| 50 |  |  |  |  |  |  | || (scalar(@$value) > 1) | 
| 51 |  |  |  |  |  |  | || ($value->[0] ne 'whatever'); | 
| 52 |  |  |  |  |  |  |  | 
| 53 | 24 | 100 | 100 |  |  | 148 | return parse_by_separators( | 
| 54 |  |  |  |  |  |  | %args, | 
| 55 |  |  |  |  |  |  | keys       => $keys, | 
| 56 |  |  |  |  |  |  | separators => $separators | 
| 57 |  |  |  |  |  |  | ) if $multiple || !test_all_equal(@$separators); | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | # a simple split will do if all separators are the same | 
| 60 | 13 |  |  |  |  | 75 | return parse_by_split( | 
| 61 |  |  |  |  |  |  | %args, | 
| 62 |  |  |  |  |  |  | keys      => $keys, | 
| 63 |  |  |  |  |  |  | separator => $separators->[0] | 
| 64 |  |  |  |  |  |  | ); | 
| 65 |  |  |  |  |  |  | } ## end sub parse_by_format | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | sub parse_by_regex { | 
| 68 | 4 |  |  | 4 | 1 | 5443 | my %args = | 
| 69 |  |  |  |  |  |  | normalize_args(@_, | 
| 70 |  |  |  |  |  |  | [{%global_defaults, name => 'parse by regex'}, 'regex']); | 
| 71 | 4 |  |  |  |  | 29 | identify(\%args); | 
| 72 |  |  |  |  |  |  |  | 
| 73 | 4 |  |  |  |  | 15 | my $name  = $args{name}; | 
| 74 | 4 |  |  |  |  | 8 | my $regex = $args{regex}; | 
| 75 | 4 | 50 |  |  |  | 15 | LOGDIE "parse_by_regex needs a regex" | 
| 76 |  |  |  |  |  |  | unless defined $regex; | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 4 |  |  |  |  | 34 | $regex = qr{$regex}; | 
| 79 | 4 |  |  |  |  | 11 | my $input  = $args{input}; | 
| 80 | 4 |  |  |  |  | 10 | my $output = $args{output}; | 
| 81 |  |  |  |  |  |  | return sub { | 
| 82 | 4 |  |  | 4 |  | 28 | my $record = shift; | 
| 83 | 4 | 50 |  |  |  | 40 | $record->{$input} =~ m{$regex} | 
| 84 |  |  |  |  |  |  | or die { | 
| 85 |  |  |  |  |  |  | message => "'$name': invalid record, regex is $regex", | 
| 86 |  |  |  |  |  |  | input   => $input, | 
| 87 |  |  |  |  |  |  | record  => $record, | 
| 88 |  |  |  |  |  |  | }; | 
| 89 | 4 |  |  |  |  | 85 | my $retval = {%+}; | 
| 90 | 4 |  |  |  |  | 19 | $record->{$output} = $retval; | 
| 91 | 4 |  |  |  |  | 12 | return $record; | 
| 92 | 4 |  |  |  |  | 33 | }; | 
| 93 |  |  |  |  |  |  | } ## end sub parse_by_regex | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | sub _resolve_separator { | 
| 96 | 63 |  |  | 63 |  | 148 | my ($separator, $args) = @_; | 
| 97 | 63 | 50 |  |  |  | 157 | return unless defined $separator; | 
| 98 | 63 | 50 |  |  |  | 156 | $separator = $separator->($args) if ref($separator) eq 'CODE'; | 
| 99 | 63 |  |  |  |  | 108 | my $ref = ref $separator; | 
| 100 | 63 | 100 |  |  |  | 159 | return $separator if $ref eq 'Regexp'; | 
| 101 | 47 | 50 |  |  |  | 98 | LOGCROAK "$args->{name}: unknown separator type $ref" if $ref; | 
| 102 | 47 |  |  |  |  | 91 | $separator = quotemeta $separator; | 
| 103 | 47 |  |  |  |  | 552 | return qr{(?-i:$separator)}; | 
| 104 |  |  |  |  |  |  | } ## end sub _resolve_separator | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | sub _resolve_value { | 
| 107 | 25 |  |  | 25 |  | 63 | my ($value, $args) = @_; | 
| 108 | 25 |  | 100 |  |  | 93 | $value //= 'whatever'; | 
| 109 | 25 | 50 |  |  |  | 72 | $value = $value->($args) if ref($value) eq 'CODE'; | 
| 110 | 25 |  |  |  |  | 43 | my $ref = ref $value; | 
| 111 | 25 | 100 | 66 |  |  | 116 | ($value, $ref) = ([$value], 'ARRAY') if (!$ref) || ($ref eq 'Regexp'); | 
| 112 | 25 | 50 |  |  |  | 80 | LOGCROAK "$args->{name}: unknown value type $ref" if $ref ne 'ARRAY'; | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 25 |  |  |  |  | 39 | my (%flag_for, @regexps); | 
| 115 | 25 |  |  |  |  | 64 | for my $part (@$value) { | 
| 116 | 31 |  |  |  |  | 61 | my $ref = ref $part; | 
| 117 | 31 | 50 |  |  |  | 210 | if ($ref eq 'Regexp') { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 118 | 0 |  |  |  |  | 0 | push @regexps, $part; | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  | elsif ( | 
| 121 |  |  |  |  |  |  | $part =~ m{\A(?: | 
| 122 |  |  |  |  |  |  | (?:single|double)[-_]quoted | 
| 123 |  |  |  |  |  |  | | escaped | 
| 124 |  |  |  |  |  |  | | whatever | 
| 125 |  |  |  |  |  |  | )\z}mxs | 
| 126 |  |  |  |  |  |  | ) | 
| 127 |  |  |  |  |  |  | { | 
| 128 | 30 |  |  |  |  | 77 | $part =~ s{-}{_}mxs; | 
| 129 | 30 |  |  |  |  | 93 | $flag_for{$part} = 1; | 
| 130 |  |  |  |  |  |  | } ## end elsif ($part =~ m{\A(?: )}) | 
| 131 |  |  |  |  |  |  | elsif ($part eq 'quoted') { | 
| 132 | 0 |  |  |  |  | 0 | $flag_for{single_quoted} = 1; | 
| 133 | 0 |  |  |  |  | 0 | $flag_for{double_quoted} = 1; | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  | elsif ($part eq 'specials') { | 
| 136 | 1 |  |  |  |  | 5 | $flag_for{single_quoted} = 1; | 
| 137 | 1 |  |  |  |  | 4 | $flag_for{double_quoted} = 1; | 
| 138 | 1 |  |  |  |  | 4 | $flag_for{escaped}       = 1; | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  | elsif ($ref) { | 
| 141 | 0 |  |  |  |  | 0 | LOGCROAK "$args->{name}: unknown part of type $ref"; | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | else { | 
| 144 | 0 |  |  |  |  | 0 | LOGCROAK "$args->{name}: unknown part $part"; | 
| 145 |  |  |  |  |  |  | } | 
| 146 |  |  |  |  |  |  | } ## end for my $part (@$value) | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 25 |  |  |  |  | 44 | my @escape; | 
| 149 | 25 | 100 |  |  |  | 74 | if ($flag_for{single_quoted}) { | 
| 150 | 7 |  |  |  |  | 16 | push @escape, q{'}; | 
| 151 | 7 |  |  |  |  | 17 | unshift @regexps, q{(?mxs: '[^']*' )}; | 
| 152 |  |  |  |  |  |  | } | 
| 153 | 25 | 100 |  |  |  | 61 | if ($flag_for{double_quoted}) { | 
| 154 | 3 |  |  |  |  | 8 | push @escape, q{"}; | 
| 155 | 3 |  |  |  |  | 8 | unshift @regexps, q{(?mxs: "(?: [^\\"] | \\\\.)*" )}; | 
| 156 |  |  |  |  |  |  | } | 
| 157 | 25 | 100 |  |  |  | 96 | if ($flag_for{escaped}) { | 
| 158 | 7 |  |  |  |  | 14 | push @escape, '\\'; | 
| 159 | 7 |  |  |  |  | 25 | my $escape = quotemeta join '', @escape; | 
| 160 | 7 |  |  |  |  | 20 | push @regexps, qq{(?mxs-i: (?: [^$escape] | \\\\.)*?)}; | 
| 161 |  |  |  |  |  |  | } | 
| 162 | 25 | 100 |  |  |  | 68 | if ($flag_for{whatever}) { | 
| 163 | 16 |  |  |  |  | 33 | push @regexps, qq{(?mxs:.*?)}; | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 25 |  |  |  |  | 86 | my $regex = '(' . join('|', @regexps) . ')'; | 
| 167 | 25 |  |  |  |  | 161 | return ($regex, \%flag_for); | 
| 168 |  |  |  |  |  |  | } ## end sub _resolve_value | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub _resolve_decode { | 
| 171 | 25 |  |  | 25 |  | 41 | my $args    = shift; | 
| 172 | 25 |  |  |  |  | 54 | my $name    = $args->{name}; | 
| 173 | 25 |  |  |  |  | 39 | my $escape  = $args->{escaped}; | 
| 174 | 25 |  |  |  |  | 42 | my $squote  = $args->{single_quoted}; | 
| 175 | 25 |  |  |  |  | 38 | my $dquote  = $args->{double_quoted}; | 
| 176 | 25 |  |  |  |  | 38 | my $vdecode = $args->{decode}; | 
| 177 | 25 |  |  |  |  | 44 | my $decode  = $args->{decode_values}; | 
| 178 | 25 | 50 | 100 |  |  | 169 | if ($vdecode) { | 
|  |  | 100 | 100 |  |  |  |  | 
| 179 |  |  |  |  |  |  | $decode ||= sub { | 
| 180 | 0 |  |  | 0 |  | 0 | my $values = shift; | 
| 181 | 0 |  |  |  |  | 0 | for my $value (@$values) { | 
| 182 | 0 |  |  |  |  | 0 | $value = $vdecode->($value); | 
| 183 |  |  |  |  |  |  | } | 
| 184 | 0 |  |  |  |  | 0 | return $values; | 
| 185 |  |  |  |  |  |  | } | 
| 186 | 0 |  | 0 |  |  | 0 | } ## end if ($vdecode) | 
| 187 |  |  |  |  |  |  | elsif ($escape || $squote || $dquote) { | 
| 188 |  |  |  |  |  |  | $decode ||= sub { | 
| 189 | 12 |  |  | 12 |  | 24 | my $values = shift; | 
| 190 | 12 |  |  |  |  | 40 | for my $i (0 .. $#$values) { | 
| 191 | 41 |  |  |  |  | 86 | my $value = $values->[$i]; | 
| 192 | 41 | 50 |  |  |  | 82 | my $len   = length $value or next; | 
| 193 | 41 |  |  |  |  | 73 | my $first = substr $value, 0, 1; | 
| 194 | 41 | 100 | 100 |  |  | 194 | if ($dquote && $first eq q{"}) { | 
|  |  | 100 | 100 |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 195 | 5 | 50 | 33 |  |  | 28 | die {message => "'$name': invalid record, " | 
| 196 |  |  |  |  |  |  | . "unterminated double quote at field $i (0-based)" | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  | unless $len > 1 && substr($value, -1, 1) eq q{"}; | 
| 199 | 5 |  |  |  |  | 16 | $values->[$i] = substr $value, 1, $len - 2;    # unquote | 
| 200 | 5 |  |  |  |  | 25 | $values->[$i] =~ s{\\(.)}{$1}gmxs;             # unescape | 
| 201 |  |  |  |  |  |  | } ## end if ($dquote && $first ...) | 
| 202 |  |  |  |  |  |  | elsif ($squote && $first eq q{'}) { | 
| 203 | 11 | 50 | 33 |  |  | 59 | die {message => "'$name': invalid record, " | 
| 204 |  |  |  |  |  |  | . "unterminated single quote at field $i (0-based)", | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  | unless $len > 1 && substr($value, -1, 1) eq q{'}; | 
| 207 | 11 |  |  |  |  | 34 | $values->[$i] = substr $value, 1, $len - 2;    # unquote | 
| 208 |  |  |  |  |  |  | } ## end elsif ($squote && $first ...) | 
| 209 |  |  |  |  |  |  | elsif ($escape) { | 
| 210 | 21 |  |  |  |  | 83 | $values->[$i] =~ s{\\(.)}{$1}gmxs;             # unescape | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  | } ## end for my $i (0 .. $#$values) | 
| 213 | 12 |  |  |  |  | 50 | return $values; | 
| 214 |  |  |  |  |  |  | } | 
| 215 | 11 |  | 50 |  |  | 96 | } ## end elsif ($escape || $squote...) | 
| 216 | 25 |  |  |  |  | 60 | return $decode; | 
| 217 |  |  |  |  |  |  | } ## end sub _resolve_decode | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | sub parse_by_separators { | 
| 220 | 16 |  |  | 16 | 1 | 3214 | my %args = normalize_args(@_, | 
| 221 |  |  |  |  |  |  | [{%global_defaults, name => 'parse by separators'}, 'separators']); | 
| 222 | 16 |  |  |  |  | 91 | identify(\%args); | 
| 223 | 16 |  |  |  |  | 49 | my $name = $args{name}; | 
| 224 |  |  |  |  |  |  |  | 
| 225 | 16 |  |  |  |  | 35 | my $separators = $args{separators}; | 
| 226 | 16 | 50 |  |  |  | 55 | LOGDIE "parse_by_separators needs separators" | 
| 227 |  |  |  |  |  |  | unless defined $separators; | 
| 228 | 16 |  |  |  |  | 45 | $separators = [map { _resolve_separator($_, \%args) } @$separators]; | 
|  | 41 |  |  |  |  | 116 |  | 
| 229 |  |  |  |  |  |  |  | 
| 230 | 16 |  |  |  |  | 46 | my $keys = $args{keys}; | 
| 231 | 16 |  |  |  |  | 31 | my ($delta, $n_keys); | 
| 232 | 16 | 100 |  |  |  | 50 | if (defined $keys) { | 
| 233 | 12 |  |  |  |  | 27 | $n_keys = scalar @$keys; | 
| 234 | 12 |  |  |  |  | 28 | $delta  = $n_keys - scalar(@$separators); | 
| 235 | 12 | 50 | 33 |  |  | 70 | LOGDIE "parse_by_separators 0 <= #keys - #separators <= 1" | 
| 236 |  |  |  |  |  |  | if ($delta < 0) || ($delta > 1); | 
| 237 |  |  |  |  |  |  | } ## end if (defined $keys) | 
| 238 |  |  |  |  |  |  | else { | 
| 239 | 4 |  |  |  |  | 12 | $keys   = [0 .. scalar(@$separators)]; | 
| 240 | 4 |  |  |  |  | 6 | $n_keys = 0;                             # don't bother | 
| 241 | 4 |  |  |  |  | 7 | $delta  = 1; | 
| 242 |  |  |  |  |  |  | } | 
| 243 |  |  |  |  |  |  |  | 
| 244 | 16 |  |  |  |  | 66 | my ($value_regex, $flag_for) = _resolve_value($args{value}, \%args); | 
| 245 |  |  |  |  |  |  |  | 
| 246 | 16 |  |  |  |  | 42 | my @items; | 
| 247 | 16 |  |  |  |  | 69 | for my $i (0 .. $#$keys) { | 
| 248 | 57 |  |  |  |  | 90 | push @items, $value_regex; | 
| 249 | 57 | 100 |  |  |  | 148 | push @items, $separators->[$i] if $i <= $#$separators; | 
| 250 |  |  |  |  |  |  | } | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | # if not a separator, the last item becomes a catchall | 
| 253 | 16 | 50 |  |  |  | 54 | $items[-1] = '(.*)' if $delta > 0; | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | # ready to generate the regexp. We bind the end to \z anyway because | 
| 256 |  |  |  |  |  |  | # the last element might be a separator | 
| 257 | 16 |  |  |  |  | 53 | my $format = join '', '(?:\\A', @items, '\\z)'; | 
| 258 | 16 |  |  |  |  | 614 | my $regex = qr{$format}; | 
| 259 | 16 |  |  |  |  | 119 | DEBUG "$name: regex will be: $regex"; | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | # this sub will use the regexp above, do checking and return captured | 
| 262 |  |  |  |  |  |  | # values in a hash with @keys | 
| 263 | 16 |  |  |  |  | 287 | my $input  = $args{input}; | 
| 264 | 16 |  |  |  |  | 34 | my $output = $args{output}; | 
| 265 | 16 |  |  |  |  | 29 | my $trim   = $args{trim}; | 
| 266 | 16 |  |  |  |  | 138 | my $decode = _resolve_decode({%args, %$flag_for}); | 
| 267 |  |  |  |  |  |  | return sub { | 
| 268 | 16 |  |  | 16 |  | 109 | my $record = shift; | 
| 269 | 16 | 50 |  |  |  | 237 | my @values = $record->{$input} =~ m{$regex} | 
| 270 |  |  |  |  |  |  | or die { | 
| 271 |  |  |  |  |  |  | message => 'invalid record', | 
| 272 |  |  |  |  |  |  | record  => $record, | 
| 273 |  |  |  |  |  |  | regex   => $regex | 
| 274 |  |  |  |  |  |  | }; | 
| 275 | 16 | 100 |  |  |  | 119 | trim(@values) if $trim; | 
| 276 | 16 | 100 |  |  |  | 45 | if ($decode) { | 
| 277 | 7 | 50 |  |  |  | 15 | eval { @values = @{$decode->(\@values)}; 1 } or do { | 
|  | 7 |  |  |  |  | 11 |  | 
|  | 7 |  |  |  |  | 22 |  | 
|  | 7 |  |  |  |  | 23 |  | 
| 278 | 0 |  |  |  |  | 0 | my $e = $@; | 
| 279 | 0 | 0 |  |  |  | 0 | $e = {message => $e} unless ref $e; | 
| 280 | 0 | 0 |  |  |  | 0 | $e = {%$e, record => $record} if ref($e) eq 'HASH'; | 
| 281 | 0 |  |  |  |  | 0 | die $e; | 
| 282 |  |  |  |  |  |  | }; | 
| 283 |  |  |  |  |  |  | } ## end if ($decode) | 
| 284 |  |  |  |  |  |  |  | 
| 285 | 16 | 100 |  |  |  | 43 | if ($n_keys) { | 
| 286 | 12 |  |  |  |  | 22 | my $n_values = scalar @values; | 
| 287 | 12 | 50 |  |  |  | 37 | die { | 
| 288 |  |  |  |  |  |  | message => "'$name': invalid record, expected $n_keys, " | 
| 289 |  |  |  |  |  |  | . "got $n_values only", | 
| 290 |  |  |  |  |  |  | values => \@values, | 
| 291 |  |  |  |  |  |  | record => $record | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  | if $n_values < $n_keys; | 
| 294 |  |  |  |  |  |  |  | 
| 295 | 12 |  |  |  |  | 34 | $record->{$output} = \my %retval; | 
| 296 | 12 |  |  |  |  | 63 | @retval{@$keys} = @values; | 
| 297 |  |  |  |  |  |  | } ## end if ($n_keys) | 
| 298 |  |  |  |  |  |  | else { | 
| 299 | 4 |  |  |  |  | 11 | $record->{$output} = \@values; | 
| 300 |  |  |  |  |  |  | } | 
| 301 | 16 |  |  |  |  | 57 | return $record; | 
| 302 | 16 |  |  |  |  | 232 | }; | 
| 303 |  |  |  |  |  |  | } ## end sub parse_by_separators | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | sub parse_by_split { | 
| 306 | 13 |  |  | 13 | 1 | 87 | my %args = | 
| 307 |  |  |  |  |  |  | normalize_args(@_, | 
| 308 |  |  |  |  |  |  | [{%global_defaults, name => 'parse by split'}, 'separator']); | 
| 309 | 13 |  |  |  |  | 75 | identify(\%args); | 
| 310 |  |  |  |  |  |  |  | 
| 311 | 13 |  |  |  |  | 60 | my $separator = _resolve_separator($args{separator}, \%args); | 
| 312 |  |  |  |  |  |  |  | 
| 313 | 13 |  |  |  |  | 36 | my $name          = $args{name}; | 
| 314 | 13 |  |  |  |  | 23 | my $keys          = $args{keys}; | 
| 315 | 13 | 50 |  |  |  | 40 | my $n_keys        = defined($keys) ? scalar(@$keys) : 0; | 
| 316 | 13 |  |  |  |  | 24 | my $input         = $args{input}; | 
| 317 | 13 |  |  |  |  | 24 | my $output        = $args{output}; | 
| 318 | 13 |  | 100 |  |  | 59 | my $allow_missing = $args{allow_missing} || 0; | 
| 319 | 13 |  |  |  |  | 34 | my $trim          = $args{trim}; | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | return sub { | 
| 322 | 16 |  |  | 16 |  | 62 | my $record = shift; | 
| 323 |  |  |  |  |  |  |  | 
| 324 | 16 |  |  |  |  | 87 | my @values = split(/$separator/, $record->{$input}, $n_keys); | 
| 325 | 16 | 100 |  |  |  | 59 | trim(@values) if $trim; | 
| 326 |  |  |  |  |  |  |  | 
| 327 | 16 |  |  |  |  | 30 | my $n_values = @values; | 
| 328 | 16 | 100 |  |  |  | 91 | die { | 
| 329 |  |  |  |  |  |  | message => "'$name': invalid record, expected $n_keys items, " | 
| 330 |  |  |  |  |  |  | . "got $n_values", | 
| 331 |  |  |  |  |  |  | input  => $input, | 
| 332 |  |  |  |  |  |  | record => $record, | 
| 333 |  |  |  |  |  |  | } | 
| 334 |  |  |  |  |  |  | if $n_values + $allow_missing < $n_keys; | 
| 335 |  |  |  |  |  |  |  | 
| 336 | 11 |  |  |  |  | 29 | $record->{$output} = \my %retval; | 
| 337 | 11 |  |  |  |  | 51 | @retval{@$keys} = @values; | 
| 338 | 11 |  |  |  |  | 37 | return $record; | 
| 339 |  |  |  |  |  |  | } | 
| 340 | 13 | 50 |  |  |  | 159 | if $n_keys; | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | return sub { | 
| 343 | 0 |  |  | 0 |  | 0 | my $record = shift; | 
| 344 | 0 |  |  |  |  | 0 | my @retval = split /$separator/, $record->{$input}; | 
| 345 | 0 | 0 |  |  |  | 0 | trim(@retval) if $trim; | 
| 346 | 0 |  |  |  |  | 0 | $record->{$output} = \@retval; | 
| 347 | 0 |  |  |  |  | 0 | return $record; | 
| 348 | 0 |  |  |  |  | 0 | }; | 
| 349 |  |  |  |  |  |  |  | 
| 350 |  |  |  |  |  |  | } ## end sub parse_by_split | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | sub parse_by_value_separator { | 
| 353 | 9 |  |  | 9 | 1 | 5433 | my %args = normalize_args( | 
| 354 |  |  |  |  |  |  | @_, | 
| 355 |  |  |  |  |  |  | [ | 
| 356 |  |  |  |  |  |  | {%global_defaults, name => 'parse by value and separator'}, | 
| 357 |  |  |  |  |  |  | 'separator' | 
| 358 |  |  |  |  |  |  | ] | 
| 359 |  |  |  |  |  |  | ); | 
| 360 | 9 |  |  |  |  | 45 | identify(\%args); | 
| 361 | 9 |  |  |  |  | 22 | my $name = $args{name}; | 
| 362 |  |  |  |  |  |  |  | 
| 363 | 9 |  |  |  |  | 27 | my $separator = _resolve_separator($args{separator}, \%args); | 
| 364 | 9 | 50 |  |  |  | 36 | LOGCROAK "$name: argument separator is mandatory" | 
| 365 |  |  |  |  |  |  | unless defined $separator; | 
| 366 |  |  |  |  |  |  |  | 
| 367 | 9 |  |  |  |  | 24 | my ($value, $flag_for) = _resolve_value($args{value}, \%args); | 
| 368 | 9 |  |  |  |  | 60 | my $decode = _resolve_decode({%args, %$flag_for}); | 
| 369 |  |  |  |  |  |  |  | 
| 370 | 9 |  |  |  |  | 27 | my $keys          = $args{keys}; | 
| 371 | 9 | 100 |  |  |  | 19 | my $n_keys        = defined($keys) ? scalar(@$keys) : 0; | 
| 372 | 9 |  |  |  |  | 15 | my $input         = $args{input}; | 
| 373 | 9 |  |  |  |  | 12 | my $output        = $args{output}; | 
| 374 | 9 |  | 50 |  |  | 28 | my $allow_missing = $args{allow_missing} || 0; | 
| 375 | 9 |  | 50 |  |  | 23 | my $allow_surplus = $args{allow_surplus} || 0; | 
| 376 | 9 |  |  |  |  | 12 | my $trim          = $args{trim}; | 
| 377 | 9 |  |  |  |  | 96 | my $go_global     = $^V lt v5.18.0; | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | return sub { | 
| 380 | 10 |  |  | 10 |  | 659 | my $record = shift; | 
| 381 |  |  |  |  |  |  |  | 
| 382 | 10 |  |  |  |  | 14 | my @values; | 
| 383 | 10 | 50 |  |  |  | 22 | if ($go_global) { | 
| 384 | 0 |  |  |  |  | 0 | local our @global_values = (); | 
| 385 | 0 |  |  |  |  | 0 | my $collector = qr/(?{push @global_values, $^N})/; | 
|  | 0 |  |  |  |  | 0 |  | 
| 386 | 0 | 0 |  |  |  | 0 | $record->{$input} =~ m/ | 
| 387 |  |  |  |  |  |  | \A (?: $value $separator $collector )* | 
| 388 |  |  |  |  |  |  | $value \z $collector | 
| 389 |  |  |  |  |  |  | /gmxs | 
| 390 |  |  |  |  |  |  | or die { | 
| 391 |  |  |  |  |  |  | message   => 'invalid record', | 
| 392 |  |  |  |  |  |  | separator => $separator, | 
| 393 |  |  |  |  |  |  | value     => $value, | 
| 394 |  |  |  |  |  |  | record    => $record, | 
| 395 |  |  |  |  |  |  | }; | 
| 396 | 0 |  |  |  |  | 0 | @values = @global_values; | 
| 397 |  |  |  |  |  |  | } | 
| 398 |  |  |  |  |  |  | else { | 
| 399 | 10 | 50 |  |  |  | 413 | $record->{$input} =~ m/ | 
| 400 | 30 |  |  |  |  | 163 | \A (?: $value $separator (?{push @values, $^N}) )* | 
| 401 | 10 |  |  |  |  | 66 | $value \z (?{push @values, $^N}) | 
| 402 |  |  |  |  |  |  | /gmxs | 
| 403 |  |  |  |  |  |  | or die { | 
| 404 |  |  |  |  |  |  | message   => 'invalid record', | 
| 405 |  |  |  |  |  |  | separator => $separator, | 
| 406 |  |  |  |  |  |  | value     => $value, | 
| 407 |  |  |  |  |  |  | record    => $record, | 
| 408 |  |  |  |  |  |  | }; | 
| 409 |  |  |  |  |  |  | } | 
| 410 | 10 | 100 |  |  |  | 48 | trim(@values) if $trim; | 
| 411 | 10 | 100 |  |  |  | 26 | if ($decode) { | 
| 412 | 5 | 50 |  |  |  | 10 | eval { @values = @{$decode->(\@values)}; 1 } or do { | 
|  | 5 |  |  |  |  | 7 |  | 
|  | 5 |  |  |  |  | 13 |  | 
|  | 5 |  |  |  |  | 15 |  | 
| 413 | 0 |  |  |  |  | 0 | my $e = $EVAL_ERROR; | 
| 414 | 0 | 0 |  |  |  | 0 | $e = {message => $e} unless ref $e; | 
| 415 | 0 | 0 |  |  |  | 0 | $e = {%$e, record => $record} if ref($e) eq 'HASH'; | 
| 416 | 0 |  |  |  |  | 0 | die $e; | 
| 417 |  |  |  |  |  |  | }; | 
| 418 |  |  |  |  |  |  | } ## end if ($decode) | 
| 419 |  |  |  |  |  |  |  | 
| 420 | 10 | 100 |  |  |  | 17 | if ($n_keys) { | 
| 421 | 6 |  |  |  |  | 10 | my $n_values = @values; | 
| 422 | 6 | 50 | 33 |  |  | 28 | die { | 
| 423 |  |  |  |  |  |  | message => "'$name': invalid record, expected $n_keys items, " | 
| 424 |  |  |  |  |  |  | . "got $n_values", | 
| 425 |  |  |  |  |  |  | input  => $input, | 
| 426 |  |  |  |  |  |  | record => $record, | 
| 427 |  |  |  |  |  |  | } | 
| 428 |  |  |  |  |  |  | if ($n_values + $allow_missing < $n_keys) | 
| 429 |  |  |  |  |  |  | || ($n_values - $allow_surplus > $n_keys); | 
| 430 | 6 |  |  |  |  | 13 | $record->{$output} = \my %retval; | 
| 431 | 6 |  |  |  |  | 31 | @retval{@$keys} = @values; | 
| 432 |  |  |  |  |  |  | } ## end if ($n_keys) | 
| 433 |  |  |  |  |  |  | else { | 
| 434 | 4 |  |  |  |  | 11 | $record->{$output} = \@values; | 
| 435 |  |  |  |  |  |  | } | 
| 436 | 10 |  |  |  |  | 31 | return $record; | 
| 437 | 9 |  |  |  |  | 83 | }; | 
| 438 |  |  |  |  |  |  | } ## end sub parse_by_value_separator | 
| 439 |  |  |  |  |  |  |  | 
| 440 |  |  |  |  |  |  | sub parse_ghashy { | 
| 441 | 3 |  |  | 3 | 1 | 3234 | my %args = normalize_args(@_, | 
| 442 |  |  |  |  |  |  | {%global_defaults, default_key => '', name => 'parse ghashy'}); | 
| 443 | 3 |  |  |  |  | 15 | identify(\%args); | 
| 444 |  |  |  |  |  |  |  | 
| 445 | 3 | 50 |  |  |  | 6 | my %defaults = %{$args{defaults} || {}}; | 
|  | 3 |  |  |  |  | 19 |  | 
| 446 | 3 |  |  |  |  | 8 | my $input    = $args{input}; | 
| 447 | 3 |  |  |  |  | 5 | my $output   = $args{output}; | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  | # pre-compile capture thing from generalized_hashy | 
| 450 | 3 |  |  |  |  | 13 | $args{capture} = generalized_hashy(%args, text => undef)->{capture}; | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | return sub { | 
| 453 | 3 |  |  | 3 |  | 87 | my $record = shift; | 
| 454 | 3 |  |  |  |  | 11 | my $outcome = generalized_hashy(%args, text => $record->{$input}); | 
| 455 |  |  |  |  |  |  | die { | 
| 456 |  |  |  |  |  |  | input   => $input, | 
| 457 |  |  |  |  |  |  | message => $outcome->{failure}, | 
| 458 |  |  |  |  |  |  | outcome => $outcome, | 
| 459 |  |  |  |  |  |  | record  => $record, | 
| 460 |  |  |  |  |  |  | } | 
| 461 | 3 | 50 |  |  |  | 11 | unless exists $outcome->{hash}; | 
| 462 | 3 |  |  |  |  | 5 | $record->{$output} = {%defaults, %{$outcome->{hash}}}; | 
|  | 3 |  |  |  |  | 15 |  | 
| 463 | 3 |  |  |  |  | 19 | return $record; | 
| 464 | 3 |  |  |  |  | 20 | }; | 
| 465 |  |  |  |  |  |  | } ## end sub parse_ghashy | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | sub parse_hashy { | 
| 468 | 4 |  |  | 4 | 1 | 2300 | my %args = normalize_args( | 
| 469 |  |  |  |  |  |  | @_, | 
| 470 |  |  |  |  |  |  | { | 
| 471 |  |  |  |  |  |  | %global_defaults, | 
| 472 |  |  |  |  |  |  | chunks_separator    => ' ', | 
| 473 |  |  |  |  |  |  | default_key         => '', | 
| 474 |  |  |  |  |  |  | key_value_separator => '=', | 
| 475 |  |  |  |  |  |  | name                => 'parse hashy', | 
| 476 |  |  |  |  |  |  | } | 
| 477 |  |  |  |  |  |  | ); | 
| 478 | 4 |  |  |  |  | 24 | identify(\%args); | 
| 479 | 4 | 50 |  |  |  | 10 | my %defaults = %{$args{defaults} || {}}; | 
|  | 4 |  |  |  |  | 39 |  | 
| 480 | 4 |  |  |  |  | 12 | my $input    = $args{input}; | 
| 481 | 4 |  |  |  |  | 10 | my $output   = $args{output}; | 
| 482 |  |  |  |  |  |  | return sub { | 
| 483 | 3 |  |  | 3 |  | 558 | my $record = shift; | 
| 484 | 3 |  |  |  |  | 30 | my $parsed = metadata($record->{$input}, %args); | 
| 485 | 3 |  |  |  |  | 20 | $record->{$output} = {%defaults, %$parsed}; | 
| 486 | 3 |  |  |  |  | 13 | return $record; | 
| 487 | 4 |  |  |  |  | 30 | }; | 
| 488 |  |  |  |  |  |  | } ## end sub parse_hashy | 
| 489 |  |  |  |  |  |  |  | 
| 490 |  |  |  |  |  |  | sub parse_single { | 
| 491 | 2 |  |  | 2 | 1 | 2072 | my %args = normalize_args( | 
| 492 |  |  |  |  |  |  | @_, | 
| 493 |  |  |  |  |  |  | { | 
| 494 |  |  |  |  |  |  | key => 'key', | 
| 495 |  |  |  |  |  |  | %global_defaults, | 
| 496 |  |  |  |  |  |  | } | 
| 497 |  |  |  |  |  |  | ); | 
| 498 | 2 |  |  |  |  | 11 | identify(\%args); | 
| 499 | 2 |  |  |  |  | 5 | my $key     = $args{key}; | 
| 500 | 2 |  | 33 |  |  | 12 | my $has_key = defined($key) && length($key); | 
| 501 | 2 |  |  |  |  | 3 | my $input   = $args{input}; | 
| 502 | 2 |  |  |  |  | 4 | my $output  = $args{output}; | 
| 503 |  |  |  |  |  |  | return sub { | 
| 504 | 2 |  |  | 2 |  | 10 | my $record = shift; | 
| 505 |  |  |  |  |  |  | $record->{$output} = | 
| 506 | 2 | 50 |  |  |  | 9 | $has_key ? {$key => $record->{$input}} : $record->{$input}; | 
| 507 | 2 |  |  |  |  | 5 | return $record; | 
| 508 |  |  |  |  |  |  | } | 
| 509 | 2 |  |  |  |  | 14 | } ## end sub parse_single | 
| 510 |  |  |  |  |  |  |  | 
| 511 |  |  |  |  |  |  | shorter_sub_names(__PACKAGE__, 'parse_'); | 
| 512 |  |  |  |  |  |  |  | 
| 513 |  |  |  |  |  |  | 1; |