| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package App::Sets::Parser; | 
| 2 |  |  |  |  |  |  | $App::Sets::Parser::VERSION = '0.974'; | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 3 |  |  | 3 |  | 10 | use strict; | 
|  | 3 |  |  |  |  | 3 |  | 
|  | 3 |  |  |  |  | 68 |  | 
| 6 | 3 |  |  | 3 |  | 9 | use warnings; | 
|  | 3 |  |  |  |  | 3 |  | 
|  | 3 |  |  |  |  | 65 |  | 
| 7 | 3 |  |  | 3 |  | 9 | use Carp; | 
|  | 3 |  |  |  |  | 3 |  | 
|  | 3 |  |  |  |  | 155 |  | 
| 8 | 3 |  |  | 3 |  | 9 | use Log::Log4perl::Tiny qw< :easy :dead_if_first >; | 
|  | 3 |  |  |  |  | 3 |  | 
|  | 3 |  |  |  |  | 15 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | # ABSTRACT: parse input expressions of operations on sets | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | sub parse { | 
| 14 | 64 |  |  | 64 | 0 | 57 | my ($string) = @_; | 
| 15 | 64 |  |  |  |  | 112 | my $retval = first($string, 0); | 
| 16 | 64 | 50 |  |  |  | 250 | my ($expression, $pos) = $retval ? @$retval : (undef, 0); | 
| 17 | 64 | 50 |  |  |  | 190 | return $expression if $pos == length $string; | 
| 18 |  |  |  |  |  |  |  | 
| 19 | 0 |  |  |  |  | 0 | my $offending = substr $string, $pos; | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 0 |  |  |  |  | 0 | my ($spaces) = $offending =~ s{\A(\s+)}{}mxs; | 
| 22 | 0 |  |  |  |  | 0 | $pos += length $spaces; | 
| 23 |  |  |  |  |  |  |  | 
| 24 | 0 |  |  |  |  | 0 | my $nchars = 23; | 
| 25 | 0 | 0 |  |  |  | 0 | $offending = substr($offending, 0, $nchars - 3) . '...' | 
| 26 |  |  |  |  |  |  | if length($offending) > $nchars; | 
| 27 |  |  |  |  |  |  |  | 
| 28 | 0 |  |  |  |  | 0 | LOGDIE "parse error at char $pos --> $offending\n",; | 
| 29 |  |  |  |  |  |  | } ## end sub parse | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | sub lrx_head { | 
| 32 | 252 |  |  | 252 | 0 | 305 | my $sequence = _sequence(@_); | 
| 33 |  |  |  |  |  |  | return sub { | 
| 34 | 252 | 50 |  | 252 |  | 269 | my $retval = $sequence->(@_) | 
| 35 |  |  |  |  |  |  | or return; | 
| 36 | 252 |  |  |  |  | 236 | my ($struct, $pos) = @$retval; | 
| 37 | 252 |  |  |  |  | 192 | my ($second, $first_tail) = @{$struct}[1, 3]; | 
|  | 252 |  |  |  |  | 297 |  | 
| 38 | 252 | 100 |  |  |  | 346 | if (defined $first_tail->[0]) { | 
| 39 | 64 |  |  |  |  | 39 | my ($root, $parent) = @{$first_tail->[0]}; | 
|  | 64 |  |  |  |  | 69 |  | 
| 40 | 64 |  |  |  |  | 56 | $parent->[1] = $second->[0]; | 
| 41 | 64 |  |  |  |  | 64 | $struct = $root; | 
| 42 |  |  |  |  |  |  | } | 
| 43 |  |  |  |  |  |  | else { | 
| 44 | 188 |  |  |  |  | 167 | $struct = $second->[0]; | 
| 45 |  |  |  |  |  |  | } | 
| 46 | 252 |  |  |  |  | 642 | return [$struct, $pos]; | 
| 47 |  |  |  |  |  |  | } | 
| 48 | 252 |  |  |  |  | 658 | } ## end sub lrx_head | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | sub lrx_tail { | 
| 51 | 316 |  |  | 316 | 0 | 352 | my $sequence = _sequence('optws', _alternation(_sequence(@_), 'empty')); | 
| 52 |  |  |  |  |  |  | return sub { | 
| 53 | 316 | 50 |  | 316 |  | 328 | my $retval = $sequence->(@_) | 
| 54 |  |  |  |  |  |  | or return; | 
| 55 | 316 |  |  |  |  | 301 | my ($struct, $pos) = @$retval; | 
| 56 | 316 |  |  |  |  | 254 | $retval = $struct->[1]; | 
| 57 | 316 | 100 |  |  |  | 472 | if (!defined $retval->[0]) { | 
| 58 | 252 |  |  |  |  | 210 | $retval = undef; | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  | else {    # not empty | 
| 61 | 64 |  |  |  |  | 44 | my ($op, $second, $tail) = @{$retval->[0]}[0, 2, 4]; | 
|  | 64 |  |  |  |  | 106 |  | 
| 62 | 64 |  |  |  |  | 83 | my $node = [$op->[0], undef, $second->[0]]; | 
| 63 | 64 | 50 |  |  |  | 72 | if (defined $tail->[0]) { | 
| 64 | 0 |  |  |  |  | 0 | my ($root, $parent) = @{$tail->[0]}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 65 | 0 |  |  |  |  | 0 | $parent->[1] = $node;    # link leaf to parent node | 
| 66 | 0 |  |  |  |  | 0 | $retval = [$root, $node]; | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  | else { | 
| 69 | 64 |  |  |  |  | 78 | $retval = [$node, $node]; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  | } ## end else [ if (!defined $retval->... | 
| 72 | 316 |  |  |  |  | 1105 | return [$retval, $pos]; | 
| 73 |  |  |  |  |  |  | } | 
| 74 | 316 |  |  |  |  | 820 | } ## end sub lrx_tail | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | sub first { | 
| 77 | 64 |  |  | 64 | 0 | 96 | return lrx_head(qw< optws second optws first_tail optws >)->(@_); | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | sub first_tail { | 
| 81 | 80 |  |  | 80 | 0 | 120 | return lrx_tail(qw< op_subtract optws second optws first_tail optws >) | 
| 82 |  |  |  |  |  |  | ->(@_); | 
| 83 |  |  |  |  |  |  | } | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | sub second { | 
| 86 | 80 |  |  | 80 | 0 | 101 | return lrx_head(qw< optws third optws second_tail optws >)->(@_); | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | sub second_tail { | 
| 90 | 108 |  |  | 108 | 0 | 142 | return lrx_tail(qw< op_union optws third optws second_tail optws >) | 
| 91 |  |  |  |  |  |  | ->(@_); | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | sub third { | 
| 95 | 108 |  |  | 108 | 0 | 140 | return lrx_head(qw< optws fourth optws third_tail optws >)->(@_); | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | sub third_tail { | 
| 99 | 128 |  |  | 128 | 0 | 185 | return lrx_tail(qw< op_intersect optws fourth optws third_tail optws >) | 
| 100 |  |  |  |  |  |  | ->(@_); | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | sub fourth { | 
| 104 | 128 | 50 |  | 128 | 0 | 191 | my $retval = _sequence( | 
| 105 |  |  |  |  |  |  | 'optws', | 
| 106 |  |  |  |  |  |  | _alternation( | 
| 107 |  |  |  |  |  |  | _sequence(_string('('), qw< optws first optws >, _string(')')), | 
| 108 |  |  |  |  |  |  | 'filename', | 
| 109 |  |  |  |  |  |  | ), | 
| 110 |  |  |  |  |  |  | 'optws' | 
| 111 |  |  |  |  |  |  | )->(@_) | 
| 112 |  |  |  |  |  |  | or return; | 
| 113 | 128 |  |  |  |  | 919 | my ($struct, $pos) = @$retval; | 
| 114 | 128 |  |  |  |  | 144 | my $meat = $struct->[1]; | 
| 115 | 128 | 50 |  |  |  | 152 | if (ref($meat->[0])) { | 
| 116 | 0 |  |  |  |  | 0 | $retval = $meat->[0][2][0]; | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  | else { | 
| 119 | 128 |  |  |  |  | 117 | $retval = $meat->[0]; | 
| 120 |  |  |  |  |  |  | } | 
| 121 | 128 |  |  |  |  | 329 | return [$retval, $pos]; | 
| 122 |  |  |  |  |  |  | } ## end sub fourth | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | sub _op { | 
| 125 | 316 |  |  | 316 |  | 331 | my ($regex, $retval, $string, $pos) = @_; | 
| 126 | 316 |  |  |  |  | 308 | pos($string) = $pos; | 
| 127 | 316 | 100 |  |  |  | 6555 | return unless $string =~ m{\G($regex)}cgmxs; | 
| 128 | 64 |  |  |  |  | 301 | return [$retval, pos($string)]; | 
| 129 |  |  |  |  |  |  | } ## end sub _op | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | sub op_intersect { | 
| 132 | 128 |  |  | 128 | 0 | 392 | return _op(qr{(?:intersect|[iI&^])}, 'intersect', @_); | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | sub op_union { | 
| 136 | 108 |  |  | 108 | 0 | 217 | return _op(qr{(?:union|[uUvV|+])}, 'union', @_); | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | sub op_subtract { | 
| 140 | 80 |  |  | 80 | 0 | 160 | return _op(qr{(?:minus|less|[\\-])}, 'minus', @_); | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | sub filename { | 
| 144 | 128 |  |  | 128 | 0 | 120 | my ($string, $pos) = @_; | 
| 145 | 128 |  |  |  |  | 337 | DEBUG "filename() >$string< $pos"; | 
| 146 | 128 |  |  |  |  | 1187 | pos($string) = $pos; | 
| 147 | 128 |  |  |  |  | 122 | my $retval; | 
| 148 | 128 | 50 |  |  |  | 1280 | if (($retval) = $string =~ m{\G ' ( [^']+ ) '}cgmxs) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 149 | 0 |  |  |  |  | 0 | return [$retval, pos($string)]; | 
| 150 |  |  |  |  |  |  | } | 
| 151 |  |  |  |  |  |  | elsif (($retval) = $string =~ m{\G " ( (?: \\. | [^"])+ ) "}cgmxs) { | 
| 152 | 0 |  |  |  |  | 0 | $retval =~ s{\\(.)}{$1}gmxs; | 
| 153 | 0 |  |  |  |  | 0 | return [$retval, pos($string)]; | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  | elsif (($retval) = $string =~ m{\G ( (?: \\. | [\w.-/])+ )}cgmxs) { | 
| 156 | 128 |  |  |  |  | 531 | $retval =~ s{\\(.)}{$1}gmxs; | 
| 157 | 128 |  |  |  |  | 373 | return [$retval, pos($string)]; | 
| 158 |  |  |  |  |  |  | } | 
| 159 | 0 |  |  |  |  | 0 | return; | 
| 160 |  |  |  |  |  |  | } ## end sub filename | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | sub empty { | 
| 163 | 252 |  |  | 252 | 0 | 236 | my ($string, $pos) = @_; | 
| 164 | 252 |  |  |  |  | 466 | return [undef, $pos]; | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | sub is_empty { | 
| 168 | 0 |  |  | 0 | 0 | 0 | my ($struct) = @_; | 
| 169 | 0 |  |  |  |  | 0 | return @{$struct->[0]} > 0; | 
|  | 0 |  |  |  |  | 0 |  | 
| 170 |  |  |  |  |  |  | } | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | sub ws { | 
| 173 | 0 |  |  | 0 | 0 | 0 | my ($string, $pos) = @_; | 
| 174 | 0 |  |  |  |  | 0 | pos($string) = $pos; | 
| 175 | 0 | 0 |  |  |  | 0 | my ($retval) = $string =~ m{\G (\s+)}cgmxs | 
| 176 |  |  |  |  |  |  | or return; | 
| 177 | 0 |  |  |  |  | 0 | return [$retval, pos($string)]; | 
| 178 |  |  |  |  |  |  | } ## end sub ws | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | sub optws { | 
| 181 | 1520 |  |  | 1520 | 0 | 1093 | my ($string, $pos) = @_; | 
| 182 | 1520 |  |  |  |  | 1592 | pos($string) = $pos; | 
| 183 | 1520 |  |  |  |  | 3642 | my ($retval) = $string =~ m{\G (\s*)}cgmxs; | 
| 184 | 1520 |  | 100 |  |  | 3917 | $retval = [$retval || '', pos($string)]; | 
| 185 | 1520 |  |  |  |  | 2477 | return $retval; | 
| 186 |  |  |  |  |  |  | } ## end sub optws | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | sub _string { | 
| 189 | 256 |  |  | 256 |  | 248 | my ($target) = @_; | 
| 190 | 256 |  |  |  |  | 178 | my $len = length $target; | 
| 191 |  |  |  |  |  |  | return sub { | 
| 192 | 128 |  |  | 128 |  | 92 | my ($string, $pos) = @_; | 
| 193 | 128 | 50 |  |  |  | 545 | return unless substr($string, $pos, $len) eq $target; | 
| 194 | 0 |  |  |  |  | 0 | return [$target, $pos + $len]; | 
| 195 |  |  |  |  |  |  | } | 
| 196 | 256 |  |  |  |  | 567 | } ## end sub _string | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | sub _alternation { | 
| 199 | 444 |  |  | 444 |  | 497 | my @subs = _resolve(@_); | 
| 200 |  |  |  |  |  |  | return sub { | 
| 201 | 444 |  |  | 444 |  | 353 | my ($string, $pos) = @_; | 
| 202 | 444 |  |  |  |  | 364 | for my $sub (@subs) { | 
| 203 | 824 |  | 100 |  |  | 867 | my $retval = $sub->($string, $pos) || next; | 
| 204 | 444 |  |  |  |  | 753 | return $retval; | 
| 205 |  |  |  |  |  |  | } | 
| 206 | 0 |  |  |  |  | 0 | return; | 
| 207 | 444 |  |  |  |  | 1048 | }; | 
| 208 |  |  |  |  |  |  | } ## end sub _alternation | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | sub _sequence { | 
| 211 | 1140 |  |  | 1140 |  | 1143 | my @subs = _resolve(@_); | 
| 212 |  |  |  |  |  |  | return sub { | 
| 213 | 1140 |  |  | 1140 |  | 895 | my ($string, $pos) = @_; | 
| 214 | 1140 |  |  |  |  | 642 | my @chunks; | 
| 215 | 1140 |  |  |  |  | 978 | for my $sub (@subs) { | 
| 216 | 3040 | 100 |  |  |  | 2876 | my $chunk = $sub->($string, $pos) | 
| 217 |  |  |  |  |  |  | or return; | 
| 218 | 2660 |  |  |  |  | 4077 | push @chunks, $chunk; | 
| 219 | 2660 |  |  |  |  | 2494 | $pos = $chunk->[1]; | 
| 220 |  |  |  |  |  |  | } ## end for my $sub (@subs) | 
| 221 | 760 |  |  |  |  | 1355 | return [\@chunks, $pos]; | 
| 222 | 1140 |  |  |  |  | 2729 | }; | 
| 223 |  |  |  |  |  |  | } ## end sub _sequence | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | sub _resolve { | 
| 226 |  |  |  |  |  |  | return | 
| 227 | 1584 | 100 | 33 | 1584 |  | 1166 | map { ref $_ ? $_ : __PACKAGE__->can($_) || LOGDIE "unknown $_" } @_; | 
|  | 5700 |  |  |  |  | 13171 |  | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | 1; | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | __END__ |