File Coverage

blib/lib/Apache2/Expression.pm
Criterion Covered Total %
statement 373 597 62.4
branch 118 232 50.8
condition 109 244 44.6
subroutine 13 37 35.1
pod 5 6 83.3
total 618 1116 55.3


line stmt bran cond sub pod time code
1             ##----------------------------------------------------------------------------
2             ## Apache2 Server Side Include Parser - ~/lib/Apache2/Expression.pm
3             ## Version v0.1.0
4             ## Copyright(c) 2021 DEGUEST Pte. Ltd.
5             ## Author: Jacques Deguest <jack@deguest.jp>
6             ## Created 2021/02/20
7             ## Modified 2021/02/20
8             ## All rights reserved
9             ##
10             ## This program is free software; you can redistribute it and/or modify it
11             ## under the same terms as Perl itself.
12             ##----------------------------------------------------------------------------
13             package Apache2::Expression;
14             BEGIN
15             {
16 14     14   108 use strict;
  14         30  
  14         479  
17 14     14   68 use warnings;
  14         91  
  14         464  
18 14     14   83 use warnings::register;
  14         24  
  14         2039  
19 14     14   87 use parent qw( Module::Generic );
  14         27  
  14         85  
20 14     14   860 use Regexp::Common qw( Apache2 );
  14         28  
  14         146  
21 14     14   11507876 use PPI;
  14         33  
  14         499  
22 14     14   91008 our $VERSION = 'v0.1.0';
23             };
24              
25             sub init
26             {
27 38     38 1 1430 my $self = shift( @_ );
28 38         323 $self->{legacy} = 0;
29 38         106 $self->{trunk} = 0;
30 38         193 $self->SUPER::init( @_ );
31 38         1515 return( $self );
32             }
33              
34 285     285 1 4415 sub legacy { return( shift->_set_get_boolean( 'legacy', @_ ) ); }
35              
36             sub parse
37             {
38 247     247 1 473 my $self = shift( @_ );
39 247         403 my $data = shift( @_ );
40 247 50       735 return( '' ) if( !length( $data ) );
41 247         435 my $opts = {};
42 247 100       577 if( @_ )
43             {
44 77 50       596 $opts = ref( $_[0] ) eq 'HASH'
    50          
45             ? shift( @_ )
46             : !( @_ % 2 )
47             ? { @_ }
48             : {};
49             }
50 247         635 pos( $data ) = 0;
51 247 0       747 my $prefix = $self->legacy ? 'Legacy' : $self->trunk ? 'Trunk' : '';
    50          
52 247         9815 $self->message( 3, "Using prefix '$prefix'." );
53 247         4918 my @callinfo = caller(0);
54 247         625 $opts->{top} = 0;
55 247 100 33     2591 $opts->{top} = 1 if( $callinfo[0] ne ref( $self ) || ( $callinfo[0] eq ref( $self ) && substr( (caller(1))[3], rindex( (caller(1))[3], ':' ) + 1 ) ne 'parse' ) );
      66        
56 247         2478 $self->message( 3, "Parsing expression '$data'. Called from file $callinfo[1] at line $callinfo[2] and from sub ", (caller(1))[3], ". top is set to '$opts->{top}'" );
57             ## This is used to avoid looping when an expression drills down its substring by calling parse again
58 247         3721 my $skip = {};
59 247 50 100     787 if( ref( $opts->{skip} ) eq 'ARRAY' &&
60 77         308 scalar( @{$opts->{skip}} ) )
61             {
62 77         133 @$skip{ @{$opts->{skip}} } = ( 1 ) x scalar( @{$opts->{skip}} )
  77         235  
  77         209  
63             }
64 247     0   1321 $self->message( 3, "Skip contains: ", sub{ $self->dump( $skip ) });
  0         0  
65 247         3711 my $p = {};
66 247         518 $p->{is_negative} = 0;
67 247         414 my $elems = [];
68 247         784 my $hash =
69             {
70             raw => $data,
71             elements => $elems,
72             };
73 247         358 my $looping = 0;
74             PARSE:
75             {
76 247         343 my $pos = pos( $data );
  470         977  
77 470 100       1881 $self->message( 3, "Nothing more to parse at pos '$pos'. End of data." ), last PARSE if( pos( $data ) == length( $data ) );
78 247         1141 $self->message( 3, "Parsing ", length( $data ), " bytes of data starting at '$pos' in string '$data'" );
79 247 50 66     4047 if( $data =~ m/\G\r?\n$/ )
    100 66        
    100 66        
    100 66        
    100 66        
    100 66        
    50 66        
    50 66        
    50 66        
    100 66        
    50 66        
    100 66        
    50 100        
    50 66        
    100 66        
    100 33        
    50 0        
      33        
      0        
      33        
      0        
      33        
      0        
      33        
      0        
      33        
      0        
      66        
      66        
      66        
      33        
      33        
      33        
      66        
      66        
      66        
      33        
      0        
      33        
      0        
      33        
      0        
      33        
      0        
      66        
      66        
      100        
      66        
      100        
      66        
      66        
      33        
      33        
      33        
80             {
81 0         0 $self->message( 3, "End of string new line detected, skipping." );
82 0         0 redo PARSE;
83             }
84             elsif( $self->message( 3, "Trying with legacy variable." ) &&
85             $data =~ /\A\G$RE{Apache2}{LegacyVariable}\Z/gmcs &&
86             length( $+{variable} ) )
87             {
88 36         5848 my $re = { %+ };
89 36         191 $self->message( 3, "Got here in legacy variable." );
90 36         697 $self->whereami( \$data, pos( $data ) );
91 36     0   815 $self->message( 3, "variable => capture groups for $+{variable} are: ", sub{ $self->dump( $re ) });
  0         0  
92             my $def =
93             {
94             elements => [],
95             type => 'variable',
96             raw => $re->{variable},
97 36         655 re => $re,
98             };
99 36 100       145 if( length( $re->{var_func_name} ) )
    100          
    50          
100             {
101 1         3 $def->{subtype} = 'function';
102 1         2 $def->{name} = $re->{var_func_name};
103 1         3 $def->{args} = $re->{var_func_args};
104 1 50       4 if( length( $def->{args} ) )
105             {
106 1         5 my @argv = $self->parse_args( $def->{args} );
107 1         44 $def->{args_def} = [];
108 1         3 foreach my $this ( @argv )
109             {
110 1         3 my $this = $self->parse( $this );
111 1         1 push( @{$def->{elements}}, @{$this->{elements}} );
  1         3  
  1         2  
112 1         1 push( @{$def->{args_def}}, @{$this->{elements}} );
  1         2  
  1         4  
113             }
114             }
115             }
116             elsif( length( $re->{varname} ) )
117             {
118 34         74 $def->{subtype} = 'variable';
119 34         85 $def->{name} = $re->{varname};
120             }
121             elsif( length( $re->{rebackref} ) )
122             {
123 1         3 $def->{subtype} = 'rebackref';
124 1         3 $def->{value} = $re->{rebackref};
125             }
126 36         73 push( @$elems, $def );
127 36         281 redo PARSE;
128             }
129             elsif( !$skip->{cond} &&
130             $self->message( 3, "Trying with condition." ) &&
131             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Cond"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Cond"}/gmcs ) &&
132             length( $+{cond} ) )
133             {
134 48         52984 my $re = { %+ };
135 48         278 $self->message( 3, "Got here in condition." );
136 48         1066 $self->whereami( \$data, pos( $data ) );
137 48     0   1270 $self->message( 3, "condition => capture groups for '$+{cond}' in data '$data' are: ", sub{ $self->dump( $re ) });
  0         0  
138             my $def =
139             {
140             elements => [],
141             type => 'cond',
142             raw => $re->{cond},
143 48         968 re => $re,
144             };
145 48 100       145 $def->{is_negative} = 1 if( $re->{cond_neg} );
146 48 50 66     173 $p->{is_negative} = 1 if( $re->{cond_neg} && $opts->{top} );
147 48 50 66     481 if( length( $re->{cond_variable} ) )
    100 66        
    100          
    100          
    100          
    50          
148             {
149 0         0 $def->{subtype} = 'variable';
150 0         0 $def->{variable_def} = [];
151             ## Avoid looping
152 0 0       0 unless( $re->{cond_variable} eq $data )
153             {
154 0         0 my $this = $self->parse( $re->{cond_variable} );
155 0         0 $def->{elements} = $this->{elements};
156 0         0 $def->{variable_def} = $this->{elements};
157             }
158             }
159             elsif( length( $re->{cond_parenthesis} ) )
160             {
161 1         3 $def->{subtype} = 'parenthesis';
162 1         3 $def->{parenthesis_def} = [];
163 1 50       4 unless( $re->{cond_parenthesis} eq $data )
164             {
165 1         40 my $this = $self->parse( $re->{cond_parenthesis} );
166 1         2 $def->{elements} = $this->{elements};
167 1         3 $def->{parenthesis_def} = $this->{elements};
168             }
169             }
170             elsif( length( $re->{cond_neg} ) )
171             {
172 7         21 $def->{subtype} = 'negative';
173 7         26 $def->{negative_def} = [];
174 7 50       29 if( length( $re->{cond_expr} ) )
175             {
176 7         78 my $this = $self->parse( $re->{cond_expr} );
177 7         22 $def->{elements} = $this->{elements};
178 7         16 $def->{negative_def} = $this->{elements};
179             }
180             }
181             elsif( length( $re->{cond_and} ) || length( $re->{cond_or} ) )
182             {
183 1 50       5 $def->{subtype} = length( $re->{cond_and} ) ? 'and' : 'or';
184 1         5 $def->{ $def->{subtype} . '_def' } = [];
185 1 50       4 $def->{expr1} = length( $re->{cond_and_expr1} ) ? $re->{cond_and_expr1} : $re->{cond_or_expr1};
186 1 50       5 $def->{expr2} = length( $re->{cond_and_expr2} ) ? $re->{cond_and_expr2} : $re->{cond_or_expr2};
187 1         5 my $this1 = $self->parse( $def->{expr1} );
188 1         26 my $this2 = $self->parse( $def->{expr2} );
189 1         12 $def->{elements} = [ @{$this1->{elements}}, @{$this2->{elements}} ];
  1         2  
  1         5  
190 1         2 $def->{ $def->{subtype} . '_def' } = [ @{$this1->{elements}}, @{$this2->{elements}} ];
  1         2  
  1         4  
191 1         2 $def->{ $def->{subtype} . '_def_expr1' } = [ @{$this1->{elements}} ];
  1         6  
192 1         2 $def->{ $def->{subtype} . '_def_expr2' } = [ @{$this2->{elements}} ];
  1         5  
193             }
194             elsif( length( $re->{cond_comp} ) )
195             {
196 30         94 $def->{subtype} = 'comp';
197 30         69 $def->{comp_def} = [];
198 30         61 my $chunk = $re->{cond_comp};
199 30         255 my $this = $self->parse( $chunk, skip => [qw( cond )] );
200 30         133 $def->{elements} = $this->{elements};
201 30         103 $def->{comp_def} = $this->{elements};
202             }
203             ## e.g. when the condition is just true or false
204             elsif( length( $re->{cond_true} ) || length( $re->{cond_false} ) )
205             {
206 9         19 $def->{subtype} = 'boolean';
207 9 100       20 $def->{boolval} = length( $re->{cond_true} ) ? 1 : 0;
208 9 100       26 $def->{booltype} = length( $re->{cond_true} ) ? 'true' : 'false';
209 9 100       52 $def->{value} = length( $re->{cond_true} ) ? $re->{cond_true} : $re->{cond_false};
210             }
211             else
212             {
213 0         0 $def->{subtype} = 'cond';
214             }
215 48         110 my $chunk = $re->{cond};
216 48 50       190 push( @$elems, $def ) if( length( $re->{cond} ) );
217 48         2482 redo PARSE;
218             }
219             elsif( $self->message( 3, "Trying with string comparison." ) &&
220             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}StringComp"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}StringComp"}/gmcs ) &&
221             length( $+{stringcomp} ) )
222             {
223 13         13334 my $re = { %+ };
224 13         103 $self->message( 3, "Got here in string comp." );
225 13         356 $self->whereami( \$data, pos( $data ) );
226 13     0   353 $self->message( 3, "stringcomp => capture groups for $+{stringcomp} are: ", sub{ $self->dump( $re ) });
  0         0  
227             my $def =
228             {
229             elements => [],
230             type => 'stringcomp',
231             raw => $re->{stringcomp},
232             re => $re,
233             op => $re->{stringcomp_op},
234             worda => $re->{stringcomp_worda},
235             wordb => $re->{stringcomp_wordb},
236 13         334 };
237 13         53 $def->{worda_def} = [];
238 13         37 $def->{wordb_def} = [];
239 13 50       56 if( length( $def->{worda} ) )
240             {
241 13         173 my $this = $self->parse( $def->{worda} );
242 13         28 push( @{$def->{elements}}, @{$this->{elements}} );
  13         81  
  13         45  
243 13         54 $def->{worda_def} = $this->{elements};
244             }
245 13 50       75 if( length( $def->{wordb} ) )
246             {
247 13         51 my $this = $self->parse( $def->{wordb} );
248 13         36 push( @{$def->{elements}}, @{$this->{elements}} );
  13         51  
  13         52  
249 13         58 $def->{wordb_def} = $this->{elements};
250             }
251 13         62 push( @$elems, $def );
252 13         447 redo PARSE;
253             }
254             elsif( $self->message( 3, "Trying with integer comparison." ) &&
255             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}IntegerComp"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}IntegerComp"}/gmcs ) &&
256             length( $+{integercomp} ) )
257             {
258 3         3249 my $re = { %+ };
259 3         21 $self->message( 3, "Got here in integer comp." );
260 3         64 $self->whereami( \$data, pos( $data ) );
261 3     0   100 $self->message( 3, "integercomp => capture groups for $+{integercomp} are: ", sub{ $self->dump( $re ) });
  0         0  
262             my $def =
263             {
264             elements => [],
265             type => 'integercomp',
266             raw => $re->{integercomp},
267             re => $re,
268             op => $re->{integercomp_op},
269             worda => $re->{integercomp_worda},
270             wordb => $re->{integercomp_wordb},
271 3         78 };
272 3 50       13 if( length( $re->{integercomp_worda} ) )
273             {
274 3         12 my $this = $self->parse( $re->{integercomp_worda} );
275 3         8 push( @{$def->{elements}}, @{$this->{elements}} );
  3         7  
  3         7  
276 3         11 $def->{worda_def} = $this->{elements};
277             }
278 3 50       17 if( length( $re->{integercomp_wordb} ) )
279             {
280 3         12 my $this = $self->parse( $re->{integercomp_wordb} );
281 3         20 push( @{$def->{elements}}, @{$this->{elements}} );
  3         7  
  3         7  
282 3         7 $def->{wordb_def} = $this->{elements};
283             }
284 3         7 push( @$elems, $def );
285 3         58 redo PARSE;
286             }
287             elsif( $self->message( 3, "Trying with general comparison." ) &&
288             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Comp"}\Z/gms ) ||
289             ( $pos > 0 && $data =~ /\G$RE{Apache2}{"${prefix}Comp"}/gmcs ) ) &&
290             length( $+{comp} ) )
291             {
292              
293             # elsif( $self->message( 3, "Trying with general comparison." ) &&
294             # ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Comp"}\Z/gms ) &&
295             # length( $+{comp} ) )
296              
297             # elsif( $self->message( 3, "Trying with general comparison." ) &&
298             # ( $data =~ /\G$RE{Apache2}{"${prefix}Comp"}/gmcs ) &&
299             # length( $+{comp} ) )
300             # {
301 24         122645 my $re = { %+ };
302 24         115 my $cur_pos = pos( $data );
303 24         109 $self->message( 3, "Got here in comp." );
304 24         492 $self->whereami( \$data, $cur_pos );
305 24     0   601 $self->message( 3, "comparison => capture groups for $+{comp} are: ", sub{ $self->dump( $re ) });
  0         0  
306             ## next PARSE unless( length( $re->{comp} ) );
307             my $def =
308             {
309             elements => [],
310             type => 'comp',
311             raw => $re->{comp},
312 24         468 re => $re,
313             };
314 24         37 my $this;
315 24         47 my $chunk = $re->{comp};
316 24 100 33     185 if( length( $re->{comp_unary} ) )
    100          
    100          
    50          
    0          
317             {
318 12         42 $self->message( 3, "Found unary operation." );
319 12         164 $def->{subtype} = 'unary';
320 12         36 $def->{op} = $re->{comp_unaryop};
321 12         28 $def->{word} = $re->{comp_word};
322 12         34 $def->{word_def} = [];
323 12 50       43 if( length( $def->{word} ) )
324             {
325 12         104 my $this = $self->parse( $def->{word} );
326 12         33 push( @{$def->{elements}}, @{$this->{elements}} );
  12         26  
  12         25  
327 12         34 $def->{word_def} = $this->{elements};
328             }
329             }
330             elsif( length( $re->{comp_binary} ) )
331             {
332 4         21 $self->message( 3, "Found binary operation." );
333 4         67 $def->{subtype} = 'binary';
334 4         12 $def->{op} = $re->{comp_binaryop};
335 4 50       20 $def->{is_negative} = ( defined( $re->{comp_binary_is_neg} ) ? length( $re->{comp_binary_is_neg} ) > 0 ? 1 : 0 : 0 );
    100          
336 4         28 $def->{worda} = $re->{comp_worda};
337 4         13 $def->{wordb} = $re->{comp_wordb};
338 4         18 $def->{worda_def} = [];
339 4         14 $def->{wordb_def} = [];
340 4 50       21 if( length( $def->{worda} ) )
341             {
342 4         18 my $this = $self->parse( $def->{worda} );
343 4         22 push( @{$def->{elements}}, @{$this->{elements}} );
  4         11  
  4         13  
344 4         12 $def->{worda_def} = $this->{elements};
345             }
346 4 50       18 if( length( $def->{wordb} ) )
347             {
348 4         29 my $this = $self->parse( $def->{wordb} );
349 4         17 push( @{$def->{elements}}, @{$this->{elements}} );
  4         12  
  4         11  
350 4         16 $def->{wordb_def} = $this->{elements};
351             }
352             }
353             elsif( length( $re->{comp_word_in_listfunc} ) )
354             {
355 2         11 $self->message( 3, "Found function." );
356 2         36 $def->{subtype} = 'function';
357 2         6 $def->{word} = $re->{comp_word};
358 2         5 $def->{function} = $re->{comp_listfunc};
359 2         7 $def->{word_def} = [];
360 2         22 $def->{function_def} = [];
361 2 50       20 if( length( $def->{word} ) )
362             {
363 2         10 my $this1 = $self->parse( $def->{word} );
364 2         16 push( @{$def->{elements}}, @{$this1->{elements}} );
  2         8  
  2         5  
365 2         6 $def->{word_def} = $this1->{elements};
366             }
367 2         7 my $this2 = $self->parse( $def->{function} );
368 2         9 push( @{$def->{elements}}, @{$this2->{elements}} );
  2         7  
  2         6  
369 2         5 $def->{function_def} = $this2->{elements};
370             }
371             elsif( length( $re->{comp_in_regexp} ) || length( $re->{comp_in_regexp_legacy} ) )
372             {
373 6         22 $self->message( 3, "Found regular expression." );
374 6         110 $def->{subtype} = 'regexp';
375 6         19 $def->{word} = $re->{comp_word};
376 6         21 $def->{op} = $re->{comp_regexp_op};
377 6         47 $def->{regexp} = $re->{comp_regexp};
378 6         24 $def->{word_def} = [];
379 6         20 $def->{regexp_def} = [];
380 6         25 my $str = $def->{word} . '';
381             ## Break down the word being compared as well as the regular expression
382 6 50       18 if( length( $str ) )
383             {
384 6         214 my $this1 = $self->parse( $str );
385 6         13 $def->{elements} = [@{$this1->{elements}}];
  6         17  
386 6         21 $def->{word_def} = $this1->{elements};
387             }
388 6 50       20 if( length( $def->{regexp} ) )
389             {
390 6         16 my $this2 = $self->parse( $def->{regexp} );
391 6         59 push( @{$def->{elements}}, @{$this2->{elements}} );
  6         32  
  6         18  
392 6         27 $def->{regexp_def} = $this2->{elements};
393             }
394             }
395             elsif( length( $re->{comp_word_in_list} ) )
396             {
397 0         0 $self->message( 3, "Found comparison to list." );
398 0         0 $def->{subtype} = 'list';
399 0         0 $def->{word} = $re->{comp_word};
400 0         0 $def->{list} = $re->{com_list};
401 0         0 $def->{word_def} = [];
402 0         0 $def->{list_def} = [];
403 0 0       0 if( length( $def->{word} ) )
404             {
405 0         0 my $this1 = $self->parse( $def->{word} );
406 0         0 push( @{$def->{elements}}, @{$this1->{elements}} );
  0         0  
  0         0  
407 0         0 $def->{word_def} = $this1->{elements};
408             }
409 0 0       0 if( length( $def->{list} ) )
410             {
411 0         0 my @argv = $self->parse_args( $def->{list} );
412 0         0 foreach my $this ( @argv )
413             {
414 0         0 my $this = $self->parse( $this );
415 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
416 0         0 push( @{$def->{list_def}}, @{$this->{elements}} );
  0         0  
  0         0  
417             }
418             }
419             }
420             else
421             {
422 0         0 $self->message( 3, "No match found in comparison." );
423             }
424 24 0 50     70 if( defined( $this ) && scalar( keys( %$this ) ) )
425             {
426 0         0 $def->{elements} = $this->{elements};
427             }
428 24 50       95 push( @$elems, $def ) if( length( $re->{comp} ) );
429 24 50 33     130 $cur_pos == length( $data ) && $self->message( 3, "End of string (", length( $data ), " bytes long) reached at pos ", $cur_pos ) && last PARSE;
430             # redo PARSE unless( !length( $re->{comp} ) && ++$looping > 1 );
431 0         0 redo PARSE;
432             }
433             ## Trunk function
434             elsif( $self->message( 3, "Trying with trunk join." ) &&
435             $prefix eq 'Trunk' &&
436             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Join"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Join"}/gmcs ) &&
437             length( $+{join} ) )
438             {
439 0         0 my $re = { %+ };
440 0         0 $self->message( 3, "Got here in join for string '$data'." );
441 0         0 $self->whereami( \$data, pos( $data ) );
442 0     0   0 $self->message( 3, "join => capture groups for $+{join} are: ", sub{ $self->dump( $re ) });
  0         0  
443             my $def =
444             {
445             elements => [],
446             type => 'join',
447             raw => $re->{join},
448             re => $re,
449             word => $re->{join_word},
450             list => $re->{join_list},
451 0         0 };
452             ## word is optional
453 0 0       0 if( length( $def->{word} ) )
454             {
455 0         0 my $this = $self->parse( $def->{word} );
456 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
457 0         0 $def->{word_def} = $this->{elements};
458             }
459 0 0       0 if( length( $def->{list} ) )
460             {
461 0         0 my @argv = $self->parse_args( $def->{list} );
462 0         0 $def->{list_def} = [];
463 0         0 foreach my $that ( @argv )
464             {
465 0         0 my $this = $self->parse( $that );
466 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
467 0         0 push( @{$def->{list_def}}, @{$this->{elements}} );
  0         0  
  0         0  
468             }
469             }
470 0         0 push( @$elems, $def );
471 0         0 redo PARSE;
472             }
473             elsif( $self->message( 3, "Trying with trunk split." ) &&
474             $prefix eq 'Trunk' &&
475             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Split"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Split"}/gmcs ) &&
476             length( $+{split} ) )
477             {
478 0         0 my $re = { %+ };
479 0         0 $self->message( 3, "Got here in split for string '$data'." );
480 0         0 $self->whereami( \$data, pos( $data ) );
481 0     0   0 $self->message( 3, "split => capture groups for $+{split} are: ", sub{ $self->dump( $re ) });
  0         0  
482             my $def =
483             {
484             elements => [],
485             type => 'split',
486             raw => $re->{split},
487             re => $re,
488             regex => $re->{split_regex},
489             word => $re->{split_word},
490             list => $re->{split_list},
491 0         0 };
492             ## It is either a word or a list as parameter
493 0 0       0 if( length( $def->{word} ) )
494             {
495 0         0 my $this = $self->parse( $def->{word} );
496 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
497 0         0 $def->{word_def} = $this->{elements};
498             }
499 0 0       0 if( length( $def->{list} ) )
500             {
501 0         0 my @argv = $self->parse_args( $def->{list} );
502 0         0 $def->{list_def} = [];
503 0         0 foreach my $that ( @argv )
504             {
505 0         0 my $this = $self->parse( $that );
506 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
507 0         0 push( @{$def->{list_def}}, @{$this->{elements}} );
  0         0  
  0         0  
508             }
509             }
510 0         0 push( @$elems, $def );
511 0         0 redo PARSE;
512             }
513             elsif( $self->message( 3, "Trying with legacy variable." ) &&
514             $prefix eq 'Trunk' &&
515             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Sub"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Sub"}/gmcs ) &&
516             length( $+{sub} ) )
517             {
518 0         0 my $re = { %+ };
519 0         0 $self->message( 3, "Got here in sub for string '$data'." );
520 0         0 $self->whereami( \$data, pos( $data ) );
521 0     0   0 $self->message( 3, "sub => capture groups for $+{split} are: ", sub{ $self->dump( $re ) });
  0         0  
522             my $def =
523             {
524             elements => [],
525             type => 'sub',
526             raw => $re->{sub},
527             re => $re,
528             regsub => $re->{sub_regsub},
529             word => $re->{sub_word},
530 0         0 };
531 0 0       0 if( length( $def->{word} ) )
532             {
533 0         0 my $this = $self->parse( $def->{word} );
534 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
535 0         0 $def->{word_def} = $this->{elements};
536             }
537 0 0       0 if( length( $def->{regsub} ) )
538             {
539 0         0 my $this = $self->parse( $def->{regsub} );
540 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
541 0         0 $def->{regsub_def} = $this->{elements};
542             }
543 0         0 push( @$elems, $def );
544 0         0 redo PARSE;
545             }
546             elsif( $self->message( 3, "Trying with function." ) &&
547             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Function"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Function"}/gmcs ) &&
548             length( $+{function} ) )
549             {
550 14         16271399 my $re = { %+ };
551 14         148 $self->message( 3, "Got here in function for string '$data'." );
552 14         377 $self->whereami( \$data, pos( $data ) );
553 14     0   464 $self->message( 3, "function => capture groups for $+{function} are: ", sub{ $self->dump( $re ) });
  0         0  
554             my $def =
555             {
556             elements => [],
557             type => 'function',
558             raw => $re->{function},
559             re => $re,
560             name => $re->{func_name},
561             args => $re->{func_args},
562 14         405 };
563 14 50       76 if( length( $def->{args} ) )
564             {
565 14         86 my @argv = $self->parse_args( $def->{args} );
566 14         990 $def->{args_def} = [];
567 14         49 foreach my $this ( @argv )
568             {
569 20         141 my $this = $self->parse( $this );
570 20         82 push( @{$def->{elements}}, @{$this->{elements}} );
  20         64  
  20         55  
571 20         42 push( @{$def->{args_def}}, @{$this->{elements}} );
  20         48  
  20         100  
572             }
573             }
574 14         47 push( @$elems, $def );
575 14         3353 redo PARSE;
576             }
577             elsif( $self->message( 3, "Trying with list function." ) &&
578             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}ListFunc"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}ListFunc"}/gmcs ) &&
579             length( $+{listfunc} ) )
580             {
581 0         0 my $re = { %+ };
582 0         0 $self->message( 3, "Got here in listfunc." );
583 0         0 $self->whereami( \$data, pos( $data ) );
584 0     0   0 $self->message( 3, "listfunc => capture groups for $+{listfunc} are: ", sub{ $self->dump( $re ) });
  0         0  
585             my $def =
586             {
587             elements => [],
588             type => 'listfunc',
589             raw => $re->{listfunc},
590             re => $re,
591             name => $re->{func_name},
592             args => $re->{func_args},
593 0         0 };
594 0 0       0 if( length( $def->{args} ) )
595             {
596 0         0 my @argv = $self->parse_args( $def->{args} );
597 0         0 $def->{args_def} = [];
598 0         0 foreach my $this ( @argv )
599             {
600 0         0 my $this = $self->parse( $this );
601 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
602 0         0 push( @{$def->{args_def}}, @{$this->{elements}} );
  0         0  
  0         0  
603             }
604             }
605 0         0 push( @$elems, $def );
606 0         0 redo PARSE;
607             }
608             elsif( $self->message( 3, "Trying with regex." ) &&
609             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Regexp"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Regexp"}/gmcs ) &&
610             length( $+{regex} ) )
611             {
612 8         46096 my $re = { %+ };
613 8         63 $self->message( 3, "Got here in regex." );
614 8         182 $self->whereami( \$data, pos( $data ) );
615 8     0   228 $self->message( 3, "regex => capture groups for $+{regex} are: ", sub{ $self->dump( $re ) });
  0         0  
616             my $def =
617             {
618             elements => [],
619             type => 'regex',
620             raw => $re->{regex},
621             re => $re,
622             pattern => $re->{regpattern},
623             flags => $re->{regflags},
624             sep => $re->{regsep},
625 8         204 };
626 8         35 push( @$elems, $def );
627 8         657 redo PARSE;
628             }
629             ## Trunk only
630             elsif( $self->message( 3, "Trying with regex any." ) &&
631             $prefix eq 'Trunk' &&
632             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Regany"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Regany"}/gmcs ) &&
633             length( $+{regany} ) )
634             {
635 0         0 my $re = { %+ };
636 0         0 $self->message( 3, "Got here in regany." );
637 0         0 $self->whereami( \$data, pos( $data ) );
638 0     0   0 $self->message( 3, "regany => capture groups for $+{regany} are: ", sub{ $self->dump( $re ) });
  0         0  
639             my $def =
640             {
641             elements => [],
642             type => 'regany',
643             raw => $re->{regany},
644             re => $re,
645             regex => $re->{regany_regex},
646             regsub => $re->{regany_regsub},
647 0         0 };
648 0         0 push( @$elems, $def );
649 0         0 redo PARSE;
650             }
651             ## Trunk only
652             elsif( $self->message( 3, "Trying with regsub." ) &&
653             $prefix eq 'Trunk' &&
654             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Regsub"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Regsub"}/gmcs ) &&
655             length( $+{regsub} ) )
656             {
657 0         0 my $re = { %+ };
658 0         0 $self->message( 3, "Got here in regsub." );
659 0         0 $self->whereami( \$data, pos( $data ) );
660 0     0   0 $self->message( 3, "regsub => capture groups for $+{regsub} are: ", sub{ $self->dump( $re ) });
  0         0  
661             my $def =
662             {
663             elements => [],
664             type => 'regsub',
665             raw => $re->{regsub},
666             re => $re,
667             pattern => $re->{regpattern},
668             replacement => $re->{regstring},
669             flags => $re->{regflags},
670             sep => $re->{regsep},
671 0         0 };
672 0         0 push( @$elems, $def );
673 0         0 redo PARSE;
674             }
675             elsif( !$skip->{words} &&
676             $self->message( 3, "Trying with words." ) &&
677             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Words"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}Words"}/gmcs ) &&
678             length( $+{words} ) )
679             {
680 44         41028818 my $re = { %+ };
681 44         288 $self->message( 3, "Got here in words." );
682 44         1040 $self->whereami( \$data, pos( $data ) );
683 44     0   1162 $self->message( 3, "words => capture groups for $+{words} are: ", sub{ $self->dump( $re ) });
  0         0  
684             my $def =
685             {
686             elements => [],
687             type => 'words',
688             raw => $re->{words},
689             re => $re,
690             word => $re->{words_word},
691 44         1028 };
692 44 100       182 if( length( $re->{words_list} ) )
693             {
694 1         16 $def->{list} = $re->{words_list};
695 1         9 $def->{sublist} = $re->{words_sublist};
696 1         9 my $this2 = $self->parse( $def->{list}, skip => [qw( words )] );
697 1         19 $def->{elements} = $this2->{elements};
698 1         5 $def->{list_def} = $this2->{elements};
699 1         4 $def->{words_def} = [];
700 1         8 $self->message( "Found list word '$def->{word}' with sublist '$def->{sublist}'" );
701 1         20 my $this = $self->parse( $def->{word}, skip => [qw( words )] );
702 1         7 push( @{$def->{words_def}}, @{$this->{elements}} );
  1         4  
  1         2  
703            
704 1         3 my $tmp = $def->{sublist};
705 1         5 while( $tmp =~ s/^$RE{Apache2}{"${prefix}Words"}$//gs )
706             {
707 2         2461 my $re2 = { %+ };
708 2 50       10 $re2->{words_word} = '' if( !exists( $re2->{words_word} ) );
709 2 100       6 $re2->{words_sublist} = '' if( !exists( $re2->{words_sublist} ) );
710 2         11 $self->message( "Found list word '$re2->{words_word}' with sublist '$re2->{words_sublist}" );
711 2         40 my $this = $self->parse( $re2->{words_word}, skip => [qw( words )] );
712 2         5 push( @{$def->{words_def}}, @{$this->{elements}} );
  2         3  
  2         5  
713 2 100       15 $tmp = $re2->{words_sublist} if( $re2->{words_sublist} );
714             }
715             }
716             else
717             {
718 43         586 my $this = $self->parse( $def->{word}, skip => [qw( words )] );
719 43         184 $def->{word_def} = $this->{elements};
720 43         96 push( @{$def->{elements}}, @{$this->{elements}} );
  43         164  
  43         124  
721             }
722 44         224 push( @$elems, $def );
723 44         4977 redo PARSE;
724             }
725             elsif( $self->message( 3, "Trying with word." ) &&
726             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}Word"}\Z/gms ) ||
727             ( $pos > 0 && $data =~ /\G$RE{Apache2}{"${prefix}Word"}/gmcs )
728             ) &&
729             length( $+{word} ) )
730             {
731 46         41739465 my $re = { %+ };
732 46         328 $self->message( 3, "Got here in word." );
733 46         1137 $self->whereami( \$data, pos( $data ) );
734 46     0   1389 $self->message( 3, "word => capture groups for '$+{word}' are: ", sub{ $self->dump( $re ) });
  0         0  
735             my $def =
736             {
737             elements => [],
738             type => 'word',
739             raw => $re->{word},
740 46         1027 re => $re,
741             };
742 46 100 33     369 if( length( $re->{word_digits} ) )
    100 0        
    50          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
743             {
744             ## We keep whatever quote was used
745 2         5 $def->{subtype} = 'digits';
746 2         15 $def->{value} = $re->{word_digits};
747             }
748             elsif( length( $re->{word_ip} ) )
749             {
750 5         13 $def->{subtype} = 'ip';
751 5 50       25 $def->{ip_version} = length( $re->{word_ip4} ) ? 4 : 6;
752 5 50       18 $def->{value} = length( $re->{word_ip4} ) ? $re->{word_ip4} : $re->{word_ip6};
753             }
754             elsif( length( $re->{word_quote} ) || length( $re->{word_parens_open} ) )
755             {
756 39         138 $def->{word} = $re->{word_enclosed};
757 39 50       125 if( length( $re->{word_quote} ) )
    0          
758             {
759 39         107 $def->{subtype} = 'quote';
760 39         143 $def->{quote} = $re->{word_quote};
761             }
762             elsif( length( $re->{word_parens_open} ) )
763             {
764 0         0 $def->{subtype} = 'parens';
765             ## If the enclosing elements are parenthesis
766 0         0 $def->{parens} = [$re->{word_parens_open}, $re->{word_parens_close}];
767 0         0 my $this = $self->parse( $def->{word} );
768 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
769 0         0 $def->{word_def} = $this->{elements};
770             }
771             # XXX Should probably make a run on the enclosed word as it could be a variable
772             # For example: "Go back to %{REQUEST_URI}"
773             }
774             elsif( length( $re->{word_function} ) || length( $re->{word_variable} ) )
775             {
776 0   0     0 my $chunk = ( $re->{word_function} || $re->{word_variable} );
777 0 0       0 my $this = length( $chunk ) ? $self->parse( $chunk ) : {};
778 0 0       0 $def->{subtype} = length( $re->{word_function} ) ? 'function' : length( $re->{word_variable} ) ? 'variable' : undef();
    0          
779 0 0 0     0 if( defined( $this ) && scalar( keys( %$this ) ) )
780             {
781 0         0 $def->{elements} = $this->{elements};
782 0 0       0 if( $def->{subtype} eq 'function' )
    0          
783             {
784 0         0 $def->{function_def} = $this->{elements};
785             }
786             elsif( $def->{subtype} eq 'variable' )
787             {
788 0         0 $def->{variable_def} = $this->{elements};
789             }
790             }
791             }
792             elsif( length( $re->{word_join} ) )
793             {
794 0         0 $def->{subtype} = 'join';
795 0         0 my $this = $self->parse( $re->{word_variable} );
796 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
797 0         0 $def->{join_def} = $this->{elements};
798             }
799             elsif( length( $re->{word_sub} ) )
800             {
801 0         0 $def->{subtype} = 'sub';
802 0         0 my $this = $self->parse( $re->{word_variable} );
803 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
804 0         0 $def->{sub_def} = $this->{elements};
805             }
806             elsif( length( $re->{word_variable} ) )
807             {
808 0         0 $def->{subtype} = 'variable';
809 0         0 my $this = $self->parse( $re->{word_variable} );
810 0         0 push( @{$def->{elements}}, @{$this->{elements}} );
  0         0  
  0         0  
811 0         0 $def->{variable_def} = $this->{elements};
812             }
813             elsif( length( $re->{word_dot_word} ) )
814             {
815 0         0 $def->{subtype} = 'dotted';
816 0         0 $def->{word} = $re->{word_dot_word};
817             }
818             elsif( length( $re->{rebackref} ) )
819             {
820 0         0 $def->{subtype} = 'rebackref';
821 0         0 $def->{value} = $re->{rebackref};
822             }
823             elsif( length( $re->{regex} ) )
824             {
825 0         0 $def->{subtype} = 'regex';
826 0         0 $def->{sep} = $re->{regsep};
827 0         0 $def->{pattern} = $re->{regpattern};
828 0         0 $def->{flags} = $re->{regflags};
829             }
830 46         137 push( @$elems, $def );
831 46         3348 redo PARSE;
832             }
833             elsif( $self->message( 3, "Trying with string." ) &&
834             ( ( $pos == 0 && $data =~ /\A$RE{Apache2}{"${prefix}String"}\Z/gms ) || $data =~ /\G$RE{Apache2}{"${prefix}String"}/gmcs ) &&
835             length( $+{string} ) )
836             {
837 11         230732 my $re = { %+ };
838 11         68 $self->message( 3, "Got here in string." );
839 11         243 $self->whereami( \$data, pos( $data ) );
840 11     0   284 $self->message( 3, "string => capture groups for $+{string} are: ", sub{ $self->dump( $re ) });
  0         0  
841             my $def =
842             {
843             elements => [],
844             type => 'string',
845             raw => $re->{string},
846 11         217 re => $re,
847             };
848 11         30 push( @$elems, $def );
849 11         4269 redo PARSE;
850             }
851             else
852             {
853 0         0 $self->message( 3, "Do not know what to do with '$data'." );
854             }
855 0         0 $self->message( 3, "Nothing found." );
856 0 0 0     0 $self->message( 3, "Looping detected now exiting parsing." ) && last PARSE if( ++$looping > 1 );
857             ## We arrived here, which means we could not find anything suitable in our parser, instead of returning a result for part of the data parsed, we return the original string marking it as nomatch string.
858 0 0       0 if( $opts->{top} )
859             {
860 0         0 @$elems =
861             ({
862             type => 'string',
863             subtype => 'nomatch',
864             raw => $data,
865             pos => $pos,
866             });
867 0     0   0 $self->message( 3, "Failed to complete parsing, so returning original data as string: ", sub{ $self->dump( $elems ) } );
  0         0  
868 0         0 last PARSE;
869             }
870             };
871 247     0   9692 $self->message( 3, "Returning hash: ", sub{ $self->dump( $hash ) } );
  0         0  
872 247 50       5137 return( scalar( @$elems ) ? $hash : {} );
873             }
874              
875             sub parse_args
876             {
877 15     15 1 40 my $self = shift( @_ );
878             ## String
879 15         49 my $args = shift( @_ );
880 15         101 $self->message( 3, "Parsing arguments: '$args'." );
881 15   50     482 my $doc = PPI::Document->new( \$args, readonly => 1 ) ||
882             return( "Unable to parse: ", PPI::Document->errstr, "\n$args" );
883             ## Nothing found as argument
884 15 50       22373 return( () ) if( !scalar( @{$doc->{children}} ) );
  15         75  
885 15 50       125 return( $self->error( "Was expecting a statement, but got ", ($doc->elements)[0]->class ) ) if( ($doc->elements)[0]->class ne 'PPI::Statement' );
886 15     0   419 $self->message( 3, "Arguments contains the following PPI structure: ", sub{ $self->dump( $doc->{children} ) });
  0         0  
887 15         410 my $st = ($doc->elements)[0];
888 15         140 my @children = $st->elements;
889 15         134 my $op_skip =
890             {
891             ',' => 1,
892             };
893 15         41 my $expect = 0;
894             local $recur = sub
895             {
896 15     15   40 my @elems = @_;
897             ## We need space, so we do not remove them
898             ## For example, md5("some string") is not the same as md5, ("some string")
899 15         43 my $argv = [];
900 15         81 for( my $i = 0; $i < scalar( @elems ); $i++ )
901             {
902 33         354 my $e = $elems[$i];
903 33         50 my @expr;
904             ## Hopefully those below should cover all of our needs
905 33 100 33     123 if(
    100 66        
    50 33        
      66        
      66        
      100        
      100        
      66        
      66        
906             $e->class eq 'PPI::Token::ArrayIndex' ||
907             ## Including PPI::Token::Number::Float
908             $e->isa( 'PPI::Token::Number' ) ||
909             ## operators like ==, !=, =~
910             ( $e->class eq 'PPI::Token::Operator' && !exists( $op_skip->{ $e->content } ) ) ||
911             ## including, PPI::Token::Quote::Double, PPI::Token::Quote::Interpolate, PPI::Token::Quote::Literal and PPI::Token::Quote::Single
912             ## Example q{foo bar}
913             $e->isa( 'PPI::Token::Quote' ) ||
914             $e->isa( 'PPI::Token::QuoteLike' ) ||
915             ## Including PPI::Token::Regexp::Match, PPI::Token::Regexp::Substitute, PPI::Token::Regexp::Transliterate
916             $e->isa( 'PPI::Token::Regexp' ) ||
917             ## Including for example PPI::Token::Magic
918             $e->isa( 'PPI::Token::Symbol' ) ||
919             $e->class eq 'PPI::Token::Word'
920             )
921             {
922 21         694 push( @$argv, [$e] );
923             }
924             elsif( $e->class eq 'PPI::Token::Operator' && $e->content eq ',' )
925             {
926 6 50       305 $self->message( 3, "Found a comma separating argument '", ( ( $i - 1 ) >= 0 ? $elems[$i-1]->content : '' ), "' and '", ( ( $i + 1 ) <= scalar( @elems ) ? $elems[$i+1]->content : '' ), "'." );
    50          
927             }
928             ## Either this is arguments for the previous function found, or this is expressions embedded within parenthesis
929             # XXX Need to implement also PPI::Token::Structure, i.e. [], {}
930             elsif( $e->class eq 'PPI::Structure::List' )
931             {
932 0 0 0     0 if( ref( $elems[$i - 1] ) &&
    0 0        
      0        
      0        
      0        
933             $elems[$i - 1]->class eq 'PPI::Token::Word' &&
934             $argv->[-1]->[0]->class eq 'PPI::Token::Word' )
935             {
936 0         0 push( @{$argv->[-1]}, $e );
  0         0  
937             }
938             elsif( scalar($e->elements) &&
939             ref(($e->elements)[0]) &&
940             ( @expr = $self->_find_expression($e) ) &&
941             $expr[0]->class eq 'PPI::Statement::Expression' )
942             {
943 0         0 my @list = $self->_trim( $expr[0]->elements );
944 0         0 my @new = $recur->( @list );
945 0         0 push( @$argv, [$e->start] );
946 0         0 push( @$argv, @new );
947 0         0 push( @$argv, [$e->finish] );
948             }
949             }
950             ## else we are not interested
951             else
952             {
953 6         210 $self->message( 3, "Clueless about what I should do with element of class '", $e->class, "' and value '", $e->content, "'." );
954             }
955             }
956 15         60 return( @$argv );
957 15         188 };
958 15         49 my @objects = $recur->( @children );
959 15     0   156 $self->message( 3, "Objects found: ", sub{ $self->dump( @objects ) } );
  0         0  
960             ## Stringify result
961 15         344 my @result = map( join( '', map( $_->content, @$_ ) ), @objects );
962 15         533 return( @result );
963             }
964              
965 0     0 1 0 sub trunk { return( shift->_set_get_boolean( 'trunk', @_ ) ); }
966              
967             sub whereami
968             {
969 247     247 0 739 my $self = shift( @_ );
970 247         764 my( $ref, $pos ) = @_;
971             ## How far back should we look?
972 247         438 my $lookback = 10;
973 247 100       625 $lookback = $pos if( $pos < $lookback );
974 247         376 my $lookahead = 20;
975 247         428 my $start = $pos - $lookback;
976 247         744 my $first_line = substr( $$ref, $start, $lookback + $lookahead );
977 247         805 $lookback += () = substr( $$ref, $start, $lookback ) =~ /\n/gs;
978 247         466 $first_line =~ s/\n/\\n/gs;
979 247         859 my $sec_line = ( '.' x $lookback ) . '^' . ( '.' x $lookahead );
980 247         1100 $self->message( 3, "Cusrsor is now here at position '$pos':\n$first_line\n$sec_line" );
981             }
982              
983             ## PPI object manipulation
984             sub _find_expression
985             {
986 0     0     my $self = shift( @_ );
987 0           my $e = shift( @_ );
988 0           my @found = ();
989 0           foreach my $this ( $e->elements )
990             {
991 0           push( @found, $e );
992             }
993 0           return( @found );
994             }
995              
996             ## PPI object manipulation
997             sub _trim
998             {
999 0     0     my $self = shift( @_ );
1000 0           my @elems = @_;
1001 0           for( my $i = 0; $i < scalar( @elems ); $i++ )
1002             {
1003 0 0         if( $elems[$i]->class eq 'PPI::Token::Whitespace' )
1004             {
1005 0           splice( @elems, $i, 1 );
1006 0           $i--;
1007             }
1008             }
1009 0           return( @elems );
1010             }
1011              
1012             1;
1013              
1014             __END__
1015              
1016             =encoding utf-8
1017              
1018             =pod
1019              
1020             =head1 NAME
1021              
1022             Apache2::Expression - Apache2 Expressions
1023              
1024             =head1 SYNOPSIS
1025              
1026             use Apache2::Expression;
1027             my $exp = Apache2::Expression->new( legacy => 1 );
1028             my $hash = $exp->parse;
1029              
1030             =head1 VERSION
1031              
1032             v0.1.0
1033              
1034             =head1 DESCRIPTION
1035              
1036             L<Apache2::Expression> is used to parse Apache2 expression like the one found in SSI (Server Side Includes).
1037              
1038             =head1 METHODS
1039              
1040             =head2 parse
1041              
1042             This method takes a string representing an Apache2 expression as argument, and returns an hash containing the details of the elements that make the expression.
1043              
1044             It takes an optional hash of parameters, as follows :
1045              
1046             =over 4
1047              
1048             =item I<legacy>
1049              
1050             When this is provided with a positive value, this will enable Apache2 legacy regular expression. See L<Regexp::Common::Apache2> for more information on what this means.
1051              
1052             =item I<trunk>
1053              
1054             When this is provided with a positive value, this will enable Apache2 experimental and advanced expressions. See L<Regexp::Common::Apache2> for more information on what this means.
1055              
1056             =back
1057              
1058             For example :
1059              
1060             $HTTP_COOKIE = /lang\%22\%3A\%22([a-zA-Z]+\-[a-zA-Z]+)\%22\%7D;?/
1061              
1062             would return :
1063              
1064             {
1065             elements => [
1066             {
1067             elements => [
1068             {
1069             elements => [
1070             {
1071             elements => [],
1072             name => "HTTP_COOKIE",
1073             raw => "\$HTTP_COOKIE",
1074             re => { variable => "\$HTTP_COOKIE", varname => "HTTP_COOKIE" },
1075             subtype => "variable",
1076             type => "variable",
1077             },
1078             {
1079             elements => [],
1080             flags => undef,
1081             pattern => "lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?",
1082             raw => "/lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1083             re => {
1084             regex => "/lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1085             regpattern => "lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?",
1086             regsep => "/",
1087             },
1088             sep => "/",
1089             type => "regex",
1090             },
1091             ],
1092             op => "=",
1093             raw => "\$HTTP_COOKIE = /lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1094             re => {
1095             comp => "\$HTTP_COOKIE = /lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1096             comp_in_regexp_legacy => "\$HTTP_COOKIE = /lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1097             comp_regexp => "/lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1098             comp_regexp_op => "=",
1099             comp_word => "\$HTTP_COOKIE",
1100             },
1101             regexp => "/lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1102             subtype => "regexp",
1103             type => "comp",
1104             word => "\$HTTP_COOKIE",
1105             },
1106             ],
1107             raw => "\$HTTP_COOKIE = /lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1108             re => {
1109             cond => "\$HTTP_COOKIE = /lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1110             cond_comp => "\$HTTP_COOKIE = /lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1111             },
1112             subtype => "comp",
1113             type => "cond",
1114             },
1115             ],
1116             raw => "\$HTTP_COOKIE = /lang\\%22\\%3A\\%22([a-zA-Z]+\\-[a-zA-Z]+)\\%22\\%7D;?/",
1117             }
1118              
1119             The properties returned in the hash are:
1120              
1121             =over 4
1122              
1123             =item I<elements>
1124              
1125             An array reference of sub elements contained which provides granular definition.
1126              
1127             Whatever the I<elements> array reference contains is defined in one of the types below.
1128              
1129             =item I<name>
1130              
1131             The name of the element. For example if this is a function, this would be the function name, or if this is a variable, this would be the variable name without it leading dollar or percent sign nor its possible surrounding accolades.
1132              
1133             =item I<raw>
1134              
1135             The raw string, or chunk of string that was processed.
1136              
1137             =item I<re>
1138              
1139             This contains the hash of capture groups as provided by L<Regexp::Common::Apache2>. It is made available to enable finer and granular control.
1140              
1141             =item I<regexp>
1142              
1143             =item I<subtype>
1144              
1145             A sub type that provide more information about the type of expression processed.
1146              
1147             This can be any of the I<type> mentioned below plus the following ones : binary (for comparison), list (for word to list comparison), negative, parenthesis, rebackref, regexp, unary (for comparison)
1148              
1149             See below for possible combinations.
1150              
1151             =item I<type>
1152              
1153             The main type matching the Apache2 expression. This can be comp, cond, digits, function, integercomp, quote (for quoted words), regex, stringcomp, listfunc, variable, word
1154              
1155             See below for possible combinations.
1156              
1157             =item I<word>
1158              
1159             If this is a word, this contains the word. In th example above, C<$HTTP_COOKIE> would be the word used in the regular expression comparison.
1160              
1161             =back
1162              
1163             =head2 parse_args
1164              
1165             Given a string that represents typically a function arguments, this method will use L<PPI> to parse it and returns an array of parameters as string.
1166              
1167             Parsing a function argument is non-trivial as it can contain function call within function call.
1168              
1169             =head1 COMBINATIONS
1170              
1171             =over 4
1172              
1173             =item B<comp>
1174              
1175             Type: comp
1176              
1177             Possible sub types:
1178              
1179             =over 8
1180              
1181             =item I<binary>
1182              
1183             When a binary operator is used, such as :
1184              
1185             ==, =, !=, <, <=, >, >=, -ipmatch, -strmatch, -strcmatch, -fnmatch
1186              
1187             Example :
1188              
1189             192.168.2.10 -ipmatch 192.168.2/24
1190              
1191             C<192.168.2.10> would be captured in property I<worda>, C<ipmatch> (without leading dash) would be captured in property I<op> and C<192.168.2/24> would be captured in property I<wordb>.
1192              
1193             The array reference in property I<elements> will contain more information on I<worda> and I<wordb>
1194              
1195             Also the details of elements for I<worda> can be accessed with property I<worda_def> as an array reference and likewise for I<wordb> with I<wordb_def>.
1196              
1197             =item I<function>
1198              
1199             This contains the function name and arguments when the lefthand side word is compared to a list function.
1200              
1201             For example :
1202              
1203             192.168.1.10 in split( /\,/, $ip_list )
1204              
1205             In this example, C<192.168.1.10> would be captured in I<word> and C<split( /\,/, $ip_list )> would be captured in I<function> with the array reference I<elements> containing more information about the word and the function.
1206              
1207             Also the details of elements for I<word> can be accessed with property I<word_def> as an array reference and likewise for I<function> with I<function_def>.
1208              
1209             =item I<list>
1210              
1211             Is true when the comparison is of a word on the lefthand side to a list of words, such as :
1212              
1213             %{SOME_VALUE} in {"John", "Peter", "Paul"}
1214              
1215             In this example, C<%{SOME_VALUE}> would be captured in property I<word> and C<"John", "Peter", "Paul"> (without enclosing accolades or possible spaces after and before them) would be captured in property I<list>
1216              
1217             The array reference I<elements> will possibly contain more information on I<word> and each element in I<list>
1218              
1219             Also the details of elements for I<word> can be accessed with property I<word_def> as an array reference and likewise for I<list> with I<list_def>.
1220              
1221             =item I<regexp>
1222              
1223             When the lefthand side word is being compared to a regular expression.
1224              
1225             For example :
1226              
1227             %{HTTP_COOKIE} =~ /lang\%22\%3A\%22([a-zA-Z]+\-[a-zA-Z]+)\%22\%7D;?/
1228              
1229             In this example, C<%{HTTP_COOKIE}> would be captured in property I<word> and C</lang\%22\%3A\%22([a-zA-Z]+\-[a-zA-Z]+)\%22\%7D;?/> would be captured in property I<regexp> and C<=~> would be captured in property I<op>
1230              
1231             Check the array reference in property I<elements> for more details about the I<word> and the regular expression in I<regexp>.
1232              
1233             Also the details of elements for I<word> can be accessed with property I<word_def> as an array reference and likewise for I<regexp> with I<regexp_def>.
1234              
1235             =item I<unary>
1236              
1237             When the following operator is used against a word :
1238              
1239             -d, -e, -f, -s, -L, -h, -F, -U, -A, -n, -z, -T, -R
1240              
1241             For example:
1242              
1243             -A /some/uri.html # (same as -U)
1244             -d /some/folder # file is a directory
1245             -e /some/folder/file.txt # file exists
1246             -f /some/folder/file.txt # file is a regular file
1247             -F /some/folder/file.txt # file is a regular file and is accessible to all (Apache2 does a sub query to check)
1248             -h /some/folder/link.txt # true if file is a symbolic link
1249             -n %{QUERY_STRING} # true if string is not empty (opposite of -z)
1250             -s /some/folder/file.txt # true if file is not empty
1251             -L /some/folder/link.txt # true if file is a symbolic link (same as -h)
1252             -R 192.168.1.1/24 # remote ip match this ip block; same as %{REMOTE_ADDR} -ipmatch 192.168.1.1/24
1253             -T %{HTTPS} # false if string is empty, "0", "off", "false", or "no" (case insensitive). True otherwise.
1254             -U /some/uri.html # check if the uri is accessible to all (Apache2 does a sub query to check)
1255             -z %{QUERY_STRING} # true if string is empty (opposite of -n)
1256              
1257             In this example C<-e /some/folder/file.txt>, C<e> (without leading dash) would be captured in I<op> and C</some/folder/file.txt> would be captured in I<word>
1258              
1259             Check the array reference in property I<elements> for more information about the word in I<word>
1260              
1261             Also the details of elements for I<word> can be accessed with property I<word_def> as an array reference.
1262              
1263             See here for more information: L<Regexp::Common::Apache2::comp>
1264              
1265             =back
1266              
1267             Available properties:
1268              
1269             =over 8
1270              
1271             =item I<op>
1272              
1273             Contains the operator used. See L<Regexp::Common::Apache2::comp>, L<Regexp::Common::Apache2/stringcomp> and L<Regexp::Common::Apache2/integercomp>
1274              
1275             This may be for unary operators :
1276              
1277             -d, -e, -f, -s, -L, -h, -F, -U, -A, -n, -z, -T, -R
1278              
1279             For binary operators :
1280              
1281             ==, =, !=, <, <=, >, >=, -ipmatch, -strmatch, -strcmatch, -fnmatch
1282              
1283             For integer comparison :
1284              
1285             -eq, -ne, -lt, -le, -gt, -ge
1286              
1287             For string comparison :
1288              
1289             ==, !=, <, <=, >, >=
1290              
1291             In all the possible operators above, I<op> contains the value, but without the leading dash, if any.
1292              
1293             =item I<word>
1294              
1295             The word being compared.
1296              
1297             =item I<worda>
1298              
1299             The first word being compared, and on the left of the operator. For example :
1300              
1301             12 -ne 10
1302              
1303             =item I<wordb>
1304              
1305             The second word, being compared to, and on the right of the operator.
1306              
1307             =back
1308              
1309             See L<Regexp::Common::Apache2/comp> for more information.
1310              
1311             =item B<cond>
1312              
1313             Type: cond
1314              
1315             Possible sub types:
1316              
1317             =over 8
1318              
1319             =item I<and>
1320              
1321             When the condition is an ANDed expression such as :
1322              
1323             $ap_true && $ap_false
1324              
1325             In this case, C<$ap_true> would be captured in property I<expr1> and C<$ap_false> would be captured in property I<expr2>
1326              
1327             Also the details of elements for the variable can be accessed with property I<and_def> as an array reference and I<and_expr1_def> and I<and_expr2_def>
1328              
1329             =item I<comp>
1330              
1331             Contains the expression when the condition is actually a comparison.
1332              
1333             This will recurse and you can see more information in the array reference in the property I<elements>. For more information on what it will contain, check the B<comp> type.
1334              
1335             =item I<cond>
1336              
1337             Default sub type
1338              
1339             =item I<negative>
1340              
1341             When the condition is negative, ie prefixed by an exclamation mark.
1342              
1343             For example :
1344              
1345             !-z /some/folder/file.txt
1346              
1347             You need to check for the details in array reference contained in property I<elements>
1348              
1349             Also the details of elements for the variable can be accessed with property I<negative_def> as an array reference.
1350              
1351             =item I<or>
1352              
1353             When the condition is an ORed expression such as :
1354              
1355             $ap_true || $ap_false
1356              
1357             In this case, C<$ap_true> would be captured in property I<expr1> and C<$ap_false> would be captured in property I<expr2>
1358              
1359             Also the details of elements for the variable can be accessed with property I<and_def> as an array reference and I<and_expr1_def> and I<and_expr2_def>
1360              
1361             =item I<parenthesis>
1362              
1363             When the condition is embedded within parenthesis
1364              
1365             You need to check the array reference in property I<elements> for information about the embedded condition.
1366              
1367             Also the details of elements for the variable can be accessed with property I<parenthesis_def> as an array reference.
1368              
1369             =item I<variable>
1370              
1371             Contains the expression when the condition is based on a variable, such as :
1372              
1373             %{REQUEST_URI}
1374              
1375             Check the array reference in property I<elements> for more details about the variable, especially the property I<name> which would contain the name of the variable; in this case : C<REQUEST_URI>
1376              
1377             Also the details of elements for the variable can be accessed with property I<variable_def> as an array reference.
1378              
1379             =back
1380              
1381             Available properties:
1382              
1383             =over 8
1384              
1385             =item I<args>
1386              
1387             Function arguments. See the content of the I<elements> array reference for more breakdown on the arguments provided.
1388              
1389             =item I<is_negative>
1390              
1391             If the condition is negative, this value is true
1392              
1393             =item I<name>
1394              
1395             Function name
1396              
1397             =back
1398              
1399             See L<Regexp::Common::Apache2/cond> for more information.
1400              
1401             =item B<function>
1402              
1403             Type: function
1404              
1405             Possible sub types: none
1406              
1407             Available properties:
1408              
1409             =over 8
1410              
1411             =item I<args>
1412              
1413             Function arguments. See the content of the I<elements> array reference for more breakdown on the arguments provided.
1414              
1415             Also the details of elements for those args can be accessed with property I<args_def> as an array reference.
1416              
1417             =item I<name>
1418              
1419             Function name
1420              
1421             =back
1422              
1423             See L<Regexp::Common::Apache2/function> for more information.
1424              
1425             =item B<integercomp>
1426              
1427             Type: integercomp
1428              
1429             Possible sub types: none
1430              
1431             Available properties:
1432              
1433             =over 8
1434              
1435             =item I<op>
1436              
1437             Contains the operator used. See L<Regexp::Common::Apache2/integercomp>
1438              
1439             =item I<worda>
1440              
1441             The first word being compared, and on the left of the operator. For example :
1442              
1443             12 -ne 10
1444              
1445             Also the details of elements for I<worda> can be accessed with property I<worda_def> as an array reference.
1446              
1447             =item I<wordb>
1448              
1449             The second word, being compared to, and on the right of the operator.
1450              
1451             Also the details of elements for I<wordb> can be accessed with property I<wordb_def> as an array reference.
1452              
1453             =back
1454              
1455             See L<Regexp::Common::Apache2/integercomp> for more information.
1456              
1457             =item B<join>
1458              
1459             Type: join
1460              
1461             Possible sub types: none
1462              
1463             Available properties:
1464              
1465             =over 8
1466              
1467             =item I<list>
1468              
1469             The list of strings to be joined. See the content of the I<elements> array reference for more breakdown on the arguments provided.
1470              
1471             Also the details of elements for those args can be accessed with property I<list_def> as an array reference.
1472              
1473             =item I<word>
1474              
1475             The word used to join the list. This parameter is optional.
1476              
1477             Details for the word parameter, if any, can be found in the I<elements> array reference or can be accessed with the I<word_def> property.
1478              
1479             =back
1480              
1481             For example :
1482              
1483             join({"John Paul Doe"}, ', ')
1484             # or
1485             join({"John", "Paul", "Doe"}, ', ')
1486             # or just
1487             join({"John", "Paul", "Doe"})
1488              
1489             See L<Regexp::Common::Apache2/join> for more information.
1490              
1491             =item B<listfunc>
1492              
1493             Type: listfunc
1494              
1495             Possible sub types: none
1496              
1497             Available properties:
1498              
1499             =over 8
1500              
1501             =item I<args>
1502              
1503             Function arguments. See the content of the I<elements> array reference for more breakdown on the arguments provided.
1504              
1505             Also the details of elements for those args can be accessed with property I<args_def> as an array reference.
1506              
1507             =item I<name>
1508              
1509             Function name
1510              
1511             =back
1512              
1513             See L<Regexp::Common::Apache2/listfunc> for more information.
1514              
1515             =item B<regex>
1516              
1517             Type: regex
1518              
1519             Possible sub types: none
1520              
1521             Available properties:
1522              
1523             =over 8
1524              
1525             =item I<flags>
1526              
1527             Example: C<mgis>
1528              
1529             =item I<pattern>
1530              
1531             Regular expression pattern, excluding enclosing separators.
1532              
1533             =item I<sep>
1534              
1535             Type of separators used. It can be: /, #, $, %, ^, |, ?, !, ', ", ",", ";", ":", ".", _, and -
1536              
1537             =back
1538              
1539             See L<Regexp::Common::Apache2/regex> for more information.
1540              
1541             =item B<stringcomp>
1542              
1543             Type: stringcomp
1544              
1545             Possible sub types: none
1546              
1547             Available properties:
1548              
1549             =over 8
1550              
1551             =item I<op>
1552              
1553             COntains the operator used. See L<Regexp::Common::Apache2/stringcomp>
1554              
1555             =item I<worda>
1556              
1557             The first word being compared, and on the left of the operator. For example :
1558              
1559             12 -ne 10
1560              
1561             Also the details of elements for I<worda> can be accessed with property I<worda_def> as an array reference.
1562              
1563             =item I<wordb>
1564              
1565             The second word, being compared to, and on the right of the operator.
1566              
1567             Also the details of elements for I<wordb> can be accessed with property I<wordb_def> as an array reference.
1568              
1569             =back
1570              
1571             See L<Regexp::Common::Apache2/stringcomp> for more information.
1572              
1573             =item B<variable>
1574              
1575             Type: variable
1576              
1577             Possible sub types:
1578              
1579             =over 8
1580              
1581             =item I<function>
1582              
1583             %{md5:"some arguments"}
1584              
1585             =item I<rebackref>
1586              
1587             This is a regular expression back reference, such as C<$1>, C<$2>, etc. up to 9
1588              
1589             =item I<variable>
1590              
1591             %{REQUEST_URI}
1592             # or by enabling the legacy expressions
1593             ${REQUEST_URI}
1594              
1595             =back
1596              
1597             Available properties:
1598              
1599             =over 8
1600              
1601             =item I<args>
1602              
1603             Function arguments. See the content of the I<elements> array reference for more breakdown on the arguments provided.
1604              
1605             =item I<name>
1606              
1607             Function name, or variable name.
1608              
1609             =item I<value>
1610              
1611             The regular expression back reference value, such as C<1>, C<2>, etc
1612              
1613             =back
1614              
1615             See L<Regexp::Common::Apache2/variable> for more information.
1616              
1617             =item B<word>
1618              
1619             Type: word
1620              
1621             Possible sub types:
1622              
1623             =over 8
1624              
1625             =item I<digits>
1626              
1627             When the word contains one or more digits.
1628              
1629             =item I<dotted>
1630              
1631             When the word contains words sepsrated by dots, such as C<192.168.1.10>
1632              
1633             =item I<function>
1634              
1635             When the word is a function.
1636              
1637             =item I<parens>
1638              
1639             When the word is surrounded by parenthesis
1640              
1641             =item I<quote>
1642              
1643             When the word is surrounded by single or double quotes
1644              
1645             =item I<rebackref>
1646              
1647             When the word is a regular expression back reference such as C<$1>, C<$2>, etc up to 9.
1648              
1649             =item I<regex>
1650              
1651             This is an extension I added to make work some function such as C<split( /\w+/, $ip_list)>
1652              
1653             Without it, the regular expression would not be recognised as the Apache BNF stands.
1654              
1655             =item I<variable>
1656              
1657             When the word is a variable. For example : C<%{REQUEST_URI}>, and it can also be a variable like C<${REQUEST_URI> if the legacy mode is enabled.
1658              
1659             =back
1660              
1661             Available properties:
1662              
1663             =over 8
1664              
1665             =item I<flags>
1666              
1667             The regular expression flags used, such as C<mgis>
1668              
1669             =item I<parens>
1670              
1671             Contains an array reference of the open and close parenthesis, such as:
1672              
1673             ["(", ")"]
1674              
1675             =item I<pattern>
1676              
1677             The regular expression pattern
1678              
1679             =item I<quote>
1680              
1681             Contains the type of quote used if the sub type is I<quote>
1682              
1683             =item I<regex>
1684              
1685             Contains the regular expression
1686              
1687             =item I<sep>
1688              
1689             The separator used in the regular expression, such as C</>
1690              
1691             =item I<value>
1692              
1693             The value of the digits if the sub type is I<digits> or I<rebackref>
1694              
1695             =item I<word>
1696              
1697             The word enclosed in quotes
1698              
1699             =back
1700              
1701             See L<Regexp::Common::Apache2/variable> for more information.
1702              
1703             =back
1704              
1705             =head1 CAVEAT
1706              
1707             This module supports well Apache2 expressions. However, some expression are difficult to process. For example:
1708              
1709             Expressions with functions not using enclosing parenthesis:
1710              
1711             %{REMOTE_ADDR} -in split s/.*?IP Address:([^,]+)/$1/, PeerExtList('subjectAltName')
1712              
1713             Instead, use:
1714              
1715             %{REMOTE_ADDR} -in split(s/.*?IP Address:([^,]+)/$1/, PeerExtList('subjectAltName'))
1716              
1717             There is no mechanism yet to prevent infinite recursion. This needs to be implemented.
1718              
1719             =head1 CHANGES & CONTRIBUTIONS
1720              
1721             Feel free to reach out to the author for possible corrections, improvements, or suggestions.
1722              
1723             =head1 AUTHOR
1724              
1725             Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
1726              
1727             =head1 SEE ALSO
1728              
1729             L<Apache2::SSI>, L<Regexp::Common::Apache2>,
1730             L<https://httpd.apache.org/docs/current/expr.html>
1731              
1732             =head1 COPYRIGHT & LICENSE
1733              
1734             Copyright (c) 2020 DEGUEST Pte. Ltd.
1735              
1736             You can use, copy, modify and redistribute this package and associated
1737             files under the same terms as Perl itself.
1738              
1739             =cut