File Coverage

blib/lib/App/HTTP_Proxy_IMP/IMP.pm
Criterion Covered Total %
statement 49 446 10.9
branch 6 316 1.9
condition 0 127 0.0
subroutine 11 33 33.3
pod 0 9 0.0
total 66 931 7.0


line stmt bran cond sub pod time code
1 1     1   9 use strict;
  1         3  
  1         60  
2 1     1   10 use warnings;
  1         4  
  1         74  
3              
4             package App::HTTP_Proxy_IMP::IMP;
5              
6 1     1   1349 use Net::Inspect::Debug qw(:DEFAULT $DEBUG);
  1         1899  
  1         8  
7 1     1   1661 use Net::IMP::Debug var => \$DEBUG, sub => \&debug;
  1         1417  
  1         5  
8 1     1   961 use Net::IMP qw(:DEFAULT :log);
  1         2527  
  1         222  
9 1     1   888 use Net::IMP::HTTP;
  1         11012  
  1         128  
10 1     1   66 use Scalar::Util 'weaken';
  1         3  
  1         125  
11 1     1   3185 use Hash::Util 'lock_ref_keys';
  1         6244  
  1         13  
12 1     1   2237 use Compress::Raw::Zlib;
  1         12735  
  1         323  
13 1     1   8 use Carp;
  1         3  
  1         6588  
14              
15             my %METHODS_RFC2616 = map { ($_,1) } qw( GET HEAD POST PUT DELETE OPTIONS CONNECT TRACE );
16             my %METHODS_WITHOUT_RQBODY = map { ($_,1) } qw( GET HEAD DELETE CONNECT );
17             my %METHODS_WITH_RQBODY = map { ($_,1) } qw( POST PUT );
18             my %CODE_WITHOUT_RPBODY = map { ($_,1) } qw(204 205 304);
19             my %METHODS_WITHOUT_RPBODY = map { ($_,1) } qw(HEAD);
20              
21             # we want plugins to suppport the HTTP Request innterface
22             my $interface = [
23             IMP_DATA_HTTPRQ,
24             [
25             IMP_PASS,
26             IMP_PREPASS,
27             IMP_REPLACE,
28             IMP_TOSENDER,
29             IMP_DENY,
30             IMP_LOG,
31             IMP_ACCTFIELD,
32             IMP_PAUSE,
33             IMP_CONTINUE,
34             IMP_FATAL,
35             ]
36             ];
37              
38             sub can_modify {
39 0     0 0 0 return shift->{can_modify};
40             }
41              
42             # create a new factory object
43             sub new_factory {
44 2     2 0 9 my ($class,%args) = @_;
45 2         3 my @factory;
46 2 50       4 for my $module (@{ delete $args{mod} || [] }) {
  2         10  
47 2 50       7 if ( ref($module)) {
48             # assume it is already an IMP factory object
49 2         4 push @factory, $module;
50 2         4 next;
51             }
52              
53             # --filter mod=args
54 0 0       0 my ($mod,$args) = $module =~m{^([a-z][\w:]*)(?:=(.*))?$}i
55             or die "invalid module $module";
56 0 0       0 eval "require $mod" or die "cannot load $mod args=$args: $@";
57 0   0     0 my %args = $mod->str2cfg($args//'');
58 0 0       0 my $factory = $mod->new_factory(%args)
59             or croak("cannot create Net::IMP factory for $mod");
60 0 0 0     0 $factory =
61             $factory->get_interface( $interface ) &&
62             $factory->set_interface( $interface )
63             or croak("$mod does not implement the interface supported by us");
64 0         0 push @factory,$factory;
65             }
66              
67 2 50       6 @factory or return;
68 2 50       7 if (@factory>1) {
69             # for cascading filters we need Net::IMP::Cascade
70 0         0 require Net::IMP::Cascade;
71 0 0       0 my $cascade = Net::IMP::Cascade->new_factory( parts => [ @factory ])
72             or croak("cannot create Net::IMP::Cascade factory");
73 0 0       0 $cascade = $cascade->set_interface( $interface ) or
74             croak("cascade does not implement the interface supported by us");
75 0         0 @factory = $cascade;
76             }
77 2         5 my $factory = $factory[0];
78              
79 2         11 my $self = bless {
80             %args,
81             imp => $factory, # IMP factory object
82             can_modify => 0, # does interface support IMP_REPLACE, IMP_TOSENDER
83             }, $class;
84 2         8 lock_ref_keys($self);
85              
86             # update can_modify
87 2         27 CHKIF: for my $if ( $factory->get_interface ) {
88 2         24 my ($dt,$rt) = @$if;
89 2         6 for (@$rt) {
90 4 100       19 $_ ~~ [ IMP_REPLACE, IMP_TOSENDER ] or next;
91 2         6 $self->{can_modify} =1;
92 2         6 last CHKIF;
93             }
94             }
95            
96 2         12 return $self;
97             }
98              
99             # create a new analyzer based on the factory
100             sub new_analyzer {
101 0     0 0   my ($factory,$request,$meta) = @_;
102              
103 0           my %meta = %$meta;
104             # IMP uses different *addr than Net::Inspect, translate
105             # [s]ource -> [c]lient, [d]estination -> [s]erver
106 0           $meta{caddr} = delete $meta{saddr};
107 0           $meta{cport} = delete $meta{sport};
108 0           $meta{saddr} = delete $meta{daddr};
109 0           $meta{sport} = delete $meta{dport};
110              
111 0           my $analyzer = $factory->{imp}->new_analyzer( meta => \%meta );
112              
113             my $self = bless {
114             request => $request, # App::HTTP_Proxy_IMP::Request object
115             imp => $analyzer,
116             # incoming data, put into analyzer
117             # \@list of [ buf_base,buf,type,callback,$cb_arg ] per dir
118             ibuf => [
119             [ [0,''] ],
120             [ [0,''] ],
121             ],
122             pass => [0,0], # pass allowed up to given offset (per dir)
123             prepass => [0,0], # prepass allowed up to given offset (per dir)
124             fixup_header => [], # sub to fixup content-length in header once known
125             eof => [0,0], # got eof in dir ?
126             decode => undef, # decoder for content-encoding decode{type}[dir]
127             pass_encoded => undef, # pass body encoded (analyzer will not change body)
128             method => undef, # request method
129             logsub => $factory->{logsub}, # how to log IMP_OG
130 0           }, ref($factory);
131 0           lock_ref_keys($self);
132 0           weaken($self->{request});
133              
134             # set callback, this might trigger callback immediately if there are
135             # results pending
136 0           weaken( my $wself = $self );
137 0     0     $analyzer->set_callback( sub { _imp_callback($wself,@_) } );
  0            
138 0           return $self;
139             }
140              
141              
142             sub request_header {
143 0     0 0   my ($self,$hdr,$xhdr,@callback) = @_;
144 0           my $clen = $xhdr->{content_length};
145              
146             # new body might change content-length info in request header
147             # need to defer sending header until body length is known
148 0 0         if ( ! $METHODS_WITHOUT_RQBODY{$xhdr->{method}} ) {
149 0 0 0       if ( ! defined $clen and $xhdr->{method} ne 'CONNECT') {
150             # length not known -> chunking
151 0           die "FIXME: chunking request body not yet supported";
152             }
153              
154 0           my $hlen = length($hdr);
155             $self->{fixup_header}[0] = sub {
156 0     0     my ($self,$hdr,%args) = @_;
157 0           my $size = $args{content};
158 0 0         goto fix_clen if defined $size;
159              
160 0 0         if ( my $pass = $self->{pass}[0] ) {
161 0 0 0       if ( $pass == IMP_MAXOFFSET or $pass >= $hlen + $clen ) {
162             # will not change body
163 0           goto fix_clen;
164             }
165             }
166 0 0         if ( my $prepass = $self->{prepass}[0] ) {
167 0 0 0       if ( $prepass == IMP_MAXOFFSET or $prepass >= $hlen + $clen ) {
168             # will not change body
169 0           goto fix_clen;
170             }
171             }
172 0 0         if ($self->{ibuf}[0][0][0] >= $hlen + $clen) { # ibuf[client].base
173             # everything passed thru ibuf
174 0           goto fix_clen;
175             }
176              
177             # need to defer header until all of the body is passed
178             # or replaced, then we know the size
179 0           return;
180              
181             fix_clen:
182              
183 0 0         if (!defined $size) {
184             # bytes in ibuf and outstanding bytes will not be changed, so:
185             # new_content_length =
186             # ( orig_clen + orig_hlen - received ) # not yet received
187             # + ( received - ibuf.base ) # still in ibuf
188             # + defered_body.length # ready to forward
189             # --->
190             # orig_clen + orig_hlen - ibuf.base + defered_body.length
191             $size = $clen + $hlen # orig_clen + orig_hlen
192             - $self->{ibuf}[0][0][0] # ibuf.base
193 0           + $args{defered}; # defered_body.length
194             }
195              
196 0 0         $DEBUG && $self->{request}->xdebug("fixup header with clen=%d",$size);
197             # replace or add content-length header
198 0 0         $$hdr =~s{^(Content-length:[ \t]*)(\d+)}{$1$size}mi
199             || $$hdr =~s{(\n)}{$1Content-length: $size\r\n};
200 0           return 1;
201 0           };
202             }
203              
204              
205             # send data to analyzer.
206             # will call back into request on processed data
207 0           _imp_data($self,0,$hdr,0,IMP_DATA_HTTPRQ_HEADER,
208             \&_request_header_imp,[ $xhdr,@callback ]);
209             }
210              
211              
212             ############################################################################
213             # callback from IMP after passing/replacing the HTTP request header
214             # will reparse the header if changed and continue in @callback from request
215             ############################################################################
216             sub _request_header_imp {
217 0     0     my ($self,$hdr,$changed,$args) = @_;
218 0           my ($xhdr,$callback,@cb_args) = @$args;
219              
220 0 0         if ( $changed ) {
221             # we need to parse the header again and update xhdr
222 0           my ($met,$url,$version,$fields) = $hdr =~m{ \A
223             (\S+)[\040\t]+
224             (\S+)[\040\t]+
225             HTTP/(1\.[01])[\040\t]*
226             \r?\n
227             (.*?\n)
228             \r?\n\Z
229             }isx;
230              
231             # internal URL are not accepted by the client itself, only from
232             # plugin. Set xhdr.internal_url if we see, that IMP plugin rewrote
233             # url to internal one and strip internal:// again so that original
234             # URL could be logged
235             my $internal = $met ne 'CONNECT'
236 0   0       && $xhdr->{url} !~m{^internal://}i
237             && $url =~s{^internal://}{}i;
238              
239 0           my %kv;
240 0           my $bad = _parse_hdrfields($fields,\%kv);
241 0 0         $xhdr = {
    0          
242             method => uc($met),
243             version => $version,
244             url => $url,
245             fields => \%kv,
246             $bad ? ( junk => $bad ) :(),
247             $internal ? ( internal_url => 1 ):(),
248             };
249             }
250              
251             # we don't know the content length yet, unless it can be determined by the
252             # request method. If we got a (pre)pass until the end of the request body
253             # fixup_header will know it and adjust the header
254             $xhdr->{content_length} =
255 0 0         $METHODS_WITHOUT_RQBODY{$xhdr->{method}} ? 0:undef;
256              
257 0           $self->{method} = $xhdr->{method};
258 0           return $callback->(@cb_args,$hdr,$xhdr);
259             }
260              
261             ############################################################################
262             # fix request header by setting correct content-length
263             # returns true if header could be fixed
264             ############################################################################
265             sub fixup_request_header {
266 0     0 0   my ($self,$hdr_ref,%args) = @_;
267 0 0         my $sub = $self->{fixup_header}[0] or return 1;
268 0           my $ok = $sub->($self,$hdr_ref,%args);
269 0 0         $self->{fixup_header}[0] = undef if $ok;
270 0           return $ok;
271             }
272              
273              
274             ############################################################################
275             # process request body data
276             # just feed to analyzer and call back into request once done
277             ############################################################################
278             sub request_body {
279 0     0 0   my ($self,$data,@callback) = @_;
280              
281             # feed data into IMP
282 0 0         $self->{eof}[0] = 1 if $data eq '';
283 0           _imp_data($self,0,$data,0,IMP_DATA_HTTPRQ_CONTENT,
284             \&_request_body_imp,\@callback );
285             }
286              
287             sub _request_body_imp {
288 0     0     my ($self,$data,$changed,$args) = @_;
289 0           my ($callback,@cb_args) = @$args;
290 0           my $eof = _check_eof($self,0);
291 0 0 0       $callback->(@cb_args,$data,$eof) if $data ne '' || $eof;
292             }
293              
294             ############################################################################
295             # process response header
296             ############################################################################
297             sub response_header {
298 0     0 0   my ($self,$hdr,$xhdr,@callback) = @_;
299              
300             # if content is encoded we need to decode it in order to analyze
301             # it. For now only set decode to the encoding method, this will
302             # be changed to a decoding function once we need it in the body
303 0 0         if ( my $ce = $xhdr->{fields}{'content-encoding'} ) {
304             # the right way would be to extract all encodings and then complain, if
305             # there is an encoding we don't support. Instead we just look for the
306             # encodings we support
307 0           my %ce = map { lc($_) => 1 } map { m{\b(?:x-)?(gzip|deflate)\b}ig } @$ce;
  0            
  0            
308 0 0         $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] = join(", ",keys %ce)
309             if %ce;
310             }
311              
312             # header length is needed in callback
313 0           $xhdr->{header_length} = length($hdr);
314 0           _imp_data($self,1,$hdr,0,IMP_DATA_HTTPRQ_HEADER,
315             \&_response_header_imp,[$xhdr,@callback] );
316             }
317              
318              
319             ############################################################################
320             # callback after passing/replacing the HTTP response header
321             # will reparse the header if changed and continue in the request via
322             # callback
323             ############################################################################
324             sub _response_header_imp {
325 0     0     my ($self,$hdr,$changed,$args) = @_;
326 0           my ($xhdr,$callback,@cb_args) = @$args;
327              
328 0           my $orig_clen = $xhdr->{content_length};
329 0           my $orig_hlen = $xhdr->{header_length};
330              
331 0 0         if ( $changed ) {
332             # we need to parse the header again and update xhdr
333 0           my ($version,$code,$reason,$fields) = $hdr =~m{ \A
334             HTTP/(1\.[01])[\040\t]+
335             (\d\d\d)
336             (?:[\040\t]+([^\r\n]*))?
337             \r?\n
338             (.*?\n)
339             \r?\n\Z
340             }isx;
341              
342 0           my %kv;
343 0           my $bad = _parse_hdrfields($fields,\%kv);
344 0 0         $xhdr = {
345             code => $code,
346             version => $version,
347             reason => $reason,
348             fields => \%kv,
349             $bad ? ( junk => $bad ) :(),
350             };
351             }
352              
353             # except for some codes or request methods we don't know the
354             # content-length of the body yet
355             # in these cases we try in this order
356             # - check if we got a (pre)pass for the whole body already
357             # - use chunked encoding if client speaks HTTP/1.1
358             # - don't specify content-length and close request with connection close
359              
360             # we don't change $hdr here because it will be rebuild from fields anyway
361 0 0 0       if ( $CODE_WITHOUT_RPBODY{$xhdr->{code}} or $xhdr->{code} < 200 ) {
362 0           $xhdr->{content_length} = 0;
363             # better remove them
364 0           delete @{ $xhdr->{fields} }{ qw/ content-length transfer-encoding / };
  0            
365 0           goto callback;
366             }
367              
368 0 0         if ( $METHODS_WITHOUT_RPBODY{ $self->{method} } ) {
369 0           $xhdr->{content_length} = 0;
370             # keep content-length etc, client might want to peek into it using HEAD
371 0           goto callback;
372             }
373            
374             # reset infos about content-length
375 0           $xhdr->{content_length} = $xhdr->{chunked} = undef;
376 0           delete @{ $xhdr->{fields} }{ qw/ content-length transfer-encoding / };
  0            
377              
378             # if we have read the whole body already or at least know, that we will
379             # not change anymore data, we could compute the new content-length
380 0           my $clen;
381             my $nochange;
382 0           while ( defined $orig_clen ) {
383 0           my $rpsize = $orig_hlen + $orig_clen;
384              
385 0 0         if ( my $pass = $self->{pass}[1] ) {
386 0 0 0       if ( $pass == IMP_MAXOFFSET or $pass >= $rpsize ) {
387             # will not look at and not change body
388 0           $nochange = 1;
389 0           goto compute_clen;
390             }
391             }
392 0 0         if ( my $prepass = $self->{prepass}[1] ) {
393 0 0 0       if ( $prepass == IMP_MAXOFFSET or $prepass >= $rpsize ) {
394             # will not change body
395 0           $nochange = 1;
396 0           goto compute_clen;
397             }
398             }
399 0 0         if ($self->{ibuf}[1][0][0] >= $rpsize) { # ibuf[server].base
400             # everything passed thru ibuf
401 0           goto compute_clen;
402             }
403              
404             # we still don't know final size
405 0           last;
406              
407             compute_clen:
408             # bytes in ibuf and outstanding bytes will not be changed, so:
409             # new_content_length =
410             # ( total_size - received ) # not yet received
411             # + ( received - ibuf.base ) # still in ibuf
412             # --->
413             # total_size - ibuf.base
414 0           $clen = $rpsize - $self->{ibuf}[1][0][0];
415              
416 0           last;
417             }
418              
419 0 0         if ( $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] ) {
420 0 0         if ( $nochange ) {
421             # we will pass encoded stuff, either no decoding needs to
422             # be done (pass) or we will decode only for the analyzer (prepass)
423             # which will only watch at the content, but not change it
424 0           $self->{pass_encoded}[1] = 1;
425              
426 0           my $pass = $self->{pass}[1];
427 0 0 0       if ( $pass and defined $orig_clen and (
      0        
      0        
428             $pass == IMP_MAXOFFSET or
429             $pass >= $orig_clen + $orig_hlen )) {
430             # no need to decode body
431 0           $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] = undef;
432             }
433             } else {
434             # content is encoded and inspection wants to see decoded stuff,
435             # which we then will forward too
436             # but decoding might change length
437 0           $clen = undef;
438             # the content will be delivered decoded
439 0           delete $xhdr->{fields}{'content-encoding'}
440             }
441             }
442 0 0         if ( defined $clen ) {
443 0           $xhdr->{fields}{'content-length'} = [ $clen ];
444 0           $xhdr->{content_length} = $clen;
445             }
446              
447             callback:
448 0           $callback->(@cb_args,$hdr,$xhdr);
449             }
450              
451              
452              
453             ############################################################################
454             # handle response body data
455             ############################################################################
456             sub response_body {
457 0     0 0   my ($self,$data,@callback) = @_;
458              
459             # forward to IMP analyzer
460 0 0         $self->{eof}[1] = 1 if $data eq '';
461 0           _imp_data($self,1,$data,0,IMP_DATA_HTTPRQ_CONTENT,
462             \&_response_body_imp,\@callback);
463             }
464              
465             sub _response_body_imp {
466 0     0     my ($self,$data,$changed,$args) = @_;
467 0           my ($callback,@cb_args) = @$args;
468 0           my $eof = _check_eof($self,1);
469 0 0 0       $callback->(@cb_args,$data,$eof) if $data ne '' || $eof;
470             }
471              
472              
473             sub _check_eof {
474 0     0     my ($self,$dir) = @_;
475             $DEBUG && $self->{request}->xdebug(
476             "check eof[%d] - eof=%d - %s - (pre)pass=%d/%d",
477             $dir,$self->{eof}[$dir], _show_buf($self,$dir),
478             $self->{prepass}[$dir],
479 0 0         $self->{pass}[$dir]
480             );
481             return $self->{eof}[$dir] # received eof
482             && ! defined $self->{ibuf}[$dir][0][2] # no more data in buf
483             && ( # (pre)pass til end ok
484             $self->{prepass}[$dir] == IMP_MAXOFFSET
485 0   0       || $self->{pass}[$dir] == IMP_MAXOFFSET
486             );
487             }
488              
489             sub _show_buf {
490 0     0     my ($self,$dir) = @_;
491             return join('|',
492 0   0       map { ($_->[2]||'none')."($_->[0],+".length($_->[1]).")" }
493 0           @{ $self->{ibuf}[$dir] }
  0            
494             );
495             }
496              
497              
498              
499             ############################################################################
500             # Websockets, TLS upgrades etc
501             # if not IMP the forwarding will be done inside this function, otherwise it
502             # will be done in _in_data_imp, which gets called by IMP callback
503             ############################################################################
504             sub data {
505 0     0 0   my ($self,$dir,$data,@callback) = @_;
506              
507             # forward to IMP analyzer
508 0 0         $self->{eof}[$dir] = 1 if $data eq '';
509 0           _imp_data($self,$dir,$data,0,IMP_DATA_HTTPRQ_CONTENT,
510             \&_data_imp,[$dir,@callback]);
511             }
512              
513             sub _data_imp {
514 0     0     my ($self,$data,$changed,$args) = @_;
515 0           my ($dir,$callback,@cb_args) = @$args;
516             my $eof = $self->{eof}[$dir] && # got eof from server
517 0   0       ! defined $self->{ibuf}[$dir][0][2]; # no more data in ibuf[server]
518 0 0 0       $callback->(@cb_args,$dir,$data,$eof) if $data ne '' || $eof;
519             }
520              
521              
522              
523             ############################################################################
524             # callback from IMP
525             # process return types and trigger type specific callbacks on (pre)pass/replace
526             ############################################################################
527             sub _imp_callback {
528 0     0     my $self = shift;
529              
530 0           my %fwd; # forwarded data, per dir
531 0           for my $rv (@_) {
532              
533             # if the request got closed in between just return
534 0 0         my $request = $self->{request} or return;
535              
536 0           my $rtype = shift(@$rv);
537              
538             # deny further data
539 0 0         if ( $rtype == IMP_DENY ) {
540 0           my ($impdir,$msg) = @$rv;
541 0 0         $DEBUG && $request->xdebug("got deny($impdir) $msg");
542 0   0       return $request->deny($msg // 'closed by imp');
543             }
544              
545             # log some data
546 0 0         if ( $rtype == IMP_LOG ) {
547 0           my ($impdir,$offset,$len,$level,$msg) = @$rv;
548 0 0         $DEBUG && $request->xdebug("got log($impdir,$level) $msg");
549 0 0         if ( my $sub = $self->{logsub} ) {
550 0           $sub->($level,$msg,$impdir,$offset,$len)
551             }
552 0           next;
553             }
554              
555             # set accounting field
556 0 0         if ( $rtype == IMP_ACCTFIELD ) {
557 0           my ($key,$value) = @$rv;
558 0 0         $DEBUG && $request->xdebug("got acct $key => $value");
559 0           $request->{acct}{$key} = $value;
560 0           next;
561             }
562              
563             # (pre)pass data up to offset
564 0 0         if ( $rtype ~~ [ IMP_PASS, IMP_PREPASS ]) {
565 0           my ($dir,$offset) = @$rv;
566 0 0         $DEBUG && $request->xdebug("got $rtype($dir) off=$offset "._show_buf($self,$dir));
567              
568 0 0         if ( $rtype == IMP_PASS ) {
569             # ignore pass if it's not better than a previous pass
570 0 0 0       if ( $self->{pass}[$dir] == IMP_MAXOFFSET ) {
    0          
571             # there is no better thing than IMP_MAXOFFSET
572 0           next;
573             } elsif ( $offset == IMP_MAXOFFSET
574             or $offset > $self->{ibuf}[$dir][0][0] ) {
575             # we can pass new data
576 0           $self->{pass}[$dir] = $offset;
577             } else {
578             # offset is no better than previous pass
579 0           next;
580             }
581              
582             } else { # IMP_PREPASS
583             # ignore prepass if it's not better than a previous pass
584             # and a previous prepaself->{ibuf}[1][0]
585 0 0 0       if ( $self->{pass}[$dir] == IMP_MAXOFFSET
    0 0        
586             or $self->{prepass}[$dir] == IMP_MAXOFFSET ) {
587             # there is no better thing than IMP_MAXOFFSET
588 0 0         $DEBUG && debug("new off $offset no better than existing (pre)pass=max");
589 0           next;
590             } elsif ( $offset == IMP_MAXOFFSET
591             or $offset > $self->{ibuf}[$dir][0][0] ) {
592             # we can prepass new data
593 0           $self->{prepass}[$dir] = $offset;
594 0 0         $DEBUG && debug("update prepass with new off $offset");
595             } else {
596             # offset is no better than previous pass
597 0 0         $DEBUG && debug(
598             "new off $offset no better than existing $self->{ibuf}[$dir][0][0]");
599 0           next;
600             }
601             }
602              
603             # collect data up to offset for forwarding
604             # list of [ changed,data,callback,cbarg ]
605 0   0       my $fwd = $fwd{$dir} ||= [];
606              
607 0           my $ibuf = $self->{ibuf}[$dir];
608 0           my $ib0; # top of ibuf, e.g. ibuf[0]
609              
610 0           while ( @$ibuf ) {
611 0           $ib0 = shift(@$ibuf);
612 0 0         defined $ib0->[2] or last; # dummy entry with no type
613              
614 0 0         if ( $offset == IMP_MAXOFFSET ) {
615             # forward this buf and maybe more
616 0           push @$fwd, [ 0, @{$ib0}[1,3,4] ];
  0            
617             } else {
618 0           my $pass = $offset - $ib0->[0];
619 0           my $len0 = length($ib0->[1]);
620 0 0         if ( $pass > $len0 ) {
    0          
    0          
621             # forward this buf and maybe more
622 0           push @$fwd, [ 0, @{$ib0}[1,3,4] ];
  0            
623             } elsif ( $pass == $len0 ) {
624             # forward this buf, but not more
625 0           push @$fwd, [ 0, @{$ib0}[1,3,4] ];
  0            
626              
627             # add empty buf if this was the last, this will also
628             # trigger resetting pass,prepass below
629 0 0         if ( @$ibuf ) { # still data in buffer
    0          
630             } elsif ( $ib0->[2] < 0 ) {
631             # no eof yet and no further data in ibuf
632             # we might get a replacement at the end of the
633             # buffer so put emptied buffer back
634 0           $ib0->[1] = '';
635 0           push @$ibuf, $ib0;
636             } else {
637 0           push @$ibuf, [ $offset,'' ];
638             }
639 0           last;
640             } elsif ( $ib0->[2] < 0 ) {
641             # streaming type:
642             # forward part of buf
643 0           push @$fwd, [
644             0, # not changed
645             substr($ib0->[1],0,$pass,''), # data
646             $ib0->[3], # callback
647             $ib0->[4], # args
648             ];
649             # keep rest in ibuf
650 0           unshift @$ibuf,$ib0;
651 0           $ib0->[0] += $pass;
652 0           last; # nothing more to forward
653             } else {
654             # packet type: they need to be processed in total
655 0           return $request->fatal("partial $rtype for $ib0->[2]");
656             }
657             }
658             }
659              
660 0 0         if ( @$ibuf ) {
661             # there are still data in ibuf which cannot get passed,
662             # so reset pass, prepass
663 0           $self->{pass}[$dir] = $self->{prepass}[$dir] = 0;
664             } else {
665             # add empty buffer containing only current offset based on
666             # what we last removed from ibuf
667 0           push @$ibuf, [ $ib0->[0] + length($ib0->[1]),'' ];
668             }
669              
670 0           next;
671             }
672              
673              
674             # replace data up to offset
675 0 0         if ( $rtype == IMP_REPLACE ) {
676 0           my ($dir,$offset,$newdata) = @$rv;
677 0 0         $DEBUG && $request->xdebug("got replace($dir) off=$offset data.len=".
678             length($newdata));
679 0           my $ibuf = $self->{ibuf}[$dir];
680 0 0         @$ibuf or die "no ibuf";
681              
682             # if there is an active pass|prepass (e.g. pointing to future data)
683             # the data cannot be replaced
684             return $request->fatal(
685             "cannot replace data which are said to be passed")
686 0 0 0       if $self->{pass}[$dir] or $self->{prepass}[$dir];
687              
688             # we cannot replace future data
689 0 0         return $request->fatal('IMP', "cannot use replace with maxoffset")
690             if $offset == IMP_MAXOFFSET;
691              
692             # data to replace cannot span different types, so they must be in
693             # the first ibuf
694 0           my $ib0 = $ibuf->[0];
695 0           my $rlen = $offset - $ib0->[0];
696 0           my $len0 = length($ib0->[1]);
697              
698             # some sanity checks
699 0 0         if ( $rlen < 0 ) {
    0          
    0          
700 0           return $request->fatal("cannot replace already passed data");
701             } elsif ( $rlen > $len0 ) {
702 0 0 0       return $request->fatal(
703             "replacement cannot span multiple data types")
704             if @$ibuf>1 or $ib0->[2]>0;
705 0           return $request->fatal("cannot replace future data ($rlen>$len0)");
706             } elsif ( $rlen < $len0 ) {
707             # replace part of buffer
708 0 0         return $request->fatal("cannot replace part of packet type")
709             if $ib0->[2]>0;
710              
711             # keep rest and update position
712 0 0         substr( $ib0->[1],0,$rlen,'' ) if $rlen;
713 0           $ib0->[0] += $rlen;
714             } else {
715             # remove complete buffer
716 0 0         if ( @$ibuf>1 ) { # still data in buffer
    0          
717             } elsif ( $ib0->[2] < 0 ) {
718             # no eof yet and no further data in ibuf
719             # we might get a replacement at the end of the
720             # buffer so put emptied buffer back
721 0           $ib0->[0] += $len0;
722 0           $ib0->[1] = '';
723             } else {
724             # replace with empty
725 0           @$ibuf = [ $offset,'' ];
726             }
727             }
728              
729 0           push @{$fwd{$dir}}, [
  0            
730             1, # changed
731             $newdata, # new data
732             $ib0->[3], # callback
733             $ib0->[4], # cbargs
734             ];
735              
736 0           next;
737             }
738 0 0         if ( $rtype ~~ [ IMP_PAUSE, IMP_CONTINUE ] ) {
739 0           my $dir = shift(@$rv);
740 0           my $relay = $self->{request}{conn}{relay};
741 0 0 0       if ( $relay and my $fo = $relay->fd($dir)) {
742 0 0         $fo->mask( r => ($rtype == IMP_PAUSE ? 0:1));
743             }
744 0           next;
745             }
746              
747 0 0         if ( $rtype == IMP_FATAL ) {
748 0           $request->fatal(shift(@$rv));
749 0           next;
750             }
751              
752 0           return $request->fatal("unsupported IMP return type: $rtype");
753             }
754              
755 0 0         %fwd or return; # no passes/replacements...
756              
757 0           while ( my ($dir,$fwd) = each %fwd ) {
758 0           while ( my $fw = shift(@$fwd)) {
759             #warn Dumper($fw); use Data::Dumper;
760 0           my ($changed,$data,$callback,$args) = @$fw;
761 0           $callback->($self,$data,$changed,$args);
762             }
763             }
764             }
765              
766             ############################################################################
767             # send data to IMP analyzer
768             # if we had a previous (pre)pass some data can be forwarded immediatly, for
769             # others we have to wait for the analyzer callback
770             # returns how many bytes of data are waiting for callback, e.g. 0 if we
771             # we can pass everything immediately
772             ############################################################################
773             sub _imp_data {
774 0     0     my ($self,$dir,$data,$offset,$type,$callback,$args) = @_;
775 0           my $ibuf = $self->{ibuf}[$dir];
776 0           my $eobuf = $ibuf->[-1][0] + length($ibuf->[-1][1]);
777              
778 0           my $encoded_data;
779 0 0         if ( my $decode = $self->{decode}{$type+0}[$dir] ) {
780             # set up decoder if not set up yet
781 0 0         if ( ! ref($decode)) {
782             # create function to decode content
783             $self->{decode}{$type+0}[$dir] = $decode = _create_decoder($decode)
784             || return $self->{request}->fatal(
785 0   0       "cannot decode content-encoding $decode");
786             }
787              
788             # offsets relates to original stream, but we put the decoded stream
789             # into ibuf. And offset>0 means, that we have a gap in the input,
790             # which is not allowed, when decoding a stream.
791 0 0         die "cannot use content decoder with gap in data" if $offset;
792              
793 0 0         $encoded_data = $data if $self->{pass_encoded}[$dir];
794             defined( $data = $decode->($data) )
795 0 0         or return $self->{request}->fatal("decoding content failed");
796             }
797              
798 0 0         if ( $offset ) {
799 0 0         die "offset($offset)
800 0 0         $offset = 0 if $offset == $eobuf;
801             }
802              
803 0           my $fwd; # what gets send to analyzer
804              
805 0           my $dlen = length($data);
806 0           my $pass = $self->{pass}[$dir];
807 0 0         if ( $pass ) {
808             # if pass is set there should be no data in ibuf, e.g. everything
809             # before should have been passed
810 0 0         ! $ibuf->[0][2] or die "unexpected data in ibuf";
811              
812 0 0         if ( $pass == IMP_MAXOFFSET ) {
813             # pass thru w/o analyzing
814 0           $ibuf->[0][0] += $dlen;
815 0 0         $DEBUG && $self->{request}->xdebug("can pass($dir) infinite");
816 0   0       return $callback->($self,$encoded_data // $data,0,$args);
817             }
818              
819 0   0       my $canpass = $pass - ( $offset||$eobuf );
820 0 0         if ( $canpass <= 0 ) {
    0          
    0          
821             # cannot pass anything, pass should have been reset already
822 0           die "pass($dir,$pass) must be point into future ($canpass)";
823             } elsif ( $canpass >= $dlen) {
824             # can pass everything
825 0           $ibuf->[0][0] += $dlen;
826 0 0         if ( $data eq '' ) {
827             # forward eof to analyzer
828 0           $fwd = $data;
829 0 0         $DEBUG && $self->{request}->xdebug("pass($dir) eof");
830 0           goto SEND2IMP;
831             }
832             $DEBUG && $self->{request}->xdebug(
833 0 0         "can pass($dir) all: pass($canpass)>=data.len($dlen)");
834 0   0       return $callback->($self,$encoded_data // $data,0,$args);
835             } elsif ( $type < 0 ) {
836             # can pass part of data, only for streaming types
837             # remove from data what can be passed
838 0 0         die "body might change" if $self->{pass_encoded}[$dir];
839 0           $ibuf->[0][0] += $canpass;
840 0           my $passed_data = substr($data,0,$canpass,'');
841 0           $eobuf += $canpass;
842 0           $dlen = length($data);
843             $DEBUG && $self->{request}->xdebug(
844 0 0         "can pass($dir) part: pass($canpass)
845 0           $callback->($self,$passed_data,0,$args); # callback but continue
846             }
847             }
848              
849 0           $fwd = $data; # this must be forwarded to analyzer
850              
851 0           my $prepass = $self->{prepass}[$dir];
852 0 0         if ( $prepass ) {
853             # if prepass is set there should be no data in ibuf, e.g. everything
854             # before should have been passed
855 0 0         ! $ibuf->[0][2] or die "unexpected data in ibuf";
856 0 0         if ( $prepass == IMP_MAXOFFSET ) {
857             # prepass everything
858 0           $ibuf->[0][0] += $dlen;
859 0 0         $DEBUG && $self->{request}->xdebug("can prepass($dir) infinite");
860 0   0       $callback->($self,$encoded_data // $data,0,$args); # callback but continue
861 0           goto SEND2IMP;
862             }
863              
864 0   0       my $canprepass = $prepass - ( $offset||$eobuf );
865 0 0         if ( $canprepass <= 0 ) {
    0          
    0          
866             # cannot prepass anything, prepass should have been reset already
867 0           die "prepass must be point into future";
868             } elsif ( $canprepass >= $dlen) {
869             # can prepass everything
870 0           $ibuf->[0][0] += $dlen;
871 0   0       $callback->($self,$encoded_data // $data,0,$args); # callback but continue
872             $DEBUG && $self->{request}->xdebug(
873 0 0         "can prepass($dir) all: pass($canprepass)>=data.len($dlen)");
874 0           goto SEND2IMP;
875             } elsif ( $type < 0 ) {
876             # can prepass part of data, only for streaming types
877             # remove from data what can be prepassed
878 0 0         die "body might change" if $self->{pass_encoded}[$dir];
879 0           $ibuf->[0][0] += $canprepass;
880 0           my $passed_data = substr($data,0,$canprepass,'');
881 0           $eobuf += $canprepass;
882 0           $dlen = length($data);
883             $DEBUG && $self->{request}->xdebug(
884 0 0         "can prepass($dir) part: prepass($canprepass)
885 0           $callback->($self,$passed_data,0,$args); # callback but continue
886             }
887             }
888              
889             # everything else in $data must be added to buffer
890            
891             # there can be no gaps inside ibuf because caller is only allowed to
892             # pass data which we explicitly allowed
893 0 0 0       if ( $offset && $offset > $eobuf ) {
894 0 0         defined $ibuf->[0][2] and # we have still data in ibuf!
895             die "there can be no gaps in ibuf";
896             }
897 0 0 0       if ( ! defined $ibuf->[-1][2] ) {
    0 0        
898             # replace buf, because it was empty
899 0   0       $ibuf->[-1] = [ $offset||$eobuf,$data,$type,$callback,$args ];
900             } elsif ( $type < 0
901             and $type == $ibuf->[-1][2]
902             and $callback == $ibuf->[-1][3]
903             ) {
904             # streaming data, concatinate to existing buf of same type
905 0           $ibuf->[-1][1] .= $data;
906             } else {
907             # different type or non-streaming data, add new buf
908 0   0       push @$ibuf,[ $offset||$eobuf,$data,$type,$callback,$args ];
909             }
910 0 0         $DEBUG && $self->{request}->xdebug( "ibuf.length=%d",
911             $ibuf->[-1][0] + length($ibuf->[-1][1]) - $ibuf->[0][0]);
912              
913             SEND2IMP:
914 0 0         $DEBUG && $self->{request}->xdebug("forward(%d) %d bytes type=%s off=%d to analyzer",
915             $dir,length($fwd),$type,$offset);
916 0           $self->{imp}->data($dir,$fwd,$offset,$type);
917 0           return length($fwd);
918             }
919              
920             #####################################################################
921             # parse header fields
922             # taken from Net::Inspect::L7::HTTP (where it got put in by myself)
923             #####################################################################
924             my $token = qr{[^()<>@,;:\\"/\[\]?={}\x00-\x20\x7f-\xff]+};
925             my $token_value_cont = qr{
926             ($token): # key:
927             [\040\t]*([^\r\n]*?)[\040\t]* # value
928             ((?:\r?\n[\040\t][^\r\n]*)*) # continuation lines
929             \r?\n # (CR)LF
930             }x;
931             sub _parse_hdrfields {
932 0     0     my ($hdr,$fields) = @_;
933 0           my $bad = '';
934             parse:
935 0           while ( $hdr =~m{\G$token_value_cont}gc ) {
936 0 0         if ($3 eq '') {
937             # no continuation line
938 0           push @{$fields->{ lc($1) }},$2;
  0            
939             } else {
940             # with continuation line
941 0           my ($k,$v) = ($1,$2.$3);
942             # value-part -> ' ' + value-part
943 0           $v =~s{[\r\n]+[ \t](.*?)[ \t]*}{ $1}g;
944 0           push @{$fields->{ lc($k) }},$v;
  0            
945             }
946             }
947 0 0 0       if (pos($hdr)//0 != length($hdr)) {
948             # bad line inside
949 0           substr($hdr,0,pos($hdr),'');
950 0 0         $bad .= $1 if $hdr =~s{\A([^\n]*)\n}{};
951 0           goto parse;
952             }
953 0           return $bad;
954             }
955              
956             #####################################################################
957             # create decoder function for gzip|deflate content-encoding
958             #####################################################################
959             sub _create_decoder {
960 0     0     my $typ = shift;
961 0 0         $typ ~~ [ 'gzip','deflate' ] or return; # not supported
962              
963 0           my $gzip_csum;
964 0           my $buf = '';
965 0           my $inflate;
966              
967             return sub {
968 0     0     my $data = shift;
969 0           $buf .= $data;
970              
971 0 0         goto inflate if defined $inflate;
972              
973             # read gzip|deflate header
974 0           my $wb;
975 0 0         my $more = $data eq '' ? undef:''; # need more data if possible
976 0 0         if ( $typ eq 'gzip' ) {
977 0           my $hdr_len = 10; # minimum gzip header
978              
979 0 0         return $more if length($buf) < $hdr_len;
980 0           my ($magic,$method,$flags) = unpack('vCC',$buf);
981 0 0 0       if ( $magic != 0x8b1f or $method != Z_DEFLATED or $flags & 0xe0 ) {
      0        
982 0 0         $DEBUG && debug("no valid gzip header. assuming plain text");
983 0           $inflate = ''; # defined but false
984 0           goto inflate;
985             }
986 0 0         if ( $flags & 4 ) {
987             # skip extra section
988 0 0         return $more if length($buf) < ($hdr_len+=2);
989 0           $hdr_len += unpack('x10v',$buf);
990 0 0         return $more if length($buf) < $hdr_len;
991             }
992 0 0         if ( $flags & 8 ) {
993             # skip filename
994 0           my $o = index($buf,"\0",$hdr_len);
995 0 0         return $more if $o == -1; # end of filename not found
996 0           $hdr_len = $o+1;
997             }
998 0 0         if ( $flags & 16 ) {
999             # skip comment
1000 0           my $o = index($buf,"\0",$hdr_len);
1001 0 0         return $more if $o == -1; # end of comment not found
1002 0           $hdr_len = $o+1;
1003             }
1004 0 0         if ( $flags & 2 ) {
1005             # skip CRC
1006 0 0         return $more if length($buf) < ($hdr_len+=2);
1007             }
1008              
1009             # remove header
1010 0           substr($buf,0,$hdr_len,'');
1011 0           $gzip_csum = 8; # 8 byte Adler CRC at end
1012 0           $wb = -MAX_WBITS(); # see Compress::Raw::Zlib
1013              
1014             } else {
1015             # deflate
1016             # according to RFC it should be zlib, but due to the encoding name
1017             # often real deflate is used instead
1018             # check magic bytes to decide
1019              
1020             # lets see if it looks like a zlib header
1021             # check for CM=8, CMID<=7 in first byte and valid FCHECK in
1022             # seconds byte
1023 0 0         return $more if length($buf)<2;
1024 0           my $magic = unpack('C',substr($buf,0,1));
1025 0 0 0       if (
      0        
1026             ( $magic & 0b1111 ) == 8 # CM = 8
1027             and $magic >> 4 <= 7 # CMID <= 7
1028             and unpack('n',substr($buf,0,2)) % 31 == 0 # valid FCHECK
1029             ) {
1030             # looks like zlib header
1031 0           $wb = +MAX_WBITS(); # see Compress::Raw::Zlib
1032             } else {
1033             # assume deflate
1034 0           $wb = -MAX_WBITS(); # see Compress::Raw::Zlib
1035             }
1036             }
1037              
1038 0 0         $inflate = Compress::Raw::Zlib::Inflate->new(
1039             -WindowBits => $wb,
1040             -AppendOutput => 1,
1041             -ConsumeInput => 1
1042             ) or die "cannot create inflation stream";
1043              
1044 0 0         inflate:
1045              
1046             return '' if $buf eq '';
1047              
1048 0 0         if ( ! $inflate ) {
1049             # wrong gzip header: sometimes servers claim to use gzip
1050             # if confronted with "Accept-Encoding: identity" but in reality
1051             # they send plain text
1052             # so consider it plain text and don't decode
1053 0           my $out = $buf;
1054 0           $buf = '';
1055 0           return $out
1056             }
1057              
1058 0           my $out = '';
1059 0           my $stat = $inflate->inflate(\$buf,\$out);
1060 0 0         if ( $stat == Z_STREAM_END ) {
    0          
1061 0 0 0       if ( $gzip_csum and length($buf) >= $gzip_csum ) {
1062             # TODO - check checksum - but what would it help?
1063 0           substr($buf,0,$gzip_csum,'');
1064 0           $gzip_csum = 0;
1065             }
1066             } elsif ( $stat != Z_OK ) {
1067 0 0         $DEBUG && debug("decode failed: $stat");
1068 0           return; # error
1069             }
1070 0           return $out
1071 0           };
1072             }
1073              
1074             1;