File Coverage

blib/lib/App/HTTP_Proxy_IMP/IMP.pm
Criterion Covered Total %
statement 52 449 11.5
branch 6 316 1.9
condition 0 127 0.0
subroutine 12 34 35.2
pod 0 9 0.0
total 70 935 7.4


line stmt bran cond sub pod time code
1 1     1   7 use strict;
  1         2  
  1         31  
2 1     1   6 use warnings;
  1         2  
  1         36  
3              
4             package App::HTTP_Proxy_IMP::IMP;
5              
6 1     1   520 use Net::Inspect::Debug qw(:DEFAULT $DEBUG);
  1         1274  
  1         5  
7 1     1   677 use Net::IMP::Debug var => \$DEBUG, sub => \&debug;
  1         697  
  1         6  
8 1     1   583 use Net::IMP qw(:DEFAULT :log);
  1         2640  
  1         172  
9 1     1   458 use Net::IMP::HTTP;
  1         1344  
  1         79  
10 1     1   7 use Scalar::Util 'weaken';
  1         2  
  1         57  
11 1     1   638 use Hash::Util 'lock_ref_keys';
  1         3003  
  1         8  
12 1     1   731 use Compress::Raw::Zlib;
  1         5498  
  1         216  
13 1     1   9 no warnings 'experimental'; # smartmatch
  1         2  
  1         36  
14 1     1   6 use Carp;
  1         2  
  1         6092  
15              
16             my %METHODS_RFC2616 = map { ($_,1) } qw( GET HEAD POST PUT DELETE OPTIONS CONNECT TRACE );
17             my %METHODS_WITHOUT_RQBODY = map { ($_,1) } qw( GET HEAD DELETE CONNECT );
18             my %METHODS_WITH_RQBODY = map { ($_,1) } qw( POST PUT );
19             my %CODE_WITHOUT_RPBODY = map { ($_,1) } qw(204 205 304);
20             my %METHODS_WITHOUT_RPBODY = map { ($_,1) } qw(HEAD);
21              
22             # we want plugins to suppport the HTTP Request innterface
23             my $interface = [
24             IMP_DATA_HTTPRQ,
25             [
26             IMP_PASS,
27             IMP_PREPASS,
28             IMP_REPLACE,
29             IMP_TOSENDER,
30             IMP_DENY,
31             IMP_LOG,
32             IMP_ACCTFIELD,
33             IMP_PAUSE,
34             IMP_CONTINUE,
35             IMP_FATAL,
36             ]
37             ];
38              
39             sub can_modify {
40 0     0 0 0 return shift->{can_modify};
41             }
42              
43             # create a new factory object
44             sub new_factory {
45 2     2 0 9 my ($class,%args) = @_;
46 2         4 my @factory;
47 2 50       3 for my $module (@{ delete $args{mod} || [] }) {
  2         10  
48 2 50       6 if ( ref($module)) {
49             # assume it is already an IMP factory object
50 2         4 push @factory, $module;
51 2         5 next;
52             }
53              
54             # --filter mod=args
55 0 0       0 my ($mod,$args) = $module =~m{^([a-z][\w:]*)(?:=(.*))?$}i
56             or die "invalid module $module";
57 0 0       0 eval "require $mod" or die "cannot load $mod args=$args: $@";
58 0   0     0 my %args = $mod->str2cfg($args//'');
59 0 0       0 my $factory = $mod->new_factory(%args)
60             or croak("cannot create Net::IMP factory for $mod");
61 0 0 0     0 $factory =
62             $factory->get_interface( $interface ) &&
63             $factory->set_interface( $interface )
64             or croak("$mod does not implement the interface supported by us");
65 0         0 push @factory,$factory;
66             }
67              
68 2 50       5 @factory or return;
69 2 50       7 if (@factory>1) {
70             # for cascading filters we need Net::IMP::Cascade
71 0         0 require Net::IMP::Cascade;
72 0 0       0 my $cascade = Net::IMP::Cascade->new_factory( parts => [ @factory ])
73             or croak("cannot create Net::IMP::Cascade factory");
74 0 0       0 $cascade = $cascade->set_interface( $interface ) or
75             croak("cascade does not implement the interface supported by us");
76 0         0 @factory = $cascade;
77             }
78 2         3 my $factory = $factory[0];
79              
80 2         10 my $self = bless {
81             %args,
82             imp => $factory, # IMP factory object
83             can_modify => 0, # does interface support IMP_REPLACE, IMP_TOSENDER
84             }, $class;
85 2         7 lock_ref_keys($self);
86              
87             # update can_modify
88 2         32 CHKIF: for my $if ( $factory->get_interface ) {
89 2         20 my ($dt,$rt) = @$if;
90 2         5 for (@$rt) {
91 4 100       36 $_ ~~ [ IMP_REPLACE, IMP_TOSENDER ] or next;
92 2         7 $self->{can_modify} =1;
93 2         5 last CHKIF;
94             }
95             }
96            
97 2         10 return $self;
98             }
99              
100             # create a new analyzer based on the factory
101             sub new_analyzer {
102 0     0 0   my ($factory,$request,$meta) = @_;
103              
104 0           my %meta = %$meta;
105             # IMP uses different *addr than Net::Inspect, translate
106             # [s]ource -> [c]lient, [d]estination -> [s]erver
107 0           $meta{caddr} = delete $meta{saddr};
108 0           $meta{cport} = delete $meta{sport};
109 0           $meta{saddr} = delete $meta{daddr};
110 0           $meta{sport} = delete $meta{dport};
111              
112 0           my $analyzer = $factory->{imp}->new_analyzer( meta => \%meta );
113              
114             my $self = bless {
115             request => $request, # App::HTTP_Proxy_IMP::Request object
116             imp => $analyzer,
117             # incoming data, put into analyzer
118             # \@list of [ buf_base,buf,type,callback,$cb_arg ] per dir
119             ibuf => [
120             [ [0,''] ],
121             [ [0,''] ],
122             ],
123             pass => [0,0], # pass allowed up to given offset (per dir)
124             prepass => [0,0], # prepass allowed up to given offset (per dir)
125             fixup_header => [], # sub to fixup content-length in header once known
126             eof => [0,0], # got eof in dir ?
127             decode => undef, # decoder for content-encoding decode{type}[dir]
128             pass_encoded => undef, # pass body encoded (analyzer will not change body)
129             method => undef, # request method
130             logsub => $factory->{logsub}, # how to log IMP_OG
131 0           }, ref($factory);
132 0           lock_ref_keys($self);
133 0           weaken($self->{request});
134              
135             # set callback, this might trigger callback immediately if there are
136             # results pending
137 0           weaken( my $wself = $self );
138 0     0     $analyzer->set_callback( sub { _imp_callback($wself,@_) } );
  0            
139 0           return $self;
140             }
141              
142              
143             sub request_header {
144 0     0 0   my ($self,$hdr,$xhdr,@callback) = @_;
145 0           my $clen = $xhdr->{content_length};
146              
147             # new body might change content-length info in request header
148             # need to defer sending header until body length is known
149 0 0         if ( ! $METHODS_WITHOUT_RQBODY{$xhdr->{method}} ) {
150 0 0 0       if ( ! defined $clen and $xhdr->{method} ne 'CONNECT') {
151             # length not known -> chunking
152 0           die "FIXME: chunking request body not yet supported";
153             }
154              
155 0           my $hlen = length($hdr);
156             $self->{fixup_header}[0] = sub {
157 0     0     my ($self,$hdr,%args) = @_;
158 0           my $size = $args{content};
159 0 0         goto fix_clen if defined $size;
160              
161 0 0         if ( my $pass = $self->{pass}[0] ) {
162 0 0 0       if ( $pass == IMP_MAXOFFSET or $pass >= $hlen + $clen ) {
163             # will not change body
164 0           goto fix_clen;
165             }
166             }
167 0 0         if ( my $prepass = $self->{prepass}[0] ) {
168 0 0 0       if ( $prepass == IMP_MAXOFFSET or $prepass >= $hlen + $clen ) {
169             # will not change body
170 0           goto fix_clen;
171             }
172             }
173 0 0         if ($self->{ibuf}[0][0][0] >= $hlen + $clen) { # ibuf[client].base
174             # everything passed thru ibuf
175 0           goto fix_clen;
176             }
177              
178             # need to defer header until all of the body is passed
179             # or replaced, then we know the size
180 0           return;
181              
182             fix_clen:
183              
184 0 0         if (!defined $size) {
185             # bytes in ibuf and outstanding bytes will not be changed, so:
186             # new_content_length =
187             # ( orig_clen + orig_hlen - received ) # not yet received
188             # + ( received - ibuf.base ) # still in ibuf
189             # + defered_body.length # ready to forward
190             # --->
191             # orig_clen + orig_hlen - ibuf.base + defered_body.length
192             $size = $clen + $hlen # orig_clen + orig_hlen
193             - $self->{ibuf}[0][0][0] # ibuf.base
194 0           + $args{defered}; # defered_body.length
195             }
196              
197 0 0         $DEBUG && $self->{request}->xdebug("fixup header with clen=%d",$size);
198             # replace or add content-length header
199 0 0         $$hdr =~s{^(Content-length:[ \t]*)(\d+)}{$1$size}mi
200             || $$hdr =~s{(\n)}{$1Content-length: $size\r\n};
201 0           return 1;
202 0           };
203             }
204              
205              
206             # send data to analyzer.
207             # will call back into request on processed data
208 0           _imp_data($self,0,$hdr,0,IMP_DATA_HTTPRQ_HEADER,
209             \&_request_header_imp,[ $xhdr,@callback ]);
210             }
211              
212              
213             ############################################################################
214             # callback from IMP after passing/replacing the HTTP request header
215             # will reparse the header if changed and continue in @callback from request
216             ############################################################################
217             sub _request_header_imp {
218 0     0     my ($self,$hdr,$changed,$args) = @_;
219 0           my ($xhdr,$callback,@cb_args) = @$args;
220              
221 0 0         if ( $changed ) {
222             # we need to parse the header again and update xhdr
223 0           my ($met,$url,$version,$fields) = $hdr =~m{ \A
224             (\S+)[\040\t]+
225             (\S+)[\040\t]+
226             HTTP/(1\.[01])[\040\t]*
227             \r?\n
228             (.*?\n)
229             \r?\n\Z
230             }isx;
231              
232             # internal URL are not accepted by the client itself, only from
233             # plugin. Set xhdr.internal_url if we see, that IMP plugin rewrote
234             # url to internal one and strip internal:// again so that original
235             # URL could be logged
236             my $internal = $met ne 'CONNECT'
237 0   0       && $xhdr->{url} !~m{^internal://}i
238             && $url =~s{^internal://}{}i;
239              
240 0           my %kv;
241 0           my $bad = _parse_hdrfields($fields,\%kv);
242 0 0         $xhdr = {
    0          
243             method => uc($met),
244             version => $version,
245             url => $url,
246             fields => \%kv,
247             $bad ? ( junk => $bad ) :(),
248             $internal ? ( internal_url => 1 ):(),
249             };
250             }
251              
252             # we don't know the content length yet, unless it can be determined by the
253             # request method. If we got a (pre)pass until the end of the request body
254             # fixup_header will know it and adjust the header
255             $xhdr->{content_length} =
256 0 0         $METHODS_WITHOUT_RQBODY{$xhdr->{method}} ? 0:undef;
257              
258 0           $self->{method} = $xhdr->{method};
259 0           return $callback->(@cb_args,$hdr,$xhdr);
260             }
261              
262             ############################################################################
263             # fix request header by setting correct content-length
264             # returns true if header could be fixed
265             ############################################################################
266             sub fixup_request_header {
267 0     0 0   my ($self,$hdr_ref,%args) = @_;
268 0 0         my $sub = $self->{fixup_header}[0] or return 1;
269 0           my $ok = $sub->($self,$hdr_ref,%args);
270 0 0         $self->{fixup_header}[0] = undef if $ok;
271 0           return $ok;
272             }
273              
274              
275             ############################################################################
276             # process request body data
277             # just feed to analyzer and call back into request once done
278             ############################################################################
279             sub request_body {
280 0     0 0   my ($self,$data,@callback) = @_;
281              
282             # feed data into IMP
283 0 0         $self->{eof}[0] = 1 if $data eq '';
284 0           _imp_data($self,0,$data,0,IMP_DATA_HTTPRQ_CONTENT,
285             \&_request_body_imp,\@callback );
286             }
287              
288             sub _request_body_imp {
289 0     0     my ($self,$data,$changed,$args) = @_;
290 0           my ($callback,@cb_args) = @$args;
291 0           my $eof = _check_eof($self,0);
292 0 0 0       $callback->(@cb_args,$data,$eof) if $data ne '' || $eof;
293             }
294              
295             ############################################################################
296             # process response header
297             ############################################################################
298             sub response_header {
299 0     0 0   my ($self,$hdr,$xhdr,@callback) = @_;
300              
301             # if content is encoded we need to decode it in order to analyze
302             # it. For now only set decode to the encoding method, this will
303             # be changed to a decoding function once we need it in the body
304 0 0         if ( my $ce = $xhdr->{fields}{'content-encoding'} ) {
305             # the right way would be to extract all encodings and then complain, if
306             # there is an encoding we don't support. Instead we just look for the
307             # encodings we support
308 0           my %ce = map { lc($_) => 1 } map { m{\b(?:x-)?(gzip|deflate)\b}ig } @$ce;
  0            
  0            
309 0 0         $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] = join(", ",keys %ce)
310             if %ce;
311             }
312              
313             # header length is needed in callback
314 0           $xhdr->{header_length} = length($hdr);
315 0           _imp_data($self,1,$hdr,0,IMP_DATA_HTTPRQ_HEADER,
316             \&_response_header_imp,[$xhdr,@callback] );
317             }
318              
319              
320             ############################################################################
321             # callback after passing/replacing the HTTP response header
322             # will reparse the header if changed and continue in the request via
323             # callback
324             ############################################################################
325             sub _response_header_imp {
326 0     0     my ($self,$hdr,$changed,$args) = @_;
327 0           my ($xhdr,$callback,@cb_args) = @$args;
328              
329 0           my $orig_clen = $xhdr->{content_length};
330 0           my $orig_hlen = $xhdr->{header_length};
331              
332 0 0         if ( $changed ) {
333             # we need to parse the header again and update xhdr
334 0           my ($version,$code,$reason,$fields) = $hdr =~m{ \A
335             HTTP/(1\.[01])[\040\t]+
336             (\d\d\d)
337             (?:[\040\t]+([^\r\n]*))?
338             \r?\n
339             (.*?\n)
340             \r?\n\Z
341             }isx;
342              
343 0           my %kv;
344 0           my $bad = _parse_hdrfields($fields,\%kv);
345 0 0         $xhdr = {
346             code => $code,
347             version => $version,
348             reason => $reason,
349             fields => \%kv,
350             $bad ? ( junk => $bad ) :(),
351             };
352             }
353              
354             # except for some codes or request methods we don't know the
355             # content-length of the body yet
356             # in these cases we try in this order
357             # - check if we got a (pre)pass for the whole body already
358             # - use chunked encoding if client speaks HTTP/1.1
359             # - don't specify content-length and close request with connection close
360              
361             # we don't change $hdr here because it will be rebuild from fields anyway
362 0 0 0       if ( $CODE_WITHOUT_RPBODY{$xhdr->{code}} or $xhdr->{code} < 200 ) {
363 0           $xhdr->{content_length} = 0;
364             # better remove them
365 0           delete @{ $xhdr->{fields} }{ qw/ content-length transfer-encoding / };
  0            
366 0           goto callback;
367             }
368              
369 0 0         if ( $METHODS_WITHOUT_RPBODY{ $self->{method} } ) {
370 0           $xhdr->{content_length} = 0;
371             # keep content-length etc, client might want to peek into it using HEAD
372 0           goto callback;
373             }
374            
375             # reset infos about content-length
376 0           $xhdr->{content_length} = $xhdr->{chunked} = undef;
377 0           delete @{ $xhdr->{fields} }{ qw/ content-length transfer-encoding / };
  0            
378              
379             # if we have read the whole body already or at least know, that we will
380             # not change anymore data, we could compute the new content-length
381 0           my $clen;
382             my $nochange;
383 0           while ( defined $orig_clen ) {
384 0           my $rpsize = $orig_hlen + $orig_clen;
385              
386 0 0         if ( my $pass = $self->{pass}[1] ) {
387 0 0 0       if ( $pass == IMP_MAXOFFSET or $pass >= $rpsize ) {
388             # will not look at and not change body
389 0           $nochange = 1;
390 0           goto compute_clen;
391             }
392             }
393 0 0         if ( my $prepass = $self->{prepass}[1] ) {
394 0 0 0       if ( $prepass == IMP_MAXOFFSET or $prepass >= $rpsize ) {
395             # will not change body
396 0           $nochange = 1;
397 0           goto compute_clen;
398             }
399             }
400 0 0         if ($self->{ibuf}[1][0][0] >= $rpsize) { # ibuf[server].base
401             # everything passed thru ibuf
402 0           goto compute_clen;
403             }
404              
405             # we still don't know final size
406 0           last;
407              
408             compute_clen:
409             # bytes in ibuf and outstanding bytes will not be changed, so:
410             # new_content_length =
411             # ( total_size - received ) # not yet received
412             # + ( received - ibuf.base ) # still in ibuf
413             # --->
414             # total_size - ibuf.base
415 0           $clen = $rpsize - $self->{ibuf}[1][0][0];
416              
417 0           last;
418             }
419              
420 0 0         if ( $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] ) {
421 0 0         if ( $nochange ) {
422             # we will pass encoded stuff, either no decoding needs to
423             # be done (pass) or we will decode only for the analyzer (prepass)
424             # which will only watch at the content, but not change it
425 0           $self->{pass_encoded}[1] = 1;
426              
427 0           my $pass = $self->{pass}[1];
428 0 0 0       if ( $pass and defined $orig_clen and (
      0        
      0        
429             $pass == IMP_MAXOFFSET or
430             $pass >= $orig_clen + $orig_hlen )) {
431             # no need to decode body
432 0           $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] = undef;
433             }
434             } else {
435             # content is encoded and inspection wants to see decoded stuff,
436             # which we then will forward too
437             # but decoding might change length
438 0           $clen = undef;
439             # the content will be delivered decoded
440 0           delete $xhdr->{fields}{'content-encoding'}
441             }
442             }
443 0 0         if ( defined $clen ) {
444 0           $xhdr->{fields}{'content-length'} = [ $clen ];
445 0           $xhdr->{content_length} = $clen;
446             }
447              
448             callback:
449 0           $callback->(@cb_args,$hdr,$xhdr);
450             }
451              
452              
453              
454             ############################################################################
455             # handle response body data
456             ############################################################################
457             sub response_body {
458 0     0 0   my ($self,$data,@callback) = @_;
459              
460             # forward to IMP analyzer
461 0 0         $self->{eof}[1] = 1 if $data eq '';
462 0           _imp_data($self,1,$data,0,IMP_DATA_HTTPRQ_CONTENT,
463             \&_response_body_imp,\@callback);
464             }
465              
466             sub _response_body_imp {
467 0     0     my ($self,$data,$changed,$args) = @_;
468 0           my ($callback,@cb_args) = @$args;
469 0           my $eof = _check_eof($self,1);
470 0 0 0       $callback->(@cb_args,$data,$eof) if $data ne '' || $eof;
471             }
472              
473              
474             sub _check_eof {
475 0     0     my ($self,$dir) = @_;
476             $DEBUG && $self->{request}->xdebug(
477             "check eof[%d] - eof=%d - %s - (pre)pass=%d/%d",
478             $dir,$self->{eof}[$dir], _show_buf($self,$dir),
479             $self->{prepass}[$dir],
480 0 0         $self->{pass}[$dir]
481             );
482             return $self->{eof}[$dir] # received eof
483             && ! defined $self->{ibuf}[$dir][0][2] # no more data in buf
484             && ( # (pre)pass til end ok
485             $self->{prepass}[$dir] == IMP_MAXOFFSET
486 0   0       || $self->{pass}[$dir] == IMP_MAXOFFSET
487             );
488             }
489              
490             sub _show_buf {
491 0     0     my ($self,$dir) = @_;
492             return join('|',
493 0   0       map { ($_->[2]||'none')."($_->[0],+".length($_->[1]).")" }
494 0           @{ $self->{ibuf}[$dir] }
  0            
495             );
496             }
497              
498              
499              
500             ############################################################################
501             # Websockets, TLS upgrades etc
502             # if not IMP the forwarding will be done inside this function, otherwise it
503             # will be done in _in_data_imp, which gets called by IMP callback
504             ############################################################################
505             sub data {
506 0     0 0   my ($self,$dir,$data,@callback) = @_;
507              
508             # forward to IMP analyzer
509 0 0         $self->{eof}[$dir] = 1 if $data eq '';
510 0           _imp_data($self,$dir,$data,0,IMP_DATA_HTTPRQ_CONTENT,
511             \&_data_imp,[$dir,@callback]);
512             }
513              
514             sub _data_imp {
515 0     0     my ($self,$data,$changed,$args) = @_;
516 0           my ($dir,$callback,@cb_args) = @$args;
517             my $eof = $self->{eof}[$dir] && # got eof from server
518 0   0       ! defined $self->{ibuf}[$dir][0][2]; # no more data in ibuf[server]
519 0 0 0       $callback->(@cb_args,$dir,$data,$eof) if $data ne '' || $eof;
520             }
521              
522              
523              
524             ############################################################################
525             # callback from IMP
526             # process return types and trigger type specific callbacks on (pre)pass/replace
527             ############################################################################
528             sub _imp_callback {
529 0     0     my $self = shift;
530              
531 0           my %fwd; # forwarded data, per dir
532 0           for my $rv (@_) {
533              
534             # if the request got closed in between just return
535 0 0         my $request = $self->{request} or return;
536              
537 0           my $rtype = shift(@$rv);
538              
539             # deny further data
540 0 0         if ( $rtype == IMP_DENY ) {
541 0           my ($impdir,$msg) = @$rv;
542 0 0         $DEBUG && $request->xdebug("got deny($impdir) $msg");
543 0   0       return $request->deny($msg // 'closed by imp');
544             }
545              
546             # log some data
547 0 0         if ( $rtype == IMP_LOG ) {
548 0           my ($impdir,$offset,$len,$level,$msg) = @$rv;
549 0 0         $DEBUG && $request->xdebug("got log($impdir,$level) $msg");
550 0 0         if ( my $sub = $self->{logsub} ) {
551 0           $sub->($level,$msg,$impdir,$offset,$len)
552             }
553 0           next;
554             }
555              
556             # set accounting field
557 0 0         if ( $rtype == IMP_ACCTFIELD ) {
558 0           my ($key,$value) = @$rv;
559 0 0         $DEBUG && $request->xdebug("got acct $key => $value");
560 0           $request->{acct}{$key} = $value;
561 0           next;
562             }
563              
564             # (pre)pass data up to offset
565 0 0         if ( $rtype ~~ [ IMP_PASS, IMP_PREPASS ]) {
566 0           my ($dir,$offset) = @$rv;
567 0 0         $DEBUG && $request->xdebug("got $rtype($dir) off=$offset "._show_buf($self,$dir));
568              
569 0 0         if ( $rtype == IMP_PASS ) {
570             # ignore pass if it's not better than a previous pass
571 0 0 0       if ( $self->{pass}[$dir] == IMP_MAXOFFSET ) {
    0          
572             # there is no better thing than IMP_MAXOFFSET
573 0           next;
574             } elsif ( $offset == IMP_MAXOFFSET
575             or $offset > $self->{ibuf}[$dir][0][0] ) {
576             # we can pass new data
577 0           $self->{pass}[$dir] = $offset;
578             } else {
579             # offset is no better than previous pass
580 0           next;
581             }
582              
583             } else { # IMP_PREPASS
584             # ignore prepass if it's not better than a previous pass
585             # and a previous prepaself->{ibuf}[1][0]
586 0 0 0       if ( $self->{pass}[$dir] == IMP_MAXOFFSET
    0 0        
587             or $self->{prepass}[$dir] == IMP_MAXOFFSET ) {
588             # there is no better thing than IMP_MAXOFFSET
589 0 0         $DEBUG && debug("new off $offset no better than existing (pre)pass=max");
590 0           next;
591             } elsif ( $offset == IMP_MAXOFFSET
592             or $offset > $self->{ibuf}[$dir][0][0] ) {
593             # we can prepass new data
594 0           $self->{prepass}[$dir] = $offset;
595 0 0         $DEBUG && debug("update prepass with new off $offset");
596             } else {
597             # offset is no better than previous pass
598 0 0         $DEBUG && debug(
599             "new off $offset no better than existing $self->{ibuf}[$dir][0][0]");
600 0           next;
601             }
602             }
603              
604             # collect data up to offset for forwarding
605             # list of [ changed,data,callback,cbarg ]
606 0   0       my $fwd = $fwd{$dir} ||= [];
607              
608 0           my $ibuf = $self->{ibuf}[$dir];
609 0           my $ib0; # top of ibuf, e.g. ibuf[0]
610              
611 0           while ( @$ibuf ) {
612 0           $ib0 = shift(@$ibuf);
613 0 0         defined $ib0->[2] or last; # dummy entry with no type
614              
615 0 0         if ( $offset == IMP_MAXOFFSET ) {
616             # forward this buf and maybe more
617 0           push @$fwd, [ 0, @{$ib0}[1,3,4] ];
  0            
618             } else {
619 0           my $pass = $offset - $ib0->[0];
620 0           my $len0 = length($ib0->[1]);
621 0 0         if ( $pass > $len0 ) {
    0          
    0          
622             # forward this buf and maybe more
623 0           push @$fwd, [ 0, @{$ib0}[1,3,4] ];
  0            
624             } elsif ( $pass == $len0 ) {
625             # forward this buf, but not more
626 0           push @$fwd, [ 0, @{$ib0}[1,3,4] ];
  0            
627              
628             # add empty buf if this was the last, this will also
629             # trigger resetting pass,prepass below
630 0 0         if ( @$ibuf ) { # still data in buffer
    0          
631             } elsif ( $ib0->[2] < 0 ) {
632             # no eof yet and no further data in ibuf
633             # we might get a replacement at the end of the
634             # buffer so put emptied buffer back
635 0           $ib0->[1] = '';
636 0           push @$ibuf, $ib0;
637             } else {
638 0           push @$ibuf, [ $offset,'' ];
639             }
640 0           last;
641             } elsif ( $ib0->[2] < 0 ) {
642             # streaming type:
643             # forward part of buf
644 0           push @$fwd, [
645             0, # not changed
646             substr($ib0->[1],0,$pass,''), # data
647             $ib0->[3], # callback
648             $ib0->[4], # args
649             ];
650             # keep rest in ibuf
651 0           unshift @$ibuf,$ib0;
652 0           $ib0->[0] += $pass;
653 0           last; # nothing more to forward
654             } else {
655             # packet type: they need to be processed in total
656 0           return $request->fatal("partial $rtype for $ib0->[2]");
657             }
658             }
659             }
660              
661 0 0         if ( @$ibuf ) {
662             # there are still data in ibuf which cannot get passed,
663             # so reset pass, prepass
664 0           $self->{pass}[$dir] = $self->{prepass}[$dir] = 0;
665             } else {
666             # add empty buffer containing only current offset based on
667             # what we last removed from ibuf
668 0           push @$ibuf, [ $ib0->[0] + length($ib0->[1]),'' ];
669             }
670              
671 0           next;
672             }
673              
674              
675             # replace data up to offset
676 0 0         if ( $rtype == IMP_REPLACE ) {
677 0           my ($dir,$offset,$newdata) = @$rv;
678 0 0         $DEBUG && $request->xdebug("got replace($dir) off=$offset data.len=".
679             length($newdata));
680 0           my $ibuf = $self->{ibuf}[$dir];
681 0 0         @$ibuf or die "no ibuf";
682              
683             # if there is an active pass|prepass (e.g. pointing to future data)
684             # the data cannot be replaced
685             return $request->fatal(
686             "cannot replace data which are said to be passed")
687 0 0 0       if $self->{pass}[$dir] or $self->{prepass}[$dir];
688              
689             # we cannot replace future data
690 0 0         return $request->fatal('IMP', "cannot use replace with maxoffset")
691             if $offset == IMP_MAXOFFSET;
692              
693             # data to replace cannot span different types, so they must be in
694             # the first ibuf
695 0           my $ib0 = $ibuf->[0];
696 0           my $rlen = $offset - $ib0->[0];
697 0           my $len0 = length($ib0->[1]);
698              
699             # some sanity checks
700 0 0         if ( $rlen < 0 ) {
    0          
    0          
701 0           return $request->fatal("cannot replace already passed data");
702             } elsif ( $rlen > $len0 ) {
703 0 0 0       return $request->fatal(
704             "replacement cannot span multiple data types")
705             if @$ibuf>1 or $ib0->[2]>0;
706 0           return $request->fatal("cannot replace future data ($rlen>$len0)");
707             } elsif ( $rlen < $len0 ) {
708             # replace part of buffer
709 0 0         return $request->fatal("cannot replace part of packet type")
710             if $ib0->[2]>0;
711              
712             # keep rest and update position
713 0 0         substr( $ib0->[1],0,$rlen,'' ) if $rlen;
714 0           $ib0->[0] += $rlen;
715             } else {
716             # remove complete buffer
717 0 0         if ( @$ibuf>1 ) { # still data in buffer
    0          
718             } elsif ( $ib0->[2] < 0 ) {
719             # no eof yet and no further data in ibuf
720             # we might get a replacement at the end of the
721             # buffer so put emptied buffer back
722 0           $ib0->[0] += $len0;
723 0           $ib0->[1] = '';
724             } else {
725             # replace with empty
726 0           @$ibuf = [ $offset,'' ];
727             }
728             }
729              
730 0           push @{$fwd{$dir}}, [
  0            
731             1, # changed
732             $newdata, # new data
733             $ib0->[3], # callback
734             $ib0->[4], # cbargs
735             ];
736              
737 0           next;
738             }
739 0 0         if ( $rtype ~~ [ IMP_PAUSE, IMP_CONTINUE ] ) {
740 0           my $dir = shift(@$rv);
741 0           my $relay = $self->{request}{conn}{relay};
742 0 0 0       if ( $relay and my $fo = $relay->fd($dir)) {
743 0 0         $fo->mask( r => ($rtype == IMP_PAUSE ? 0:1));
744             }
745 0           next;
746             }
747              
748 0 0         if ( $rtype == IMP_FATAL ) {
749 0           $request->fatal(shift(@$rv));
750 0           next;
751             }
752              
753 0           return $request->fatal("unsupported IMP return type: $rtype");
754             }
755              
756 0 0         %fwd or return; # no passes/replacements...
757              
758 0           while ( my ($dir,$fwd) = each %fwd ) {
759 0           while ( my $fw = shift(@$fwd)) {
760             #warn Dumper($fw); use Data::Dumper;
761 0           my ($changed,$data,$callback,$args) = @$fw;
762 0           $callback->($self,$data,$changed,$args);
763             }
764             }
765             }
766              
767             ############################################################################
768             # send data to IMP analyzer
769             # if we had a previous (pre)pass some data can be forwarded immediatly, for
770             # others we have to wait for the analyzer callback
771             # returns how many bytes of data are waiting for callback, e.g. 0 if we
772             # we can pass everything immediately
773             ############################################################################
774             sub _imp_data {
775 0     0     my ($self,$dir,$data,$offset,$type,$callback,$args) = @_;
776 0           my $ibuf = $self->{ibuf}[$dir];
777 0           my $eobuf = $ibuf->[-1][0] + length($ibuf->[-1][1]);
778              
779 0           my $encoded_data;
780 0 0         if ( my $decode = $self->{decode}{$type+0}[$dir] ) {
781             # set up decoder if not set up yet
782 0 0         if ( ! ref($decode)) {
783             # create function to decode content
784             $self->{decode}{$type+0}[$dir] = $decode = _create_decoder($decode)
785             || return $self->{request}->fatal(
786 0   0       "cannot decode content-encoding $decode");
787             }
788              
789             # offsets relates to original stream, but we put the decoded stream
790             # into ibuf. And offset>0 means, that we have a gap in the input,
791             # which is not allowed, when decoding a stream.
792 0 0         die "cannot use content decoder with gap in data" if $offset;
793              
794 0 0         $encoded_data = $data if $self->{pass_encoded}[$dir];
795             defined( $data = $decode->($data) )
796 0 0         or return $self->{request}->fatal("decoding content failed");
797             }
798              
799 0 0         if ( $offset ) {
800 0 0         die "offset($offset)
801 0 0         $offset = 0 if $offset == $eobuf;
802             }
803              
804 0           my $fwd; # what gets send to analyzer
805              
806 0           my $dlen = length($data);
807 0           my $pass = $self->{pass}[$dir];
808 0 0         if ( $pass ) {
809             # if pass is set there should be no data in ibuf, e.g. everything
810             # before should have been passed
811 0 0         ! $ibuf->[0][2] or die "unexpected data in ibuf";
812              
813 0 0         if ( $pass == IMP_MAXOFFSET ) {
814             # pass thru w/o analyzing
815 0           $ibuf->[0][0] += $dlen;
816 0 0         $DEBUG && $self->{request}->xdebug("can pass($dir) infinite");
817 0   0       return $callback->($self,$encoded_data // $data,0,$args);
818             }
819              
820 0   0       my $canpass = $pass - ( $offset||$eobuf );
821 0 0         if ( $canpass <= 0 ) {
    0          
    0          
822             # cannot pass anything, pass should have been reset already
823 0           die "pass($dir,$pass) must be point into future ($canpass)";
824             } elsif ( $canpass >= $dlen) {
825             # can pass everything
826 0           $ibuf->[0][0] += $dlen;
827 0 0         if ( $data eq '' ) {
828             # forward eof to analyzer
829 0           $fwd = $data;
830 0 0         $DEBUG && $self->{request}->xdebug("pass($dir) eof");
831 0           goto SEND2IMP;
832             }
833             $DEBUG && $self->{request}->xdebug(
834 0 0         "can pass($dir) all: pass($canpass)>=data.len($dlen)");
835 0   0       return $callback->($self,$encoded_data // $data,0,$args);
836             } elsif ( $type < 0 ) {
837             # can pass part of data, only for streaming types
838             # remove from data what can be passed
839 0 0         die "body might change" if $self->{pass_encoded}[$dir];
840 0           $ibuf->[0][0] += $canpass;
841 0           my $passed_data = substr($data,0,$canpass,'');
842 0           $eobuf += $canpass;
843 0           $dlen = length($data);
844             $DEBUG && $self->{request}->xdebug(
845 0 0         "can pass($dir) part: pass($canpass)
846 0           $callback->($self,$passed_data,0,$args); # callback but continue
847             }
848             }
849              
850 0           $fwd = $data; # this must be forwarded to analyzer
851              
852 0           my $prepass = $self->{prepass}[$dir];
853 0 0         if ( $prepass ) {
854             # if prepass is set there should be no data in ibuf, e.g. everything
855             # before should have been passed
856 0 0         ! $ibuf->[0][2] or die "unexpected data in ibuf";
857 0 0         if ( $prepass == IMP_MAXOFFSET ) {
858             # prepass everything
859 0           $ibuf->[0][0] += $dlen;
860 0 0         $DEBUG && $self->{request}->xdebug("can prepass($dir) infinite");
861 0   0       $callback->($self,$encoded_data // $data,0,$args); # callback but continue
862 0           goto SEND2IMP;
863             }
864              
865 0   0       my $canprepass = $prepass - ( $offset||$eobuf );
866 0 0         if ( $canprepass <= 0 ) {
    0          
    0          
867             # cannot prepass anything, prepass should have been reset already
868 0           die "prepass must be point into future";
869             } elsif ( $canprepass >= $dlen) {
870             # can prepass everything
871 0           $ibuf->[0][0] += $dlen;
872 0   0       $callback->($self,$encoded_data // $data,0,$args); # callback but continue
873             $DEBUG && $self->{request}->xdebug(
874 0 0         "can prepass($dir) all: pass($canprepass)>=data.len($dlen)");
875 0           goto SEND2IMP;
876             } elsif ( $type < 0 ) {
877             # can prepass part of data, only for streaming types
878             # remove from data what can be prepassed
879 0 0         die "body might change" if $self->{pass_encoded}[$dir];
880 0           $ibuf->[0][0] += $canprepass;
881 0           my $passed_data = substr($data,0,$canprepass,'');
882 0           $eobuf += $canprepass;
883 0           $dlen = length($data);
884             $DEBUG && $self->{request}->xdebug(
885 0 0         "can prepass($dir) part: prepass($canprepass)
886 0           $callback->($self,$passed_data,0,$args); # callback but continue
887             }
888             }
889              
890             # everything else in $data must be added to buffer
891            
892             # there can be no gaps inside ibuf because caller is only allowed to
893             # pass data which we explicitly allowed
894 0 0 0       if ( $offset && $offset > $eobuf ) {
895 0 0         defined $ibuf->[0][2] and # we have still data in ibuf!
896             die "there can be no gaps in ibuf";
897             }
898 0 0 0       if ( ! defined $ibuf->[-1][2] ) {
    0 0        
899             # replace buf, because it was empty
900 0   0       $ibuf->[-1] = [ $offset||$eobuf,$data,$type,$callback,$args ];
901             } elsif ( $type < 0
902             and $type == $ibuf->[-1][2]
903             and $callback == $ibuf->[-1][3]
904             ) {
905             # streaming data, concatinate to existing buf of same type
906 0           $ibuf->[-1][1] .= $data;
907             } else {
908             # different type or non-streaming data, add new buf
909 0   0       push @$ibuf,[ $offset||$eobuf,$data,$type,$callback,$args ];
910             }
911 0 0         $DEBUG && $self->{request}->xdebug( "ibuf.length=%d",
912             $ibuf->[-1][0] + length($ibuf->[-1][1]) - $ibuf->[0][0]);
913              
914             SEND2IMP:
915 0 0         $DEBUG && $self->{request}->xdebug("forward(%d) %d bytes type=%s off=%d to analyzer",
916             $dir,length($fwd),$type,$offset);
917 0           $self->{imp}->data($dir,$fwd,$offset,$type);
918 0           return length($fwd);
919             }
920              
921             #####################################################################
922             # parse header fields
923             # taken from Net::Inspect::L7::HTTP (where it got put in by myself)
924             #####################################################################
925             my $token = qr{[^()<>@,;:\\"/\[\]?={}\x00-\x20\x7f-\xff]+};
926             my $token_value_cont = qr{
927             ($token): # key:
928             [\040\t]*([^\r\n]*?)[\040\t]* # value
929             ((?:\r?\n[\040\t][^\r\n]*)*) # continuation lines
930             \r?\n # (CR)LF
931             }x;
932             sub _parse_hdrfields {
933 0     0     my ($hdr,$fields) = @_;
934 0           my $bad = '';
935             parse:
936 0           while ( $hdr =~m{\G$token_value_cont}gc ) {
937 0 0         if ($3 eq '') {
938             # no continuation line
939 0           push @{$fields->{ lc($1) }},$2;
  0            
940             } else {
941             # with continuation line
942 0           my ($k,$v) = ($1,$2.$3);
943             # value-part -> ' ' + value-part
944 0           $v =~s{[\r\n]+[ \t](.*?)[ \t]*}{ $1}g;
945 0           push @{$fields->{ lc($k) }},$v;
  0            
946             }
947             }
948 0 0 0       if (pos($hdr)//0 != length($hdr)) {
949             # bad line inside
950 0           substr($hdr,0,pos($hdr),'');
951 0 0         $bad .= $1 if $hdr =~s{\A([^\n]*)\n}{};
952 0           goto parse;
953             }
954 0           return $bad;
955             }
956              
957             #####################################################################
958             # create decoder function for gzip|deflate content-encoding
959             #####################################################################
960             sub _create_decoder {
961 0     0     my $typ = shift;
962 0 0         $typ ~~ [ 'gzip','deflate' ] or return; # not supported
963              
964 0           my $gzip_csum;
965 0           my $buf = '';
966 0           my $inflate;
967              
968             return sub {
969 0     0     my $data = shift;
970 0           $buf .= $data;
971              
972 0 0         goto inflate if defined $inflate;
973              
974             # read gzip|deflate header
975 0           my $wb;
976 0 0         my $more = $data eq '' ? undef:''; # need more data if possible
977 0 0         if ( $typ eq 'gzip' ) {
978 0           my $hdr_len = 10; # minimum gzip header
979              
980 0 0         return $more if length($buf) < $hdr_len;
981 0           my ($magic,$method,$flags) = unpack('vCC',$buf);
982 0 0 0       if ( $magic != 0x8b1f or $method != Z_DEFLATED or $flags & 0xe0 ) {
      0        
983 0 0         $DEBUG && debug("no valid gzip header. assuming plain text");
984 0           $inflate = ''; # defined but false
985 0           goto inflate;
986             }
987 0 0         if ( $flags & 4 ) {
988             # skip extra section
989 0 0         return $more if length($buf) < ($hdr_len+=2);
990 0           $hdr_len += unpack('x10v',$buf);
991 0 0         return $more if length($buf) < $hdr_len;
992             }
993 0 0         if ( $flags & 8 ) {
994             # skip filename
995 0           my $o = index($buf,"\0",$hdr_len);
996 0 0         return $more if $o == -1; # end of filename not found
997 0           $hdr_len = $o+1;
998             }
999 0 0         if ( $flags & 16 ) {
1000             # skip comment
1001 0           my $o = index($buf,"\0",$hdr_len);
1002 0 0         return $more if $o == -1; # end of comment not found
1003 0           $hdr_len = $o+1;
1004             }
1005 0 0         if ( $flags & 2 ) {
1006             # skip CRC
1007 0 0         return $more if length($buf) < ($hdr_len+=2);
1008             }
1009              
1010             # remove header
1011 0           substr($buf,0,$hdr_len,'');
1012 0           $gzip_csum = 8; # 8 byte Adler CRC at end
1013 0           $wb = -MAX_WBITS(); # see Compress::Raw::Zlib
1014              
1015             } else {
1016             # deflate
1017             # according to RFC it should be zlib, but due to the encoding name
1018             # often real deflate is used instead
1019             # check magic bytes to decide
1020              
1021             # lets see if it looks like a zlib header
1022             # check for CM=8, CMID<=7 in first byte and valid FCHECK in
1023             # seconds byte
1024 0 0         return $more if length($buf)<2;
1025 0           my $magic = unpack('C',substr($buf,0,1));
1026 0 0 0       if (
      0        
1027             ( $magic & 0b1111 ) == 8 # CM = 8
1028             and $magic >> 4 <= 7 # CMID <= 7
1029             and unpack('n',substr($buf,0,2)) % 31 == 0 # valid FCHECK
1030             ) {
1031             # looks like zlib header
1032 0           $wb = +MAX_WBITS(); # see Compress::Raw::Zlib
1033             } else {
1034             # assume deflate
1035 0           $wb = -MAX_WBITS(); # see Compress::Raw::Zlib
1036             }
1037             }
1038              
1039 0 0         $inflate = Compress::Raw::Zlib::Inflate->new(
1040             -WindowBits => $wb,
1041             -AppendOutput => 1,
1042             -ConsumeInput => 1
1043             ) or die "cannot create inflation stream";
1044              
1045 0 0         inflate:
1046              
1047             return '' if $buf eq '';
1048              
1049 0 0         if ( ! $inflate ) {
1050             # wrong gzip header: sometimes servers claim to use gzip
1051             # if confronted with "Accept-Encoding: identity" but in reality
1052             # they send plain text
1053             # so consider it plain text and don't decode
1054 0           my $out = $buf;
1055 0           $buf = '';
1056 0           return $out
1057             }
1058              
1059 0           my $out = '';
1060 0           my $stat = $inflate->inflate(\$buf,\$out);
1061 0 0         if ( $stat == Z_STREAM_END ) {
    0          
1062 0 0 0       if ( $gzip_csum and length($buf) >= $gzip_csum ) {
1063             # TODO - check checksum - but what would it help?
1064 0           substr($buf,0,$gzip_csum,'');
1065 0           $gzip_csum = 0;
1066             }
1067             } elsif ( $stat != Z_OK ) {
1068 0 0         $DEBUG && debug("decode failed: $stat");
1069 0           return; # error
1070             }
1071 0           return $out
1072 0           };
1073             }
1074              
1075             1;