File Coverage

blib/lib/App/HTTP_Proxy_IMP/IMP.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1 1     1   5 use strict;
  1         2  
  1         30  
2 1     1   5 use warnings;
  1         2  
  1         38  
3              
4             package App::HTTP_Proxy_IMP::IMP;
5              
6 1     1   467 use Net::Inspect::Debug qw(:DEFAULT $DEBUG);
  0            
  0            
7             use Net::IMP::Debug var => \$DEBUG, sub => \&debug;
8             use Net::IMP qw(:DEFAULT :log);
9             use Net::IMP::HTTP;
10             use Scalar::Util 'weaken';
11             use Hash::Util 'lock_ref_keys';
12             use Compress::Raw::Zlib;
13             use Carp;
14              
15             my %METHODS_RFC2616 = map { ($_,1) } qw( GET HEAD POST PUT DELETE OPTIONS CONNECT TRACE );
16             my %METHODS_WITHOUT_RQBODY = map { ($_,1) } qw( GET HEAD DELETE CONNECT );
17             my %METHODS_WITH_RQBODY = map { ($_,1) } qw( POST PUT );
18             my %CODE_WITHOUT_RPBODY = map { ($_,1) } qw(204 205 304);
19             my %METHODS_WITHOUT_RPBODY = map { ($_,1) } qw(HEAD);
20              
21             # we want plugins to suppport the HTTP Request innterface
22             my $interface = [
23             IMP_DATA_HTTPRQ,
24             [
25             IMP_PASS,
26             IMP_PREPASS,
27             IMP_REPLACE,
28             IMP_TOSENDER,
29             IMP_DENY,
30             IMP_LOG,
31             IMP_ACCTFIELD,
32             IMP_PAUSE,
33             IMP_CONTINUE,
34             IMP_FATAL,
35             ]
36             ];
37              
38             sub can_modify {
39             return shift->{can_modify};
40             }
41              
42             # create a new factory object
43             sub new_factory {
44             my ($class,%args) = @_;
45             my @factory;
46             for my $module (@{ delete $args{mod} || [] }) {
47             if ( ref($module)) {
48             # assume it is already an IMP factory object
49             push @factory, $module;
50             next;
51             }
52              
53             # --filter mod=args
54             my ($mod,$args) = $module =~m{^([a-z][\w:]*)(?:=(.*))?$}i
55             or die "invalid module $module";
56             eval "require $mod" or die "cannot load $mod args=$args: $@";
57             my %args = $mod->str2cfg($args//'');
58             my $factory = $mod->new_factory(%args)
59             or croak("cannot create Net::IMP factory for $mod");
60             $factory =
61             $factory->get_interface( $interface ) &&
62             $factory->set_interface( $interface )
63             or croak("$mod does not implement the interface supported by us");
64             push @factory,$factory;
65             }
66              
67             @factory or return;
68             if (@factory>1) {
69             # for cascading filters we need Net::IMP::Cascade
70             require Net::IMP::Cascade;
71             my $cascade = Net::IMP::Cascade->new_factory( parts => [ @factory ])
72             or croak("cannot create Net::IMP::Cascade factory");
73             $cascade = $cascade->set_interface( $interface ) or
74             croak("cascade does not implement the interface supported by us");
75             @factory = $cascade;
76             }
77             my $factory = $factory[0];
78              
79             my $self = bless {
80             %args,
81             imp => $factory, # IMP factory object
82             can_modify => 0, # does interface support IMP_REPLACE, IMP_TOSENDER
83             }, $class;
84             lock_ref_keys($self);
85              
86             # update can_modify
87             CHKIF: for my $if ( $factory->get_interface ) {
88             my ($dt,$rt) = @$if;
89             for (@$rt) {
90             $_ ~~ [ IMP_REPLACE, IMP_TOSENDER ] or next;
91             $self->{can_modify} =1;
92             last CHKIF;
93             }
94             }
95            
96             return $self;
97             }
98              
99             # create a new analyzer based on the factory
100             sub new_analyzer {
101             my ($factory,$request,$meta) = @_;
102              
103             my %meta = %$meta;
104             # IMP uses different *addr than Net::Inspect, translate
105             # [s]ource -> [c]lient, [d]estination -> [s]erver
106             $meta{caddr} = delete $meta{saddr};
107             $meta{cport} = delete $meta{sport};
108             $meta{saddr} = delete $meta{daddr};
109             $meta{sport} = delete $meta{dport};
110              
111             my $analyzer = $factory->{imp}->new_analyzer( meta => \%meta );
112              
113             my $self = bless {
114             request => $request, # App::HTTP_Proxy_IMP::Request object
115             imp => $analyzer,
116             # incoming data, put into analyzer
117             # \@list of [ buf_base,buf,type,callback,$cb_arg ] per dir
118             ibuf => [
119             [ [0,''] ],
120             [ [0,''] ],
121             ],
122             pass => [0,0], # pass allowed up to given offset (per dir)
123             prepass => [0,0], # prepass allowed up to given offset (per dir)
124             fixup_header => [], # sub to fixup content-length in header once known
125             eof => [0,0], # got eof in dir ?
126             decode => undef, # decoder for content-encoding decode{type}[dir]
127             pass_encoded => undef, # pass body encoded (analyzer will not change body)
128             method => undef, # request method
129             logsub => $factory->{logsub}, # how to log IMP_OG
130             }, ref($factory);
131             lock_ref_keys($self);
132             weaken($self->{request});
133              
134             # set callback, this might trigger callback immediately if there are
135             # results pending
136             weaken( my $wself = $self );
137             $analyzer->set_callback( sub { _imp_callback($wself,@_) } );
138             return $self;
139             }
140              
141              
142             sub request_header {
143             my ($self,$hdr,$xhdr,@callback) = @_;
144             my $clen = $xhdr->{content_length};
145             if ( ! defined $clen and $xhdr->{method} ne 'CONNECT') {
146             # length not known -> chunking
147             die "FIXME: chunking request body not yet supported";
148             }
149              
150             # new body might change content-length info in request header
151             # need to defer sending header until body length is known
152             if ( ! $METHODS_WITHOUT_RQBODY{$xhdr->{method}} ) {
153             my $hlen = length($hdr);
154             $self->{fixup_header}[0] = sub {
155             my ($self,$hdr,%args) = @_;
156             my $size = $args{content};
157             goto fix_clen if defined $size;
158              
159             if ( my $pass = $self->{pass}[0] ) {
160             if ( $pass == IMP_MAXOFFSET or $pass >= $hlen + $clen ) {
161             # will not change body
162             goto fix_clen;
163             }
164             }
165             if ( my $prepass = $self->{prepass}[0] ) {
166             if ( $prepass == IMP_MAXOFFSET or $prepass >= $hlen + $clen ) {
167             # will not change body
168             goto fix_clen;
169             }
170             }
171             if ($self->{ibuf}[0][0][0] >= $hlen + $clen) { # ibuf[client].base
172             # everything passed thru ibuf
173             goto fix_clen;
174             }
175              
176             # need to defer header until all of the body is passed
177             # or replaced, then we know the size
178             return;
179              
180             fix_clen:
181              
182             if (!defined $size) {
183             # bytes in ibuf and outstanding bytes will not be changed, so:
184             # new_content_length =
185             # ( orig_clen + orig_hlen - received ) # not yet received
186             # + ( received - ibuf.base ) # still in ibuf
187             # + defered_body.length # ready to forward
188             # --->
189             # orig_clen + orig_hlen - ibuf.base + defered_body.length
190             $size = $clen + $hlen # orig_clen + orig_hlen
191             - $self->{ibuf}[0][0][0] # ibuf.base
192             + $args{defered}; # defered_body.length
193             }
194              
195             $DEBUG && $self->{request}->xdebug("fixup header with clen=%d",$size);
196             # replace or add content-length header
197             $$hdr =~s{^(Content-length:[ \t]*)(\d+)}{$1$size}mi
198             || $$hdr =~s{(\n)}{$1Content-length: $size\r\n};
199             return 1;
200             };
201             }
202              
203              
204             # send data to analyzer.
205             # will call back into request on processed data
206             _imp_data($self,0,$hdr,0,IMP_DATA_HTTPRQ_HEADER,
207             \&_request_header_imp,[ $xhdr,@callback ]);
208             }
209              
210              
211             ############################################################################
212             # callback from IMP after passing/replacing the HTTP request header
213             # will reparse the header if changed and continue in @callback from request
214             ############################################################################
215             sub _request_header_imp {
216             my ($self,$hdr,$changed,$args) = @_;
217             my ($xhdr,$callback,@cb_args) = @$args;
218              
219             if ( $changed ) {
220             # we need to parse the header again and update xhdr
221             my ($met,$url,$version,$fields) = $hdr =~m{ \A
222             (\S+)[\040\t]+
223             (\S+)[\040\t]+
224             HTTP/(1\.[01])[\040\t]*
225             \r?\n
226             (.*?\n)
227             \r?\n\Z
228             }isx;
229              
230             # internal URL are not accepted by the client itself, only from
231             # plugin. Set xhdr.internal_url if we see, that IMP plugin rewrote
232             # url to internal one and strip internal:// again so that original
233             # URL could be logged
234             my $internal = $met ne 'CONNECT'
235             && $xhdr->{url} !~m{^internal://}i
236             && $url =~s{^internal://}{}i;
237              
238             my %kv;
239             my $bad = _parse_hdrfields($fields,\%kv);
240             $xhdr = {
241             method => uc($met),
242             version => $version,
243             url => $url,
244             fields => \%kv,
245             $bad ? ( junk => $bad ) :(),
246             $internal ? ( internal_url => 1 ):(),
247             };
248             }
249              
250             # we don't know the content length yet, unless it can be determined by the
251             # request method. If we got a (pre)pass until the end of the request body
252             # fixup_header will know it and adjust the header
253             $xhdr->{content_length} =
254             $METHODS_WITHOUT_RQBODY{$xhdr->{method}} ? 0:undef;
255              
256             $self->{method} = $xhdr->{method};
257             return $callback->(@cb_args,$hdr,$xhdr);
258             }
259              
260             ############################################################################
261             # fix request header by setting correct content-length
262             # returns true if header could be fixed
263             ############################################################################
264             sub fixup_request_header {
265             my ($self,$hdr_ref,%args) = @_;
266             my $sub = $self->{fixup_header}[0] or return 1;
267             my $ok = $sub->($self,$hdr_ref,%args);
268             $self->{fixup_header}[0] = undef if $ok;
269             return $ok;
270             }
271              
272              
273             ############################################################################
274             # process request body data
275             # just feed to analyzer and call back into request once done
276             ############################################################################
277             sub request_body {
278             my ($self,$data,@callback) = @_;
279              
280             # feed data into IMP
281             $self->{eof}[0] = 1 if $data eq '';
282             _imp_data($self,0,$data,0,IMP_DATA_HTTPRQ_CONTENT,
283             \&_request_body_imp,\@callback );
284             }
285              
286             sub _request_body_imp {
287             my ($self,$data,$changed,$args) = @_;
288             my ($callback,@cb_args) = @$args;
289             my $eof = _check_eof($self,0);
290             $callback->(@cb_args,$data,$eof) if $data ne '' || $eof;
291             }
292              
293             ############################################################################
294             # process response header
295             ############################################################################
296             sub response_header {
297             my ($self,$hdr,$xhdr,@callback) = @_;
298              
299             # if content is encoded we need to decode it in order to analyze
300             # it. For now only set decode to the encoding method, this will
301             # be changed to a decoding function once we need it in the body
302             if ( my $ce = $xhdr->{fields}{'content-encoding'} ) {
303             # the right way would be to extract all encodings and then complain, if
304             # there is an encoding we don't support. Instead we just look for the
305             # encodings we support
306             my %ce = map { lc($_) => 1 } map { m{\b(?:x-)?(gzip|deflate)\b}ig } @$ce;
307             $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] = join(", ",keys %ce)
308             if %ce;
309             }
310              
311             # header length is needed in callback
312             $xhdr->{header_length} = length($hdr);
313             _imp_data($self,1,$hdr,0,IMP_DATA_HTTPRQ_HEADER,
314             \&_response_header_imp,[$xhdr,@callback] );
315             }
316              
317              
318             ############################################################################
319             # callback after passing/replacing the HTTP response header
320             # will reparse the header if changed and continue in the request via
321             # callback
322             ############################################################################
323             sub _response_header_imp {
324             my ($self,$hdr,$changed,$args) = @_;
325             my ($xhdr,$callback,@cb_args) = @$args;
326              
327             my $orig_clen = $xhdr->{content_length};
328             my $orig_hlen = $xhdr->{header_length};
329              
330             if ( $changed ) {
331             # we need to parse the header again and update xhdr
332             my ($version,$code,$reason,$fields) = $hdr =~m{ \A
333             HTTP/(1\.[01])[\040\t]+
334             (\d\d\d)
335             (?:[\040\t]+([^\r\n]*))?
336             \r?\n
337             (.*?\n)
338             \r?\n\Z
339             }isx;
340              
341             my %kv;
342             my $bad = _parse_hdrfields($fields,\%kv);
343             $xhdr = {
344             code => $code,
345             version => $version,
346             reason => $reason,
347             fields => \%kv,
348             $bad ? ( junk => $bad ) :(),
349             };
350             }
351              
352             # except for some codes or request methods we don't know the
353             # content-length of the body yet
354             # in these cases we try in this order
355             # - check if we got a (pre)pass for the whole body already
356             # - use chunked encoding if client speaks HTTP/1.1
357             # - don't specify content-length and close request with connection close
358              
359             # we don't change $hdr here because it will be rebuild from fields anyway
360             if ( $CODE_WITHOUT_RPBODY{$xhdr->{code}} or $xhdr->{code} < 200 ) {
361             $xhdr->{content_length} = 0;
362             # better remove them
363             delete @{ $xhdr->{fields} }{ qw/ content-length transfer-encoding / };
364             goto callback;
365             }
366              
367             if ( $METHODS_WITHOUT_RPBODY{ $self->{method} } ) {
368             $xhdr->{content_length} = 0;
369             # keep content-length etc, client might want to peek into it using HEAD
370             goto callback;
371             }
372            
373             # reset infos about content-length
374             $xhdr->{content_length} = $xhdr->{chunked} = undef;
375             delete @{ $xhdr->{fields} }{ qw/ content-length transfer-encoding / };
376              
377             # if we have read the whole body already or at least know, that we will
378             # not change anymore data, we could compute the new content-length
379             my $clen;
380             my $nochange;
381             while ( defined $orig_clen ) {
382             my $rpsize = $orig_hlen + $orig_clen;
383              
384             if ( my $pass = $self->{pass}[1] ) {
385             if ( $pass == IMP_MAXOFFSET or $pass >= $rpsize ) {
386             # will not look at and not change body
387             $nochange = 1;
388             goto compute_clen;
389             }
390             }
391             if ( my $prepass = $self->{prepass}[1] ) {
392             if ( $prepass == IMP_MAXOFFSET or $prepass >= $rpsize ) {
393             # will not change body
394             $nochange = 1;
395             goto compute_clen;
396             }
397             }
398             if ($self->{ibuf}[1][0][0] >= $rpsize) { # ibuf[server].base
399             # everything passed thru ibuf
400             goto compute_clen;
401             }
402              
403             # we still don't know final size
404             last;
405              
406             compute_clen:
407             # bytes in ibuf and outstanding bytes will not be changed, so:
408             # new_content_length =
409             # ( total_size - received ) # not yet received
410             # + ( received - ibuf.base ) # still in ibuf
411             # --->
412             # total_size - ibuf.base
413             $clen = $rpsize - $self->{ibuf}[1][0][0];
414              
415             last;
416             }
417              
418             if ( $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] ) {
419             if ( $nochange ) {
420             # we will pass encoded stuff, either no decoding needs to
421             # be done (pass) or we will decode only for the analyzer (prepass)
422             # which will only watch at the content, but not change it
423             $self->{pass_encoded}[1] = 1;
424              
425             my $pass = $self->{pass}[1];
426             if ( $pass and defined $orig_clen and (
427             $pass == IMP_MAXOFFSET or
428             $pass >= $orig_clen + $orig_hlen )) {
429             # no need to decode body
430             $self->{decode}{IMP_DATA_HTTPRQ_CONTENT+0}[1] = undef;
431             }
432             } else {
433             # content is encoded and inspection wants to see decoded stuff,
434             # which we then will forward too
435             # but decoding might change length
436             $clen = undef;
437             # the content will be delivered decoded
438             delete $xhdr->{fields}{'content-encoding'}
439             }
440             }
441             if ( defined $clen ) {
442             $xhdr->{fields}{'content-length'} = [ $clen ];
443             $xhdr->{content_length} = $clen;
444             }
445              
446             callback:
447             $callback->(@cb_args,$hdr,$xhdr);
448             }
449              
450              
451              
452             ############################################################################
453             # handle response body data
454             ############################################################################
455             sub response_body {
456             my ($self,$data,@callback) = @_;
457              
458             # forward to IMP analyzer
459             $self->{eof}[1] = 1 if $data eq '';
460             _imp_data($self,1,$data,0,IMP_DATA_HTTPRQ_CONTENT,
461             \&_response_body_imp,\@callback);
462             }
463              
464             sub _response_body_imp {
465             my ($self,$data,$changed,$args) = @_;
466             my ($callback,@cb_args) = @$args;
467             my $eof = _check_eof($self,1);
468             $callback->(@cb_args,$data,$eof) if $data ne '' || $eof;
469             }
470              
471              
472             sub _check_eof {
473             my ($self,$dir) = @_;
474             $DEBUG && $self->{request}->xdebug(
475             "check eof[%d] - eof=%d - %s - (pre)pass=%d/%d",
476             $dir,$self->{eof}[$dir], _show_buf($self,$dir),
477             $self->{prepass}[$dir],
478             $self->{pass}[$dir]
479             );
480             return $self->{eof}[$dir] # received eof
481             && ! defined $self->{ibuf}[$dir][0][2] # no more data in buf
482             && ( # (pre)pass til end ok
483             $self->{prepass}[$dir] == IMP_MAXOFFSET
484             || $self->{pass}[$dir] == IMP_MAXOFFSET
485             );
486             }
487              
488             sub _show_buf {
489             my ($self,$dir) = @_;
490             return join('|',
491             map { ($_->[2]||'none')."($_->[0],+".length($_->[1]).")" }
492             @{ $self->{ibuf}[$dir] }
493             );
494             }
495              
496              
497              
498             ############################################################################
499             # Websockets, TLS upgrades etc
500             # if not IMP the forwarding will be done inside this function, otherwise it
501             # will be done in _in_data_imp, which gets called by IMP callback
502             ############################################################################
503             sub data {
504             my ($self,$dir,$data,@callback) = @_;
505              
506             # forward to IMP analyzer
507             $self->{eof}[$dir] = 1 if $data eq '';
508             _imp_data($self,$dir,$data,0,IMP_DATA_HTTPRQ_CONTENT,
509             \&_data_imp,[$dir,@callback]);
510             }
511              
512             sub _data_imp {
513             my ($self,$data,$changed,$args) = @_;
514             my ($dir,$callback,@cb_args) = @$args;
515             my $eof = $self->{eof}[$dir] && # got eof from server
516             ! defined $self->{ibuf}[$dir][0][2]; # no more data in ibuf[server]
517             $callback->(@cb_args,$dir,$data,$eof) if $data ne '' || $eof;
518             }
519              
520              
521              
522             ############################################################################
523             # callback from IMP
524             # process return types and trigger type specific callbacks on (pre)pass/replace
525             ############################################################################
526             sub _imp_callback {
527             my $self = shift;
528              
529             my %fwd; # forwarded data, per dir
530             for my $rv (@_) {
531              
532             # if the request got closed in between just return
533             my $request = $self->{request} or return;
534              
535             my $rtype = shift(@$rv);
536              
537             # deny further data
538             if ( $rtype == IMP_DENY ) {
539             my ($impdir,$msg) = @$rv;
540             $DEBUG && $request->xdebug("got deny($impdir) $msg");
541             return $request->deny($msg // 'closed by imp');
542             }
543              
544             # log some data
545             if ( $rtype == IMP_LOG ) {
546             my ($impdir,$offset,$len,$level,$msg) = @$rv;
547             $DEBUG && $request->xdebug("got log($impdir,$level) $msg");
548             if ( my $sub = $self->{logsub} ) {
549             $sub->($level,$msg,$impdir,$offset,$len)
550             }
551             next;
552             }
553              
554             # set accounting field
555             if ( $rtype == IMP_ACCTFIELD ) {
556             my ($key,$value) = @$rv;
557             $DEBUG && $request->xdebug("got acct $key => $value");
558             $request->{acct}{$key} = $value;
559             next;
560             }
561              
562             # (pre)pass data up to offset
563             if ( $rtype ~~ [ IMP_PASS, IMP_PREPASS ]) {
564             my ($dir,$offset) = @$rv;
565             $DEBUG && $request->xdebug("got $rtype($dir) off=$offset "._show_buf($self,$dir));
566              
567             if ( $rtype == IMP_PASS ) {
568             # ignore pass if it's not better than a previous pass
569             if ( $self->{pass}[$dir] == IMP_MAXOFFSET ) {
570             # there is no better thing than IMP_MAXOFFSET
571             next;
572             } elsif ( $offset == IMP_MAXOFFSET
573             or $offset > $self->{ibuf}[$dir][0][0] ) {
574             # we can pass new data
575             $self->{pass}[$dir] = $offset;
576             } else {
577             # offset is no better than previous pass
578             next;
579             }
580              
581             } else { # IMP_PREPASS
582             # ignore prepass if it's not better than a previous pass
583             # and a previous prepaself->{ibuf}[1][0]
584             if ( $self->{pass}[$dir] == IMP_MAXOFFSET
585             or $self->{prepass}[$dir] == IMP_MAXOFFSET ) {
586             # there is no better thing than IMP_MAXOFFSET
587             $DEBUG && debug("new off $offset no better than existing (pre)pass=max");
588             next;
589             } elsif ( $offset == IMP_MAXOFFSET
590             or $offset > $self->{ibuf}[$dir][0][0] ) {
591             # we can prepass new data
592             $self->{prepass}[$dir] = $offset;
593             $DEBUG && debug("update prepass with new off $offset");
594             } else {
595             # offset is no better than previous pass
596             $DEBUG && debug(
597             "new off $offset no better than existing $self->{ibuf}[$dir][0][0]");
598             next;
599             }
600             }
601              
602             # collect data up to offset for forwarding
603             # list of [ changed,data,callback,cbarg ]
604             my $fwd = $fwd{$dir} ||= [];
605              
606             my $ibuf = $self->{ibuf}[$dir];
607             my $ib0; # top of ibuf, e.g. ibuf[0]
608              
609             while ( @$ibuf ) {
610             $ib0 = shift(@$ibuf);
611             defined $ib0->[2] or last; # dummy entry with no type
612              
613             if ( $offset == IMP_MAXOFFSET ) {
614             # forward this buf and maybe more
615             push @$fwd, [ 0, @{$ib0}[1,3,4] ];
616             } else {
617             my $pass = $offset - $ib0->[0];
618             my $len0 = length($ib0->[1]);
619             if ( $pass > $len0 ) {
620             # forward this buf and maybe more
621             push @$fwd, [ 0, @{$ib0}[1,3,4] ];
622             } elsif ( $pass == $len0 ) {
623             # forward this buf, but not more
624             push @$fwd, [ 0, @{$ib0}[1,3,4] ];
625              
626             # add empty buf if this was the last, this will also
627             # trigger resetting pass,prepass below
628             if ( @$ibuf ) { # still data in buffer
629             } elsif ( $ib0->[2] < 0 ) {
630             # no eof yet and no further data in ibuf
631             # we might get a replacement at the end of the
632             # buffer so put emptied buffer back
633             $ib0->[1] = '';
634             push @$ibuf, $ib0;
635             } else {
636             push @$ibuf, [ $offset,'' ];
637             }
638             last;
639             } elsif ( $ib0->[2] < 0 ) {
640             # streaming type:
641             # forward part of buf
642             push @$fwd, [
643             0, # not changed
644             substr($ib0->[1],0,$pass,''), # data
645             $ib0->[3], # callback
646             $ib0->[4], # args
647             ];
648             # keep rest in ibuf
649             unshift @$ibuf,$ib0;
650             $ib0->[0] += $pass;
651             last; # nothing more to forward
652             } else {
653             # packet type: they need to be processed in total
654             return $request->fatal("partial $rtype for $ib0->[2]");
655             }
656             }
657             }
658              
659             if ( @$ibuf ) {
660             # there are still data in ibuf which cannot get passed,
661             # so reset pass, prepass
662             $self->{pass}[$dir] = $self->{prepass}[$dir] = 0;
663             } else {
664             # add empty buffer containing only current offset based on
665             # what we last removed from ibuf
666             push @$ibuf, [ $ib0->[0] + length($ib0->[1]),'' ];
667             }
668              
669             next;
670             }
671              
672              
673             # replace data up to offset
674             if ( $rtype == IMP_REPLACE ) {
675             my ($dir,$offset,$newdata) = @$rv;
676             $DEBUG && $request->xdebug("got replace($dir) off=$offset data.len=".
677             length($newdata));
678             my $ibuf = $self->{ibuf}[$dir];
679             @$ibuf or die "no ibuf";
680              
681             # if there is an active pass|prepass (e.g. pointing to future data)
682             # the data cannot be replaced
683             return $request->fatal(
684             "cannot replace data which are said to be passed")
685             if $self->{pass}[$dir] or $self->{prepass}[$dir];
686              
687             # we cannot replace future data
688             return $request->fatal('IMP', "cannot use replace with maxoffset")
689             if $offset == IMP_MAXOFFSET;
690              
691             # data to replace cannot span different types, so they must be in
692             # the first ibuf
693             my $ib0 = $ibuf->[0];
694             my $rlen = $offset - $ib0->[0];
695             my $len0 = length($ib0->[1]);
696              
697             # some sanity checks
698             if ( $rlen < 0 ) {
699             return $request->fatal("cannot replace already passed data");
700             } elsif ( $rlen > $len0 ) {
701             return $request->fatal(
702             "replacement cannot span multiple data types")
703             if @$ibuf>1 or $ib0->[2]>0;
704             return $request->fatal("cannot replace future data ($rlen>$len0)");
705             } elsif ( $rlen < $len0 ) {
706             # replace part of buffer
707             return $request->fatal("cannot replace part of packet type")
708             if $ib0->[2]>0;
709              
710             # keep rest and update position
711             substr( $ib0->[1],0,$rlen,'' ) if $rlen;
712             $ib0->[0] += $rlen;
713             } else {
714             # remove complete buffer
715             if ( @$ibuf>1 ) { # still data in buffer
716             } elsif ( $ib0->[2] < 0 ) {
717             # no eof yet and no further data in ibuf
718             # we might get a replacement at the end of the
719             # buffer so put emptied buffer back
720             $ib0->[0] += $len0;
721             $ib0->[1] = '';
722             } else {
723             # replace with empty
724             @$ibuf = [ $offset,'' ];
725             }
726             }
727              
728             push @{$fwd{$dir}}, [
729             1, # changed
730             $newdata, # new data
731             $ib0->[3], # callback
732             $ib0->[4], # cbargs
733             ];
734              
735             next;
736             }
737             if ( $rtype ~~ [ IMP_PAUSE, IMP_CONTINUE ] ) {
738             my $dir = shift(@$rv);
739             my $relay = $self->{request}{conn}{relay};
740             if ( $relay and my $fo = $relay->fd($dir)) {
741             $fo->mask( r => ($rtype == IMP_PAUSE ? 0:1));
742             }
743             next;
744             }
745              
746             if ( $rtype == IMP_FATAL ) {
747             $request->fatal(shift(@$rv));
748             next;
749             }
750              
751             return $request->fatal("unsupported IMP return type: $rtype");
752             }
753              
754             %fwd or return; # no passes/replacements...
755              
756             while ( my ($dir,$fwd) = each %fwd ) {
757             while ( my $fw = shift(@$fwd)) {
758             #warn Dumper($fw); use Data::Dumper;
759             my ($changed,$data,$callback,$args) = @$fw;
760             $callback->($self,$data,$changed,$args);
761             }
762             }
763             }
764              
765             ############################################################################
766             # send data to IMP analyzer
767             # if we had a previous (pre)pass some data can be forwarded immediatly, for
768             # others we have to wait for the analyzer callback
769             # returns how many bytes of data are waiting for callback, e.g. 0 if we
770             # we can pass everything immediately
771             ############################################################################
772             sub _imp_data {
773             my ($self,$dir,$data,$offset,$type,$callback,$args) = @_;
774             my $ibuf = $self->{ibuf}[$dir];
775             my $eobuf = $ibuf->[-1][0] + length($ibuf->[-1][1]);
776              
777             my $encoded_data;
778             if ( my $decode = $self->{decode}{$type+0}[$dir] ) {
779             # set up decoder if not set up yet
780             if ( ! ref($decode)) {
781             # create function to decode content
782             $self->{decode}{$type+0}[$dir] = $decode = _create_decoder($decode)
783             || return $self->{request}->fatal(
784             "cannot decode content-encoding $decode");
785             }
786              
787             # offsets relates to original stream, but we put the decoded stream
788             # into ibuf. And offset>0 means, that we have a gap in the input,
789             # which is not allowed, when decoding a stream.
790             die "cannot use content decoder with gap in data" if $offset;
791              
792             $encoded_data = $data if $self->{pass_encoded}[$dir];
793             defined( $data = $decode->($data) )
794             or return $self->{request}->fatal("decoding content failed");
795             }
796              
797             if ( $offset ) {
798             die "offset($offset)
799             $offset = 0 if $offset == $eobuf;
800             }
801              
802             my $fwd; # what gets send to analyzer
803              
804             my $dlen = length($data);
805             my $pass = $self->{pass}[$dir];
806             if ( $pass ) {
807             # if pass is set there should be no data in ibuf, e.g. everything
808             # before should have been passed
809             ! $ibuf->[0][2] or die "unexpected data in ibuf";
810              
811             if ( $pass == IMP_MAXOFFSET ) {
812             # pass thru w/o analyzing
813             $ibuf->[0][0] += $dlen;
814             $DEBUG && $self->{request}->xdebug("can pass($dir) infinite");
815             return $callback->($self,$encoded_data // $data,0,$args);
816             }
817              
818             my $canpass = $pass - ( $offset||$eobuf );
819             if ( $canpass <= 0 ) {
820             # cannot pass anything, pass should have been reset already
821             die "pass($dir,$pass) must be point into future ($canpass)";
822             } elsif ( $canpass >= $dlen) {
823             # can pass everything
824             $ibuf->[0][0] += $dlen;
825             if ( $data eq '' ) {
826             # forward eof to analyzer
827             $fwd = $data;
828             $DEBUG && $self->{request}->xdebug("pass($dir) eof");
829             goto SEND2IMP;
830             }
831             $DEBUG && $self->{request}->xdebug(
832             "can pass($dir) all: pass($canpass)>=data.len($dlen)");
833             return $callback->($self,$encoded_data // $data,0,$args);
834             } elsif ( $type < 0 ) {
835             # can pass part of data, only for streaming types
836             # remove from data what can be passed
837             die "body might change" if $self->{pass_encoded}[$dir];
838             $ibuf->[0][0] += $canpass;
839             my $passed_data = substr($data,0,$canpass,'');
840             $eobuf += $canpass;
841             $dlen = length($data);
842             $DEBUG && $self->{request}->xdebug(
843             "can pass($dir) part: pass($canpass)
844             $callback->($self,$passed_data,0,$args); # callback but continue
845             }
846             }
847              
848             $fwd = $data; # this must be forwarded to analyzer
849              
850             my $prepass = $self->{prepass}[$dir];
851             if ( $prepass ) {
852             # if prepass is set there should be no data in ibuf, e.g. everything
853             # before should have been passed
854             ! $ibuf->[0][2] or die "unexpected data in ibuf";
855             if ( $prepass == IMP_MAXOFFSET ) {
856             # prepass everything
857             $ibuf->[0][0] += $dlen;
858             $DEBUG && $self->{request}->xdebug("can prepass($dir) infinite");
859             $callback->($self,$encoded_data // $data,0,$args); # callback but continue
860             goto SEND2IMP;
861             }
862              
863             my $canprepass = $prepass - ( $offset||$eobuf );
864             if ( $canprepass <= 0 ) {
865             # cannot prepass anything, prepass should have been reset already
866             die "prepass must be point into future";
867             } elsif ( $canprepass >= $dlen) {
868             # can prepass everything
869             $ibuf->[0][0] += $dlen;
870             $callback->($self,$encoded_data // $data,0,$args); # callback but continue
871             $DEBUG && $self->{request}->xdebug(
872             "can prepass($dir) all: pass($canprepass)>=data.len($dlen)");
873             goto SEND2IMP;
874             } elsif ( $type < 0 ) {
875             # can prepass part of data, only for streaming types
876             # remove from data what can be prepassed
877             die "body might change" if $self->{pass_encoded}[$dir];
878             $ibuf->[0][0] += $canprepass;
879             my $passed_data = substr($data,0,$canprepass,'');
880             $eobuf += $canprepass;
881             $dlen = length($data);
882             $DEBUG && $self->{request}->xdebug(
883             "can prepass($dir) part: prepass($canprepass)
884             $callback->($self,$passed_data,0,$args); # callback but continue
885             }
886             }
887              
888             # everything else in $data must be added to buffer
889            
890             # there can be no gaps inside ibuf because caller is only allowed to
891             # pass data which we explicitly allowed
892             if ( $offset && $offset > $eobuf ) {
893             defined $ibuf->[0][2] and # we have still data in ibuf!
894             die "there can be no gaps in ibuf";
895             }
896             if ( ! defined $ibuf->[-1][2] ) {
897             # replace buf, because it was empty
898             $ibuf->[-1] = [ $offset||$eobuf,$data,$type,$callback,$args ];
899             } elsif ( $type < 0
900             and $type == $ibuf->[-1][2]
901             and $callback == $ibuf->[-1][3]
902             ) {
903             # streaming data, concatinate to existing buf of same type
904             $ibuf->[-1][1] .= $data;
905             } else {
906             # different type or non-streaming data, add new buf
907             push @$ibuf,[ $offset||$eobuf,$data,$type,$callback,$args ];
908             }
909             $DEBUG && $self->{request}->xdebug( "ibuf.length=%d",
910             $ibuf->[-1][0] + length($ibuf->[-1][1]) - $ibuf->[0][0]);
911              
912             SEND2IMP:
913             $DEBUG && $self->{request}->xdebug("forward(%d) %d bytes type=%s off=%d to analyzer",
914             $dir,length($fwd),$type,$offset);
915             $self->{imp}->data($dir,$fwd,$offset,$type);
916             return length($fwd);
917             }
918              
919             #####################################################################
920             # parse header fields
921             # taken from Net::Inspect::L7::HTTP (where it got put in by myself)
922             #####################################################################
923             my $token = qr{[^()<>@,;:\\"/\[\]?={}\x00-\x20\x7f-\xff]+};
924             my $token_value_cont = qr{
925             ($token): # key:
926             [\040\t]*([^\r\n]*?)[\040\t]* # value
927             ((?:\r?\n[\040\t][^\r\n]*)*) # continuation lines
928             \r?\n # (CR)LF
929             }x;
930             sub _parse_hdrfields {
931             my ($hdr,$fields) = @_;
932             my $bad = '';
933             parse:
934             while ( $hdr =~m{\G$token_value_cont}gc ) {
935             if ($3 eq '') {
936             # no continuation line
937             push @{$fields->{ lc($1) }},$2;
938             } else {
939             # with continuation line
940             my ($k,$v) = ($1,$2.$3);
941             # value-part -> ' ' + value-part
942             $v =~s{[\r\n]+[ \t](.*?)[ \t]*}{ $1}g;
943             push @{$fields->{ lc($k) }},$v;
944             }
945             }
946             if (pos($hdr)//0 != length($hdr)) {
947             # bad line inside
948             substr($hdr,0,pos($hdr),'');
949             $bad .= $1 if $hdr =~s{\A([^\n]*)\n}{};
950             goto parse;
951             }
952             return $bad;
953             }
954              
955             #####################################################################
956             # create decoder function for gzip|deflate content-encoding
957             #####################################################################
958             sub _create_decoder {
959             my $typ = shift;
960             $typ ~~ [ 'gzip','deflate' ] or return; # not supported
961              
962             my $gzip_csum;
963             my $buf = '';
964             my $inflate;
965              
966             return sub {
967             my $data = shift;
968             $buf .= $data;
969              
970             goto inflate if defined $inflate;
971              
972             # read gzip|deflate header
973             my $wb;
974             my $more = $data eq '' ? undef:''; # need more data if possible
975             if ( $typ eq 'gzip' ) {
976             my $hdr_len = 10; # minimum gzip header
977              
978             return $more if length($buf) < $hdr_len;
979             my ($magic,$method,$flags) = unpack('vCC',$buf);
980             if ( $magic != 0x8b1f or $method != Z_DEFLATED or $flags & 0xe0 ) {
981             $DEBUG && debug("no valid gzip header. assuming plain text");
982             $inflate = ''; # defined but false
983             goto inflate;
984             }
985             if ( $flags & 4 ) {
986             # skip extra section
987             return $more if length($buf) < ($hdr_len+=2);
988             $hdr_len += unpack('x10v',$buf);
989             return $more if length($buf) < $hdr_len;
990             }
991             if ( $flags & 8 ) {
992             # skip filename
993             my $o = index($buf,"\0",$hdr_len);
994             return $more if $o == -1; # end of filename not found
995             $hdr_len = $o+1;
996             }
997             if ( $flags & 16 ) {
998             # skip comment
999             my $o = index($buf,"\0",$hdr_len);
1000             return $more if $o == -1; # end of comment not found
1001             $hdr_len = $o+1;
1002             }
1003             if ( $flags & 2 ) {
1004             # skip CRC
1005             return $more if length($buf) < ($hdr_len+=2);
1006             }
1007              
1008             # remove header
1009             substr($buf,0,$hdr_len,'');
1010             $gzip_csum = 8; # 8 byte Adler CRC at end
1011             $wb = -MAX_WBITS(); # see Compress::Raw::Zlib
1012              
1013             } else {
1014             # deflate
1015             # according to RFC it should be zlib, but due to the encoding name
1016             # often real deflate is used instead
1017             # check magic bytes to decide
1018              
1019             # lets see if it looks like a zlib header
1020             # check for CM=8, CMID<=7 in first byte and valid FCHECK in
1021             # seconds byte
1022             return $more if length($buf)<2;
1023             my $magic = unpack('C',substr($buf,0,1));
1024             if (
1025             ( $magic & 0b1111 ) == 8 # CM = 8
1026             and $magic >> 4 <= 7 # CMID <= 7
1027             and unpack('n',substr($buf,0,2)) % 31 == 0 # valid FCHECK
1028             ) {
1029             # looks like zlib header
1030             $wb = +MAX_WBITS(); # see Compress::Raw::Zlib
1031             } else {
1032             # assume deflate
1033             $wb = -MAX_WBITS(); # see Compress::Raw::Zlib
1034             }
1035             }
1036              
1037             $inflate = Compress::Raw::Zlib::Inflate->new(
1038             -WindowBits => $wb,
1039             -AppendOutput => 1,
1040             -ConsumeInput => 1
1041             ) or die "cannot create inflation stream";
1042              
1043             inflate:
1044              
1045             return '' if $buf eq '';
1046              
1047             if ( ! $inflate ) {
1048             # wrong gzip header: sometimes servers claim to use gzip
1049             # if confronted with "Accept-Encoding: identity" but in reality
1050             # they send plain text
1051             # so consider it plain text and don't decode
1052             my $out = $buf;
1053             $buf = '';
1054             return $out
1055             }
1056              
1057             my $out = '';
1058             my $stat = $inflate->inflate(\$buf,\$out);
1059             if ( $stat == Z_STREAM_END ) {
1060             if ( $gzip_csum and length($buf) >= $gzip_csum ) {
1061             # TODO - check checksum - but what would it help?
1062             substr($buf,0,$gzip_csum,'');
1063             $gzip_csum = 0;
1064             }
1065             } elsif ( $stat != Z_OK ) {
1066             $DEBUG && debug("decode failed: $stat");
1067             return; # error
1068             }
1069             return $out
1070             };
1071             }
1072              
1073             1;