File Coverage

blib/lib/Sniffer/HTTP.pm
Criterion Covered Total %
statement 22 24 91.6
branch n/a
condition n/a
subroutine 8 8 100.0
pod n/a
total 30 32 93.7


line stmt bran cond sub pod time code
1             package Sniffer::HTTP;
2 4     4   155233 use strict;
  4         11  
  4         1361  
3 4     4   3379 use Sniffer::Connection::HTTP;
  4         16  
  4         37  
4 4     4   162 use base 'Class::Accessor';
  4         8  
  4         321  
5 4     4   25 use Data::Dumper;
  4         10  
  4         269  
6 4     4   5442 use NetPacket::Ethernet;
  4         5965  
  4         264  
7 4     4   3620 use NetPacket::IP;
  4         6350  
  4         178  
8 4     4   26 use NetPacket::TCP;
  4         7  
  4         310  
9 4     4   6205 use Net::Pcap; # just for the convenience function below
  0            
  0            
10             use Net::Pcap::FindDevice;
11             use Carp qw(croak);
12              
13             use vars qw($VERSION);
14              
15             $VERSION = '0.23';
16              
17             =head1 NAME
18              
19             Sniffer::HTTP - multi-connection sniffer driver
20              
21             =head1 SYNOPSIS
22              
23             use Sniffer::HTTP;
24             my $VERBOSE = 0;
25              
26             my $sniffer = Sniffer::HTTP->new(
27             callbacks => {
28             request => sub { my ($req,$conn) = @_; print $req->uri,"\n" if $req },
29             response => sub { my ($res,$req,$conn) = @_; print $res->code,"\n" },
30             log => sub { print $_[0] if $VERBOSE },
31             tcp_log => sub { print $_[0] if $VERBOSE > 1 },
32             },
33             timeout => 5*60, # seconds after which a connection is considered stale
34             stale_connection
35             => sub { my ($s,$conn,$key);
36             $s->log->("Connection $key is stale.");
37             $s->remove_connection($key);
38             },
39             );
40              
41             $sniffer->run(); # uses the "best" default device
42              
43             # Or, if you want to feed it the packets yourself:
44              
45             while (1) {
46              
47             # retrieve ethernet packet into $eth,
48             # for example via Net::Pcap
49             my $eth = sniff_ethernet_packet;
50              
51             # And handle the packet. Callbacks will be invoked as soon
52             # as complete data is available
53             $sniffer->handle_eth_packet($eth);
54             };
55              
56             This driver gives you callbacks with the completely accumulated
57             Ls or Ls as sniffed from the
58             TCP traffic. You need to feed it the Ethernet, IP or TCP packets
59             either from a dump file or from L by unpacking them via
60             L.
61              
62             As the whole response data is accumulated in memory you should
63             be aware of memory issues. If you want to write stuff
64             directly to disk, you will need to submit patches to L.
65              
66             A good example to start from is the C
67             script that comes with the distribution.
68              
69             =head1 METHODS
70              
71             =head2 C<< new %ARGS >>
72              
73             Creates a new object for handling many HTTP requests.
74             You can pass in the following arguments:
75              
76             =over 4
77              
78             =item *
79              
80             C - preexisting connections (optional)
81              
82             =item *
83              
84             C - callbacks for the new connections (hash reference)
85              
86             =item *
87              
88             C - timeout in seconds after which a connection is considered stale
89              
90             =item *
91              
92             C - callback for stale connections
93              
94             =item *
95              
96             C - maximum size of data to capture per packet. The default is 16384,
97             which should be plenty for all cases.
98              
99             =back
100              
101             Usually, you will want to create a new object like this:
102              
103             my $sniffer = Sniffer::HTTP->new( callbacks => {
104             request => sub { my ($req, $conn) = @_; print $req->uri,"\n"; },
105             response => sub { my ($res,$req,$conn) = @_; print $res->code,"\n"; },
106             });
107              
108             except that you will likely do more work than this example.
109              
110             =cut
111              
112             __PACKAGE__->mk_accessors(qw(connections callbacks timeout pcap_device stale_connection snaplen));
113              
114             sub new {
115             my ($class,%args) = @_;
116              
117             $args{connections} ||= {};
118             $args{callbacks} ||= {};
119             $args{callbacks}->{log} ||= sub {};
120             $args{stale_connection} ||= sub {
121             my ($s,$conn,$key) = @_;
122             $conn->log->("$key is stale.");
123             $s->remove_connection($key);
124             };
125             $args{ snaplen } ||= 16384;
126              
127             $args{timeout} = 300
128             unless exists $args{timeout};
129              
130             my $self = $class->SUPER::new(\%args);
131              
132             my $user_closed = delete $args{callbacks}->{closed};
133             $args{callbacks}->{closed} = sub {
134             my $key = $_[0]->flow;
135             if (! exists $args{connections}->{$key}) {
136             warn "Error: flow() ne connection-key!";
137             $key = join ":", reverse split /:/, $key;
138             };
139             $_[0]->{log}->("Removing $key");
140             $self->remove_connection($key);
141             goto &$user_closed
142             if $user_closed;
143             };
144              
145             $self;
146             };
147              
148             =head2 C<< $sniffer->remove_connection KEY >>
149              
150             Removes a connection (or a key) from the list
151             of connections. This will not have the intended
152             effect if the connection is still alive, as it
153             will be recreated as soon as the next packet
154             for it is received.
155              
156             =cut
157              
158             sub remove_connection {
159             my ($self,$key) = @_;
160             if (ref $key) {
161             my $real_key = $key->flow;
162             if (! exists $self->connections->{$real_key}) {
163             warn "Error: flow() ne connection-key!";
164             $real_key = join ":", reverse split /:/, $real_key;
165             };
166             $key = $real_key;
167             };
168             delete $self->connections->{$key};
169             };
170              
171             =head2 C<< $sniffer->find_or_create_connection TCP, %ARGS >>
172              
173             This parses a TCP packet and creates the TCP connection
174             to keep track of the packet order and resent packets.
175              
176             =cut
177              
178             sub find_or_create_connection {
179             my ($self,$tcp) = @_;
180              
181             my $connections = $self->connections;
182              
183             # Implement find_or_create() for connections in
184             # the base class ...
185             my $key = $tcp->{src_port} . ":" . $tcp->{dest_port};
186             if (! exists $connections->{$key}) {
187             my $key2 = $tcp->{dest_port} . ":" . $tcp->{src_port};
188             if (! exists $connections->{$key2}) {
189             $self->callbacks->{log}->("Creating connection $key, sequence start at " . $tcp->{seqnum});
190             my $c = $self->callbacks;
191             my $o = Sniffer::Connection::HTTP->new(
192             %$c,
193             tcp => $tcp,
194             );
195             $connections->{$key} = $o;
196             } else {
197             $key = $key2
198             };
199             };
200              
201             return $connections->{$key};
202             };
203              
204             =head2 C<< $sniffer->stale_connections( TIMEOUT, TIMESTAMP, HANDLER ) >>
205              
206             Will call the handler HANDLER for all connections that
207             have a C before TIMESTAMP - TIMEOUT.
208              
209             All parameters are optional and default to:
210              
211             TIMEOUT - $sniffer->timeout
212             TIMESTAMP - time()
213             HANDLER - $sniffer->stale_connection
214              
215             It returns all stale connections.
216              
217             =cut
218              
219             sub stale_connections {
220             my ($self,$timeout,$timestamp,$handler) = @_;
221             $timeout ||= $self->timeout;
222             $handler ||= $self->stale_connection;
223             $timestamp ||= time();
224              
225             my $cutoff = $timestamp - $timeout;
226              
227             my $connections = $self->connections;
228             my @stale = grep { $connections->{$_}->last_activity < $cutoff } keys %$connections;
229             for my $connection (@stale) {
230             $handler->($self, $connections->{$connection}, $connection);
231             };
232              
233             map {$connections->{$_}} @stale
234             };
235              
236             =head2 C<< $sniffer->live_connections TIMEOUT, TIMESTAMP >>
237              
238             Returns all live connections. No callback
239             mechanism is provided here.
240              
241             The defaults are
242             TIMEOUT - $sniffer->timeout
243             TIMESTAMP - time()
244              
245             =cut
246              
247             sub live_connections {
248             my ($self,$timeout,$timestamp) = @_;
249             $timeout ||= $self->timeout;
250             $timestamp ||= time();
251              
252             my $cutoff = $timestamp - $timeout;
253              
254             my $connections = $self->connections;
255             grep { $_->last_activity >= $cutoff } values %$connections;
256             };
257              
258             =head2 C<< $sniffer->handle_eth_packet ETH [, TIMESTAMP] >>
259              
260             Processes a raw ethernet packet. L will return
261             this kind of packet for most Ethernet network cards.
262              
263             You need to call this method (or one of the other protocol
264             methods) for every packet you wish to handle.
265              
266             The optional TIMESTAMP corresponds to the epoch time
267             the packet was captured at. It defaults to the value
268             of C.
269              
270             =cut
271              
272             sub handle_eth_packet {
273             my ($self,$eth,$ts) = @_;
274             $ts ||= time();
275             #warn Dumper( NetPacket::Ethernet->decode($eth) );
276             $self->handle_ip_packet(NetPacket::Ethernet->decode($eth)->{data}, $ts);
277             };
278              
279             =head2 C<< $sniffer->handle_ip_packet IP [, TIMESTAMP] >>
280              
281             Processes a raw ip packet.
282              
283             The optional TIMESTAMP corresponds to the epoch time
284             the packet was captured at. It defaults to the value
285             of C.
286              
287             =cut
288              
289             sub handle_ip_packet {
290             my ($self,$ip,$ts) = @_;
291             $ts ||= time();
292             #warn Dumper( NetPacket::IP->decode($ip) );
293             # This is a workaround around a bug in NetPacket::IP v0.04, which sets the
294             # payload to include the trailer
295             my $i = NetPacket::IP->decode($ip);
296              
297             # Safeguard against malformed IP headers
298             $i->{hlen} = 5
299             if $i->{hlen} < 5;
300             my $conn = $self->handle_tcp_packet(substr($i->{data}, 0, $i->{len}-($i->{hlen}*4)), $ts);
301             unless($conn->tcp_connection->dest_host) {
302             $conn->tcp_connection->dest_host($i->{dest_ip});
303             $conn->tcp_connection->src_host($i->{src_ip});
304             }
305             $conn;
306             };
307              
308             =head2 C<< $sniffer->handle_tcp_packet TCP [, TIMESTAMP] >>
309              
310             Processes a raw tcp packet. This processes the packet
311             by handing it off to the L which handles
312             the reordering of TCP packets.
313              
314             It returns the L object that
315             handled the packet.
316              
317             The optional TIMESTAMP corresponds to the epoch time
318             the packet was captured at. It defaults to the value
319             of C.
320              
321             =cut
322              
323             sub handle_tcp_packet {
324             my ($self,$tcp,$ts) = @_;
325             $ts ||= time();
326             if (! ref $tcp) {
327             $tcp = NetPacket::TCP->decode($tcp);
328             };
329             #warn $tcp->{src_port}.":".$tcp->{dest_port};;
330             my $conn = $self->find_or_create_connection($tcp);
331             $conn->handle_packet($tcp,$ts);
332             # Handle callbacks for detection of stale connections
333             $self->stale_connections();
334              
335             # Return the connection that the packet belongs to
336             $conn;
337             };
338              
339             =head2 C<< run DEVICE_NAME, PCAP_FILTER, %OPTIONS >>
340              
341             Listens on the given device for all TCP
342             traffic from and to port 80 and invokes the callbacks
343             as necessary. If you want finer control
344             over what C does, you need to set up
345             Net::Pcap yourself.
346              
347             The C parameter is used to determine
348             the device via C from L.
349              
350             The C<%OPTIONS> can be the following options:
351              
352             =over 4
353              
354             =item *
355              
356             C - filename to which the whole capture stream is
357             written, in L format.
358              
359             This is mostly
360             useful for remote debugging of a problematic
361             sequence of connections.
362              
363             =item *
364              
365             C - a preconfigured Net::Pcap device.
366              
367             This skips the detection of the device by name. If you have special
368             configuration options, configure the device to your needs in your
369             code and then pass it in.
370              
371             =item *
372              
373             C - the netmask to capture on.
374              
375             If you want to skip netmask detection, for example because your
376             capture device has no IP address, you can pass in the netmask
377             through this option.
378              
379             =item *
380              
381             C - size of the L capture buffer
382              
383             The size of this buffer can determine whether you lose packets
384             while processing. A large value led to lost packets in at least one case.
385             The default value is 16384.
386              
387             =item
388              
389             C - the read timeout in ms while waiting for packets. The default is
390             500 ms.
391              
392             =back
393              
394             =cut
395              
396             sub run {
397             my ($self,$device_name,$pcap_filter,%options) = @_;
398              
399             $options{ device } ||= find_device($device_name);
400            
401             # Set a name so the error messages look good
402             $device_name = ''
403             if exists $options{ device };
404             $device_name = ''
405             unless defined $device_name;
406            
407             $pcap_filter ||= "tcp port 80";
408             $options{ snaplen } ||= $self->snaplen;
409             $options{ timeout } ||= 500;
410              
411             my ($err);
412             if (! $options{ netmask }) {
413             # detect the netmask unless we have a user-specified netmask
414             my ($netmask, $address);
415             if (Net::Pcap::lookupnet($options{ device }, \$address, \$netmask, \$err)) {
416             die "Unable to look up device information for '$device_name': $err";
417             }
418             warn $err if $err;
419             $options{ netmask } = $netmask;
420             };
421              
422             # Create packet capture object on device
423             my $pcap = Net::Pcap::open_live($options{ device }, $options{ snaplen }, -1, $options{ timeout }, \$err);
424             unless (defined $pcap) {
425             die "Unable to create packet capture on device '$device_name': $err";
426             };
427              
428             $self->pcap_device($pcap);
429              
430             my $filter;
431             Net::Pcap::compile(
432             $pcap,
433             \$filter,
434             $pcap_filter,
435             0,
436             $options{ netmask },
437             ) && die 'Unable to compile packet capture filter';
438             Net::Pcap::setfilter($pcap,$filter);
439              
440             my $save;
441             if ($options{capture_file}) {
442             $save = Net::Pcap::dump_open($pcap,$options{capture_file});
443             if(! $save) {
444             warn "Could not save to $options{capture_file}";
445             };
446             #END {
447             # # Emergency cleanup
448             # if ($save) {
449             # Net::Pcap::dump_flush($save);
450             # Net::Pcap::dump_close($save);
451             # undef $save;
452             # }
453             #};
454             };
455              
456             Net::Pcap::loop($pcap, -1, sub {
457             if ($save) {
458             Net::Pcap::dump($save, @_[1,2]);
459             };
460             $self->handle_eth_packet($_[2], $_[1]->{tv_sec});
461             }, '')
462             || die 'Unable to perform packet capture';
463              
464             if ($save) {
465             Net::Pcap::dump_flush($save);
466             Net::Pcap::dump_close($save);
467             undef $save;
468             };
469             };
470              
471             =head2 C<< run_file FILENAME, PCAP_FILTER >>
472              
473             "Listens" to the packets dumped into
474             a file. This is convenient to use if you
475             have packet captures from a remote machine
476             or want to test new protocol sniffers.
477              
478             The file is presumed to contain an ethernet
479             stream of packets.
480              
481             =cut
482              
483             sub run_file {
484             my ($self, $filename, $pcap_filter) = @_;
485              
486             $pcap_filter ||= "tcp port 80";
487              
488             my $err;
489              
490             my $pcap = Net::Pcap::open_offline($filename, \$err);
491             unless (defined $pcap) {
492             croak "Unable to create packet capture from filename '$filename': $err";
493             };
494             $self->pcap_device($pcap);
495              
496             my $filter;
497             Net::Pcap::compile(
498             $pcap,
499             \$filter,
500             $pcap_filter,
501             0,
502             0,
503             ) && die 'Unable to compile packet capture filter';
504             Net::Pcap::setfilter($pcap,$filter);
505              
506             Net::Pcap::loop($pcap, -1, sub { $self->handle_eth_packet($_[2], $_[1]->{tv_sec}) }, '')
507             };
508              
509             1;
510              
511             =head1 CALLBACKS
512              
513             =head2 C<< request REQ, CONN >>
514              
515             The C callback is called with the parsed request and the connection
516             object. The request will be an instance of L and will
517             have an absolute URI if possible. Currently, the hostname for the absolute URI
518             is constructed from the C header.
519              
520             =head2 C<< response RES, REQ, CONN >>
521              
522             The C callback is called with the parsed response, the request
523             and the connection object.
524              
525             =head2 C<< log MESSAGE >>
526              
527             The C callback is called whenever the connection makes progress
528             and in other various situations.
529              
530             =head2 C<< tcp_log MESSAGE >>
531              
532             The C callback is passed on to the underlying C
533             object and can be used to monitor the TCP connection.
534              
535             =head2 C<< stale_connection SNIFFER, CONN >>
536              
537             Is called whenever a connection goes over the C limit
538             without any activity. The default handler weeds out stale
539             connections with the following code:
540              
541             sub {
542             my ($self,$conn,$key);
543             $self->log->("Connection $key is stale.");
544             delete $self->connections->{ $key }
545             }
546              
547             =head1 EXAMPLE PCAP FILTERS
548              
549             Here are some example Net::Pcap filters for common things:
550              
551             Capture all HTTP traffic between your machine and C:
552              
553             (dest www.example.com && (tcp port 80))
554             || (src www.example.com && (tcp port 80))
555              
556             Capture all HTTP traffic between your machine
557             and C or C:
558              
559             (dest www1.example.com && (tcp port 80))
560             ||(src www1.example.com && (tcp port 80))
561             ||(dest www2.example.com && (tcp port 80))
562             ||(src www2.example.com && (tcp port 80))
563              
564             Note that Net::Pcap resolves the IP addresses before using them, so you might
565             actually get more data than you asked for.
566              
567             =head1 BUGS
568              
569             =head2 Closing Connections Properly
570              
571             Currently, it is not well-detected when a connection is closed by the
572             starting side and no C packet is received from the remote side. This
573             can even happen is you close the browser window instead of waiting
574             for the connections to auto-close.
575              
576             I'm not sure how to fix this besides employing better guesswork
577             and "closing" connections as soon as the C packet gets sent.
578              
579              
580             =head2 Small Testsuite
581              
582             The whole module suite has almost no tests.
583              
584             If you experience problems, I supply me with a complete,
585             relevant packet dump as the included C creates. Even
586             better, supply me with (failing) tests.
587              
588             =head1 AUTHOR
589              
590             Max Maischein (corion@cpan.org)
591              
592             =head1 COPYRIGHT
593              
594             Copyright (C) 2005-2011 Max Maischein. All Rights Reserved.
595              
596             This code is free software; you can redistribute it and/or modify it
597             under the same terms as Perl itself.
598              
599             =head1 SEE ALSO
600              
601             L, L, L
602              
603             =cut