File Coverage

blib/lib/HTTP/Sessioniser.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package HTTP::Sessioniser;
2              
3 1     1   24168 use 5.008008;
  1         4  
  1         35  
4 1     1   5 use strict;
  1         2  
  1         33  
5 1     1   4 use warnings;
  1         6  
  1         34  
6              
7             # We don't actually use this, but if it isn't installed
8             # then HTTP::Response will silently not ungzip data
9             # Using it here will error if it is installed
10 1     1   6604 use IO::Compress::Gzip;
  1         88360  
  1         62  
11              
12             # Be sure to use version 0.05 of HTTP::Parser with the two patches submitted
13             # to the public CPAN bug tracker.
14 1     1   961 use HTTP::Parser 0.05;
  1         44875  
  1         40  
15 1     1   516 use Net::LibNIDS 0.02;
  0            
  0            
16             use Carp;
17              
18             use AutoLoader qw(AUTOLOAD);
19              
20             our $VERSION = '0.05';
21              
22             =head1 NAME
23              
24             HTTP::Sessioniser - Rebuild HTTP sessions from pcap streams
25              
26             =head1 SYNOPSIS
27              
28             use HTTP::Sessioniser;
29              
30             # This will be called once per HTTP request/response pair
31             sub my_callback {
32             my ($request, $response, $info) = @_;
33              
34             # $request is HTTP::Request
35             # $response is HTTP::Response
36             }
37              
38             my $s = HTTP::Sessioniser->new();
39             $s->parse_file('/path/to/file.pcap', \&my_callback);
40              
41             =head1 DESCRIPTION
42              
43             This module extracts HTTP sessions from pcap files with the help of Net::LibNIDS.
44              
45             It will piece HTTP data back together and return a pair of HTTP::Request and
46             HTTP::Response which correspond to one HTTP 'session'.
47              
48             HTTP CONNECT sessions are dealt with specially: the first request/response pair will
49             be returned as normal, subsequent requests will be skipped (as they do not contain
50             HTTP requests or responses, only SSL data).
51              
52             =head2 EXPORT
53              
54             None by default.
55              
56             =head2 Methods
57              
58             =head3 new
59              
60             my $s = HTTP::Sessioniser->new();
61              
62             Creates a new object.
63              
64             =cut
65              
66             sub new {
67             my ($class, %args) = @_;
68              
69             my $self = bless({}, $class);
70              
71             # Store statistics about the number of files parsed, requests found, failed
72             # requests. Useful if the same instance of this is used to parse multiple files.
73             $self->{statistics} = {};
74             $self->{statistics}{open_connections} = 0;
75             $self->{statistics}{total_connections} = 0;
76              
77             @{$self->{portlist}} = qw(80 443 8080 3128);
78              
79             return $self;
80             }
81              
82             =head3 parse_file
83              
84             $s->parse_file('/path/to/file.pcap', \&callback);
85              
86             Parses a pcap file using libnids, rebuilding pairs of HTTP::Request and HTTP::Response. These will be passed to the callback function along with a hash of information about the current connection.
87              
88             =cut
89              
90             sub parse_file {
91             my ($self, $filename, $callback) = @_;
92              
93             # Reset the current connections table
94             $self->{connections} = {};
95              
96             # Set this so we have them inside the callback
97             $self->{current_filename} = $filename;
98             $self->{callback} = $callback;
99              
100             Net::LibNIDS::param::set_filename($filename);
101              
102             # Set a pcap filter, see the manpage for tcpdump for more information. The manpage for
103             # libnids explains why the 'or (..)' is required.
104             my $bpf_ports = join(" or ", map { "port $_" } @{$self->{portlist}});
105             Net::LibNIDS::param::set_pcap_filter($bpf_ports . ' or (ip[6:2] & 0x1fff != 0)');
106              
107             if (!Net::LibNIDS::init()) {
108             warn "Uh oh, libnids failed to initialise!\n";
109             warn "Check you have successfully built and installed the module first.\n";
110             return;
111             }
112              
113             # Set the callback function and run libnids
114             my $data_callback = sub { $self->process_data(@_); };
115             Net::LibNIDS::tcp_callback($data_callback);
116              
117             # libnids resets state for each new file, so reset counter
118             $self->{statistics}{open_connections} = 0;
119             Net::LibNIDS::run();
120              
121             # TODO: At the end, go through and return all requests that had no responses?
122             }
123              
124             sub clear_statistics {
125             my ($self) = @_;
126             undef $self->{statistics};
127             $self->{statistics} = {};
128             }
129              
130             =head3 ports
131              
132             $s->ports( [ 80, 443, 8080, 3128 ] );
133             my @p = $s->ports;
134              
135             Set or return and array of ports we expect to see HTTP transmission on. The default set of ports is 80, 443, 8080 and 3128.
136              
137             If you are looking for HTTP on other ports (e.g. proxying or application servers) then use this to set the filter appropriately.
138              
139             =cut
140              
141             sub ports {
142             my ($self, @ports) = @_;
143              
144             if (@ports) {
145             $self->{portlist} = qw();
146              
147             foreach my $p (@ports) {
148             push(@{$self->{portlist}}, $p) if $p =~ /\d+/ and ($p >= 0 or $p <= 65535);
149             }
150              
151             # TODO: If our portlist is empty here, should we reset it?
152             }
153             return @{$self->{portlist}};
154             }
155              
156             =head3 add_port
157              
158             $s->add_port(8000);
159              
160             Add one port to the filter list.
161              
162             =cut
163             sub add_port {
164             my ($self, $tcpport) = @_;
165             return 0 if $tcpport !~ /^\d+$/;
166             return 0 if $tcpport < 0 or $tcpport > 65535;
167              
168             push(@{$self->{portlist}}, $tcpport);
169             return $tcpport;
170             }
171              
172             # The libnids callback
173             sub process_data {
174             my ($self, $args) = @_;
175             my $key = $args->client_ip . ":" . $args->client_port . "-" . $args->server_ip . ":" . $args->server_port;
176              
177             # If we want to stop processing a certain connection, we need
178             # to skip any new events for it. Turning libnids collect_off
179             # doesn't work, it will send NIDS_JUST_EST for new packets.
180             if (defined $self->{connections}{$key}{ignored}) {
181             return;
182             }
183              
184             if($args->state != Net::LibNIDS::NIDS_JUST_EST() && !defined $self->{connections}{$key}{request_obj}) {
185             #print "ERROR: not just established and no object in $key\n";
186             print "status was: " . $args->state . "\n";
187             exit 1;
188             }
189              
190             # Collect from any new connections
191             if($args->state == Net::LibNIDS::NIDS_JUST_EST()) {
192             $self->{statistics}{total_connections}++;
193             $self->{statistics}{open_connections}++;
194              
195             $args->server->collect_on();
196             $args->client->collect_on();
197              
198             # Create a request and response object
199             $self->{connections}{$key}{request_obj} = HTTP::Parser->new(request => 1);
200             $self->{connections}{$key}{response_obj} = HTTP::Parser->new(response => 1);
201              
202             # print STDERR "New connection: $key (" . $statistics{open_connections} . " open)\n";
203              
204             } elsif ($args->state == Net::LibNIDS::NIDS_CLOSE()) {
205              
206             # If this flag has been set, there was a successful response with
207             # no content length header. We can assume that the connection close
208             # marks the end-of-data, so pass it back now
209             if ($self->{connections}{$key}->{no_content_length}) {
210             # print STDERR "DEBUG: No content length header, connection closed, assuming finished\n";
211             $self->do_callback($key, $args);
212             }
213             # print "CLOSED CONNECTION EVENT FOR $key IS CALLING cleanup\n";
214             $self->cleanup($key);
215             return;
216              
217             } elsif (
218             $args->state == Net::LibNIDS::NIDS_RESET() ||
219             $args->state == Net::LibNIDS::NIDS_TIMED_OUT() ||
220             $args->state == Net::LibNIDS::NIDS_EXITING()
221             ) {
222             #print "EXIT/RESET CONNECTION EVENT FOR $key IS CALLING cleanup " . $args->state . "\n";
223             $self->cleanup($key);
224             return;
225              
226             } elsif ($args->state == Net::LibNIDS::NIDS_DATA()) {
227              
228             # Data toward the server
229             if ($args->server->count_new) {
230             #print "DEBUG: Parsing data client->server\n";
231             my $data = substr($args->server->data, 0, $args->server->count_new);
232              
233             # We should not receive new data for a new request if one is already complete.
234             # But HTTP pipelining is allowed, which might get us here. So error.
235             # TODO: Check HTTP::Parser when we hit this, or possibly implement pipelining
236             if (defined $self->{connections}{$key}{request_complete}) {
237             #print "ERROR: Got more client->server data when we we expecting server->client in $key\n";
238             $self->stop_collecting($args, $key);
239             return;
240             }
241              
242             if (!defined $self->{connections}{$key}{request_time}) {
243             $self->{connections}{$key}{request_time} = $args->lastpacket_sec;
244             $self->{connections}{$key}{request_time_usec} = $args->lastpacket_usec;
245             }
246              
247             my $status;
248             eval {
249             $status = $self->{connections}{$key}{request_obj}->add($data);
250             };
251              
252             if ($@) {
253             chomp $@;
254             #print "ERROR: $key HTTP::Parser died for some reason ($@), data was:\n";
255             $self->stop_collecting($args, $key);
256             return;
257             }
258              
259             # Once we have enough data, mark the request as complete
260             if ($status == 0) {
261             if ($self->{connections}{$key}{request_obj}->object->method eq 'CONNECT') {
262             # Set a flag for the rest of this connection so it is not parsed
263             $self->stop_collecting($args, $key);
264             return;
265             }
266             $self->{connections}{$key}{request_complete} = 1;
267             # print "DEBUG: We have a complete request now, data was $data\n";
268             } else {
269             #print "added to $key request:\n$data\nwhich got status $status\n";
270             }
271              
272             return;
273             }
274              
275             # Data toward the client
276             if ($args->client->count_new) {
277             my $data = substr($args->client->data, 0, $args->client->count_new);
278              
279             # Data from the server->client before we expected it. Possibly HTTP pipelining, which
280             # isn't yet supported.
281             if (!defined $self->{connections}{$key}{request_complete}) {
282             $self->stop_collecting($args, $key);
283             return;
284             }
285              
286             # Set the time from the first packet of the response
287             if (!defined $self->{connections}{$key}{response_time}) {
288             $self->{connections}{$key}{response_time} = $args->lastpacket_sec;
289             $self->{connections}{$key}{response_time_usec} = $args->lastpacket_usec;
290             }
291              
292             # HTTP::Parser uses die(), so catch that here
293             my $status;
294             eval {
295             $status = $self->{connections}{$key}{response_obj}->add($data);
296             };
297              
298             if ($@) {
299             chomp $@;
300             # print "ERROR: HTTP::Parser died for some reason in $key: $@\n";
301             $self->stop_collecting($args, $key);
302             return;
303             }
304              
305             # Missing content-length header
306             if ($status == -3) {
307             # Set a flag to show that the response had no content-length header,
308             # then assume at the end of this connection that we need to process it
309             $self->{connections}{$key}{no_content_length} = 1;
310             }
311              
312             # No more data needed
313             if ($status == 0) {
314             $self->do_callback($key, $args);
315              
316             }
317            
318             return;
319             }
320             }
321             }
322              
323             # We have a complete HTTP transaction, return it to the user
324             sub do_callback {
325             my ($self, $key, $nids_obj) = @_;
326              
327             my $request = $self->{connections}{$key}{request_obj}->object;
328             my $response = $self->{connections}{$key}{response_obj}->object;
329              
330             # BUG: Shouldn't ever get this!
331             if (!defined $request || !defined $response) {
332             print "DEBUG: request or response is not defined in $key\n";
333             exit 1;
334             }
335              
336             my $info = {
337             'request_time' => $self->{connections}{$key}{request_time},
338             'request_time_usec' => $self->{connections}{$key}{request_time_usec},
339             'response_time' => $self->{connections}{$key}{response_time},
340             'response_time_usec' => $self->{connections}{$key}{response_time_usec},
341             'filename' => $self->{current_filename},
342             'client_ip' => $nids_obj->client_ip,
343             'server_ip' => $nids_obj->server_ip
344             };
345              
346             $self->{callback}($request, $response, $info);
347              
348             # Reset state variables so that we can handle multiple HTTP
349             # requests per TCP connection
350             undef $self->{connections}{$key}{request_obj};
351             undef $self->{connections}{$key}{response_obj};
352             $self->{connections}{$key}{request_obj} = HTTP::Parser->new(request => 1);
353             $self->{connections}{$key}{response_obj} = HTTP::Parser->new(response => 1);
354             undef $self->{connections}{$key}{request_complete};
355             undef $self->{connections}{$key}{request_time};
356             undef $self->{connections}{$key}{response_time};
357              
358             return;
359             }
360              
361             # Stop collecting on a connection for some reason
362             sub stop_collecting {
363             my ($self, $args, $key) = @_;
364              
365             # TODO: Can we save the stream out as a pcap if it fails processing?
366              
367             # We could set libnids collect_off here, but it will just generate
368             # NIDS_JUST_EST events for future packets. So we don't bother.
369             $self->cleanup($key);
370             $self->{connections}{$key}{ignored} = 1;
371              
372             return;
373             }
374              
375             # A connection has finished, cleanup any associated data
376             sub cleanup {
377             my ($self, $key) = @_;
378              
379             delete $self->{connections}{$key};
380             $self->{statistics}{open_connections}--;
381             }
382              
383             =head1 BUGS
384              
385             This module does not support HTTP pipelining. It could be added if I find data which requires it.
386              
387             =head1 SEE ALSO
388              
389             HTTP::Parser - used to parse data into HTTP::Request or HTTP::Response objects
390              
391             =head1 AUTHOR
392              
393             David Cannings Edavid@edeca.netE
394              
395             =head1 COPYRIGHT AND LICENSE
396              
397             Copyright (C) 2010 by David Cannings
398              
399             =cut
400              
401             1;
402             __END__