File Coverage

blib/lib/POE/Filter/HTTPD/Chunked.pm
Criterion Covered Total %
statement 273 288 94.7
branch 64 82 78.0
condition 25 39 64.1
subroutine 44 46 95.6
pod 7 7 100.0
total 413 462 89.3


line stmt bran cond sub pod time code
1             # POE::Filter::HTTPD::Chunked Copyright 2010 Mark Morgan .
2             # based on POE::Filter::HTTPD Copyright 1998 Artur Bergman .
3              
4             package POE::Filter::HTTPD::Chunked;
5              
6 2     2   2771 use strict;
  2         4  
  2         72  
7 2     2   12 use warnings;
  2         3  
  2         113  
8              
9             our $VERSION = 0.90;
10              
11 2     2   11 use Carp qw(croak);
  2         3  
  2         143  
12 2         380 use HTTP::Status qw(
13             status_message
14             RC_BAD_REQUEST
15             RC_OK
16             RC_LENGTH_REQUIRED
17 2     2   12 );
  2         4  
18              
19 2     2   11 use HTTP::Request ();
  2         4  
  2         72  
20 2     2   2271 use HTTP::Response ();
  2         4342  
  2         50  
21              
22 2     2   13 use base qw( POE::Filter );
  2         2  
  2         1676  
23              
24             # set the following to get info on what the filter is doing. This is
25             # *very* noisy.
26 2   50 2   1072 use constant DEBUG => $ENV{ POE_FILTER_HTTPD_DEBUG } || 0;
  2         5  
  2         243  
27              
28             # indices into our self
29 2     2   12 use constant BUFFER => 0;
  2         3  
  2         98  
30 2     2   10 use constant IS_CHUNKED => 1;
  2         3  
  2         78  
31 2     2   9 use constant CHUNK_BUFFER => 2;
  2         3  
  2         77  
32 2     2   9 use constant FINISH => 3;
  2         3  
  2         79  
33 2     2   12 use constant REQUEST => 4;
  2         3  
  2         80  
34 2     2   12 use constant STATE => 5;
  2         4  
  2         83  
35 2     2   9 use constant EVENT_ON_PARTIAL_CHUNK => 6;
  2         2  
  2         90  
36              
37             # states for our mini-state machine
38 2     2   9 use constant STATE_REQUEST_LINE => 'parsing request line';
  2         4  
  2         84  
39 2     2   8 use constant STATE_PARSING_HEADER => 'parsing header';
  2         4  
  2         80  
40 2     2   9 use constant STATE_PARSING_BODY => 'parsing body';
  2         4  
  2         89  
41 2     2   14 use constant STATE_PARSING_COMPLETE => 'parsing complete';
  2         4  
  2         72  
42 2     2   9 use constant STATE_PARSING_TRAILER => 'parsing trailer';
  2         39  
  2         348  
43              
44             # very lenient CRLF matcher, matches CRLF, LFCR, CR, LF
45             my $CRLF = qr/(?:\x0D\x0A?|\x0A\x0D?)/;
46              
47             # for matching an HTTP header. Currently ignores continuation header lines...
48             my $HEADER_REGEX = qr/^([^()<>@,;:\\"\/\[\]?={}\s]+):\s*(.*?)$CRLF/;
49              
50             # for matching chunked transfer-encoding definition, within header value
51             my $CHUNKED_REGEX = qr/(?:,\s*|^)chunked\s*$/;
52              
53             # mapping of supported request types, and whether they support body
54 2     2   12 use constant DENY_CONTENT_BODY => 1; # can't have
  2         4  
  2         102  
55 2     2   10 use constant ALLOW_CONTENT_BODY => 2;
  2         3  
  2         95  
56 2     2   9 use constant REQUIRE_CONTENT_BODY => 3;
  2         3  
  2         236  
57              
58 2         692 use constant REQUEST_TYPES => {
59             OPTIONS => ALLOW_CONTENT_BODY,
60             GET => DENY_CONTENT_BODY,
61             HEAD => DENY_CONTENT_BODY,
62             POST => ALLOW_CONTENT_BODY,
63             PUT => ALLOW_CONTENT_BODY,
64             DELETE => DENY_CONTENT_BODY,
65             TRACE => ALLOW_CONTENT_BODY,
66 2     2   12 };
  2         4  
67              
68             my $HTTP_0_9 = "HTTP/0.9"; # not real, just give us something to compare with
69             my $HTTP_1_0 = "HTTP/1.0";
70             my $HTTP_1_1 = "HTTP/1.1";
71              
72             #------------------------------------------------------------------------------
73              
74             sub new {
75 49     49 1 157409 my ( $class, %args ) = @_;
76              
77 49         203 my $self = bless [], $class;
78 49         143 $self->__reset_state( 1 );
79              
80 49 100       189 $self->[ EVENT_ON_PARTIAL_CHUNK ] = $args{ event_on_chunk } ? 1 : 0;
81              
82 49         152 return $self;
83             }
84              
85             #------------------------------------------------------------------------------
86              
87             sub get_one_start {
88 0     0 1 0 my ($self, $stream) = @_;
89              
90             # TODO TESTS FOR THIS!
91              
92 0 0       0 $stream = [ $stream ] unless ( ref( $stream ) );
93 0         0 $self->[BUFFER] .= join( '', @$stream );
94             }
95              
96             sub get_one {
97 0     0 1 0 my ($self) = @_;
98 0 0       0 return ( $self->[FINISH] ) ? [] : $self->get( [] );
99             }
100              
101             sub get {
102 124     124 1 34202 my ($self, $stream) = @_;
103              
104             # append current stream input into buffer
105 124         556 $self->[ BUFFER ] .= join '', @$stream;
106              
107 124         133 DEBUG && warn "buffer = " . $self->[ BUFFER ];
108              
109 124         139 my $initial_chunk_size = do {
110 2     2   11 use bytes;
  2         3  
  2         16  
111 124         234 length $self->[ CHUNK_BUFFER ];
112             };
113              
114             # a basic state machine, to parse the request. Flows are basically:
115             # - no request -> request -> parsed_header -> parsed_body -> done (send and reset)
116             # At any point, can error out of this, at which point we reset the state machine
117 124         159 while ( 1 ) {
118 263         294 my $to_return;
119              
120 263 100       901 if ( $self->[ STATE ] eq STATE_REQUEST_LINE ) {
    100          
    100          
    100          
    50          
121 76         182 $to_return = $self->_parse_request_line;
122             } elsif ( $self->[ STATE ] eq STATE_PARSING_HEADER ) {
123 81         192 $to_return = $self->_parse_header;
124             } elsif ( $self->[ STATE ] eq STATE_PARSING_BODY ) {
125 59         153 $to_return = $self->_parse_body;
126             } elsif ( $self->[ STATE ] eq STATE_PARSING_TRAILER ) {
127 7         25 $to_return = $self->_parse_trailer;
128             } elsif ( $self->[ STATE ] eq STATE_PARSING_COMPLETE ) {
129             # request is done, clean up what we have now, and return it.
130 40         87 $to_return = [ $self->[ REQUEST ] ];
131              
132 40         51 DEBUG && warn "completed request: " . $self->[ REQUEST ]->as_string;
133              
134 40         93 $self->__reset_state( 0 );
135              
136 40         89 $self->[ FINISH ] = 1;
137             } else {
138 0         0 die "Unexpected state '$self->[ STATE ]'!!!";
139             }
140              
141 263 100 100     674 if ( $to_return and not scalar @{ $to_return } ) {
  124         451  
142 72 100       158 if ( $self->[ EVENT_ON_PARTIAL_CHUNK ] ) {
143             # if we are still in body parsing state, and have more chunk data
144             # than when we first ran through this loop, then return a marker
145             # response, to indicate that we've received partial chunk data.
146             # This is intended to allow the wheel/component to reset any
147             # timeouts or the like.
148 2     2   707 use bytes;
  2         3  
  2         9  
149              
150 2         4 my $current_chunk_size = length $self->[ CHUNK_BUFFER ];
151              
152 2 50       7 if ( $current_chunk_size > $initial_chunk_size ) {
153 2         16 my $chunked = HTTP::Request::Chunked->new;
154              
155             # set headers to indicate the offset and size of
156             # the chunk, and the content to the current chunk
157              
158 2         102 my $chunk_size = $current_chunk_size - $initial_chunk_size;
159              
160 2         12 $chunked->header( 'x-chunk-offset' => $initial_chunk_size );
161 2         141 $chunked->header( 'x-chunk-size' => $chunk_size );
162              
163 2         155 $chunked->content( substr( $self->[ CHUNK_BUFFER ], $initial_chunk_size, $chunk_size ) );
164              
165 2         77 DEBUG && warn "got partial chunk of size $chunk_size at offset $initial_chunk_size\n";
166              
167 2         3 push @{ $to_return }, $chunked;
  2         6  
168             }
169             }
170             }
171              
172 263 100       722 $to_return && return $to_return;
173             }
174             }
175              
176             #------------------------------------------------------------------------------
177              
178             sub put {
179 2     2 1 23 my ($self, $responses) = @_;
180 2         6 my @raw;
181              
182             # HTTP::Response's as_string method returns the header lines
183             # terminated by "\n", which does not do the right thing if we want
184             # to send it to a client. Here I've stolen HTTP::Response's
185             # as_string's code and altered it to use network newlines so picky
186             # browsers like lynx get what they expect.
187              
188 2         8 foreach (@$responses) {
189 2         8 my $code = $_->code;
190 2   50     26 my $status_message = status_message($code) || "Unknown Error";
191 2   50     17 my $message = $_->message || "";
192 2   50     33 my $proto = $_->protocol || 'HTTP/1.0';
193              
194 2         29 my $status_line = "$proto $code";
195 2 50       14 $status_line .= " ($status_message)" if $status_message ne $message;
196 2 50       9 $status_line .= " $message" if length($message);
197              
198             # Use network newlines, and be sure not to mangle newlines in the
199             # response's content.
200              
201 2         4 my @headers;
202 2         5 push @headers, $status_line;
203 2         24 push @headers, $_->headers_as_string("\x0D\x0A");
204              
205 2         53 push @raw, join("\x0D\x0A", @headers, "") . $_->content;
206             }
207              
208             # Allow next request after we're done sending the response.
209 2         47 $self->[FINISH] = 0;
210              
211 2         7 \@raw;
212             }
213              
214             sub clone {
215             # implement our own clone, as we want to reset the state completely,
216             # and ensure that we honour the current 'event_on_chunk' option
217 1     1 1 8 my ( $self ) = @_;
218              
219 1         3 my $class = ref $self;
220              
221 1         5 return $class->new( event_on_chunk => $self->[ EVENT_ON_PARTIAL_CHUNK ] );
222             }
223             #------------------------------------------------------------------------------
224              
225             sub get_pending {
226 2     2 1 12 my $self = shift;
227 2         464 croak ref($self)." does not support the get_pending() method\n";
228 0         0 return;
229             }
230              
231             #------------------------------------------------------------------------------
232             # Functions specific to HTTPD;
233             #------------------------------------------------------------------------------
234              
235             # Build a basic response, given a status, a content type, and some
236             # content.
237              
238             sub _build_basic_response {
239 12     12   26 my ($self, $content, $content_type, $status) = @_;
240              
241             # Need to check lengths in octets, not characters.
242 2     2   1247 use bytes;
  2         4  
  2         7  
243              
244 12   50     31 $content_type ||= 'text/html';
245 12   50     26 $status ||= RC_OK;
246              
247 12         178 my $response = HTTP::Response->new($status);
248              
249 2     2   123 my $length = do { use bytes; length $content };
  2         3  
  2         7  
  12         561  
  12         20  
250              
251 12         72 $response->push_header( 'Content-Type', $content_type );
252 12         468 $response->push_header( 'Content-Length', $length );
253 12         378 $response->content($content);
254              
255 12         298 return $response;
256             }
257              
258             sub _default_values {
259             return {
260 526     526   3081 STATE() => STATE_REQUEST_LINE,
261             IS_CHUNKED() => 0,
262             BUFFER() => '',
263             FINISH() => 0,
264             REQUEST() => undef,
265             CHUNK_BUFFER() => '',
266             };
267             }
268              
269             sub __reset_state {
270 101     101   158 my ( $self, $clean_buffer ) = @_;
271              
272 101         412 my @fields = ( STATE, FINISH, REQUEST, IS_CHUNKED );
273              
274 101 100       251 if ( $clean_buffer ) {
275 61         144 push( @fields, BUFFER, CHUNK_BUFFER );
276             }
277              
278 101         202 foreach my $field ( @fields ) {
279 526         986 $self->[ $field ] = $self->_default_values->{ $field };
280             }
281             }
282              
283              
284             sub _build_error {
285 12     12   26 my($self, $status, $details) = @_;
286              
287             # when we want to return an error, this object is pretty much dead.
288             # Clear out the state, including the buffers.
289              
290 12         30 $self->__reset_state( 1 );
291              
292 12   50     106 $status ||= RC_BAD_REQUEST;
293 12   50     35 $details ||= '';
294 12   50     53 my $message = status_message($status) || "Unknown Error";
295              
296 12         160 return $self->_build_basic_response(
297             ( "" .
298             "" .
299             "Error $status: $message" .
300             "" .
301             "" .
302             "

Error $status: $message

" .
303             "

$details

" .
304             "" .
305             ""
306             ),
307             "text/html",
308             $status
309             );
310             }
311              
312             sub _parse_request_line {
313 76     76   102 my ( $self ) = @_;
314              
315 76         86 DEBUG && warn "attempting to parse request line\n";
316              
317             # return no packets, if we haven't seen the end of line
318 76 100       634 if ( not $self->[ BUFFER ] =~ /$CRLF/s ) {
319 23         46 return [];
320             }
321              
322             # get the request line, or return an error
323 53 100       893 if ( $self->[ BUFFER ] =~ s/^\s*(\w+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?$CRLF// ) {
    50          
324 47         175 my ( $method, $uri, $version ) = ( $1, $2, $3 );
325              
326 47   66     153 $version ||= $HTTP_0_9;
327              
328 47         58 DEBUG && warn "got request of method = $method, uri = $uri, version = $version\n";
329              
330 47         182 my $request = HTTP::Request->new;
331              
332 47         2160 $request->method( $method );
333 47         753 $request->uri( $uri );
334 47         3117 $request->protocol( $version );
335              
336 47         449 $self->[ REQUEST ] = $request;
337 47         129 $self->[ STATE ] = STATE_PARSING_HEADER;
338             } elsif ( $self->[ BUFFER ] =~ /^.*?$CRLF/s ) {
339 6         9 DEBUG && warn "error in parsing request line\n";
340              
341             return [
342 6         23 $self->_build_error(RC_BAD_REQUEST, "Request line parse failure."),
343             ];
344             } else {
345 0         0 return [];
346             }
347              
348 47         123 return;
349             }
350              
351             sub _parse_header {
352 81     81   113 my ( $self ) = @_;
353              
354 81         89 DEBUG && warn "attempting to parse headers\n";
355              
356 81         799 while ( $self->[ BUFFER ] =~ s/$HEADER_REGEX// ) {
357 33         755 my ( $header, $value ) = ( $1, $2 );
358              
359 33         43 DEBUG && warn "got header line '$header' => '$value'\n";
360              
361             # pulled a header off the buffer
362 33         163 $self->[ REQUEST ]->header( $header => $value );
363             }
364              
365             # blank line to end the headers
366 81 100       1886 if ( $self->[ BUFFER ] =~ s/^$CRLF// ) {
367 47         64 DEBUG && warn "end of headers\n";
368              
369             # set IS_CHUNKED, if a transfer-encoding headers exists, and has
370             # 'chunked' as the last (or only) value. Adjust the header value
371             # in this case.
372 47 100       325 if ( my $te_header = $self->[ REQUEST ]->header( 'transfer-encoding' ) ) {
373 10 50       551 if ( $te_header =~ s/$CHUNKED_REGEX// ) {
374 10         18 DEBUG && warn "request is chunked\n";
375              
376 10         54 $self->[ IS_CHUNKED ] = 1;
377              
378             # set or clear the header, as appropriate
379 10 100       28 if ( $te_header =~ /\S/ ) {
380 1         8 $self->[ REQUEST ]->header( 'transfer-encoding' => $te_header );
381             } else {
382 9         50 $self->[ REQUEST ]->remove_header( 'transfer-encoding' );
383             }
384 10         335 DEBUG && warn "new header = $te_header\n";
385             }
386             }
387            
388 47         1691 $self->[ STATE ] = STATE_PARSING_BODY;
389              
390             # have enough information now, to determine whether we have met the
391             # allowance/requirement for body, for the given request type
392              
393 47         144 my $request_type = $self->[ REQUEST ]->method;
394              
395 47         506 my $requirement = REQUEST_TYPES->{ uc $request_type };
396 47 100       116 if ( defined $requirement ) {
397             # rfc-defined method; check whether body allowed or required
398              
399 45 100       142 my $has_body = $self->[ REQUEST ]->header( 'content-length' ) ? 1 : 0;
400              
401 45         1696 my $error;
402 45 50 66     314 if ( $requirement == DENY_CONTENT_BODY and $has_body ) {
    50 33        
403 0         0 DEBUG && warn "for request type $request_type, can't have a body\n";
404              
405 0         0 $error = $self->_build_error(
406             RC_BAD_REQUEST,
407             'request type does not allow a body'
408             );
409             } elsif ( $requirement == REQUIRE_CONTENT_BODY and not $has_body ) {
410 0         0 DEBUG && warn "for request type $request_type, must have a body\n";
411              
412 0         0 $error = $self->_build_error(
413             RC_LENGTH_REQUIRED,
414             'request type requires body'
415             );
416             } else {
417             # noop, ALLOW_CONTENT_BODY can have or not
418             }
419              
420 45 50       219 if ( $error ) {
421 0         0 return [ $error ];
422             }
423             } else {
424             return [
425 2         8 $self->_build_error( RC_BAD_REQUEST, "invalid request type '$request_type'" )
426             ];
427             }
428             } else {
429             # still haven't got full data, return empty array ref to
430             # cause return
431 34         80 return []
432             }
433             }
434              
435             sub _parse_body {
436 59     59   79 my ( $self ) = @_;
437              
438 59         85 my $is_chunked = $self->[ IS_CHUNKED ];
439              
440 59         179 my $content_length = $self->[ REQUEST ]->content_length;
441              
442 59 100 66     2111 if ( defined $content_length and $content_length and $is_chunked ) {
    100 100        
      100        
443             # can't have both content_length and transfer_encoding of chunked set
444             return [
445 1         4 $self->_build_error(
446             RC_BAD_REQUEST,
447             'Both content-length and transfer-encoding cannot be set in headers'
448             )
449             ];
450             } elsif ( $is_chunked and $self->[ REQUEST ]->protocol eq 'HTTP/1.0' ) {
451             # chunked encoding isn't valid for HTTP/1.0
452             return [
453 1         18 $self->_build_error(
454             RC_BAD_REQUEST,
455             "Can't use chunked encoding with HTTP/1.0"
456             )
457             ];
458             }
459              
460 57         177 DEBUG && warn "attempting to parse body, chunked = $is_chunked\n";
461              
462 57 100       144 if ( $content_length ) {
    100          
463 20         24 DEBUG && warn "looking for $content_length bytes\n";
464              
465             # if we have enough bytes in the buffer, then pull them off and complete the request.
466 2     2   2960 use bytes;
  2         4  
  2         6  
467              
468 20 100       150 if ( length $self->[ BUFFER ] >= $content_length ) {
469 7         72 my $content = substr( $self->[ BUFFER ], 0, $content_length, '' );
470              
471 7         10 DEBUG && warn "got content of '$content'\n";
472              
473 7         28 $self->[ REQUEST ]->content( $content );
474              
475 7         126 $self->[ STATE ] = STATE_PARSING_COMPLETE;
476             } else {
477 13         38 return [];
478             }
479             } elsif ( $is_chunked ) {
480 10         13 DEBUG && warn "looking for chunk portion\n";
481              
482             # first line of each chunk will be the chunk size (in hex),
483             # followed by that number of bytes of chunk data. Keep
484             # dechunking, till we get a chunk size of 0.
485 2     2   310 use bytes;
  2         85  
  2         8  
486              
487 10         15 my $processed_data = 0;
488 10         133 while ( $self->[ BUFFER ] =~ /^((.*?)$CRLF)/s ) {
489 66         73 DEBUG && warn "found a complete line\n";
490              
491 66         83 $processed_data = 1;
492              
493 66         197 my ( $line, $chunk_hex ) = ( $1, $2 );
494              
495 66 100       509 if ( $chunk_hex =~ /^([0-9a-f]+)(;.*)?$/i ) {
496 65         154 $chunk_hex = $1; # ignore semicolon, and everything after
497             } else {
498             # got an invalid chunk size, return an error
499             return [
500 1         8 $self->_build_error(
501             RC_BAD_REQUEST,
502             "invalid chunk size '$chunk_hex'"
503             )
504             ];
505             }
506              
507 65         107 my ( $line_size, $chunk_size ) = ( length( $line ), hex( $chunk_hex ) );
508              
509 65 100       865 if ( $chunk_size == 0 ) {
    100          
510 7         10 DEBUG && warn "got a chunk size of 0, done with chunking\n";
511              
512 7 50       53 if ( $self->[ BUFFER ] =~ s/(?:.{$line_size})//s ) {
513             # signify that trailers still need handling
514 7         21 $self->[ STATE ] = STATE_PARSING_TRAILER;
515 7         24 return;
516             }
517             } elsif ( $self->[ BUFFER ] =~ s/^(?:.{$line_size})(.{$chunk_size})$CRLF//s ) {
518 56         145 my ( $chunk ) = ( $1 );
519 56         63 DEBUG && warn "got a complete chunk of length $chunk_size\n";
520              
521 56         384 $self->[ CHUNK_BUFFER ] .= $chunk;
522             } else {
523             # not enough data for a whole chunk, end current run through
524 2         8 return [];
525             }
526             }
527              
528 0 0       0 $processed_data || return [];
529             } else { # no content_length
530 27         28 DEBUG && warn "finished body parsing\n";
531              
532 27         59 $self->[ STATE ] = STATE_PARSING_COMPLETE;
533             }
534              
535 34         62 return;
536             }
537              
538             sub _parse_trailer {
539 7     7   16 my ( $self ) = @_;
540              
541 7         8 DEBUG && warn "looking for trailer\n";
542              
543 7         12 my $done = 0;
544              
545             # read up to a blank line, and pull off headers^Her, trailers.
546 7 100       110 if ( $self->[ BUFFER ] =~ s/^$CRLF// ) {
    50          
547             # no trailers, just mark that we're done
548 5         10 $done = 1;
549             } elsif ( $self->[ BUFFER ] =~ s/^(.*?($CRLF)\2)//s ) {
550 2         6 my $trailer = $1;
551              
552 2         20 while ( $trailer =~ s/$HEADER_REGEX//s ) {
553 3         79 my ( $header, $value ) = ( $1, $2 );
554              
555 3         6 DEBUG && warn "got trailer line '$header' => '$value'\n";
556              
557             # can't have trailers of 'content-length', 'transfer-encoding'
558             # or 'trailers'
559 3 100       24 if ( $header =~ /^(content-length|transfer-encoding|trailer)$/i ) {
560             return [
561 1         6 $self->_build_error(
562             RC_BAD_REQUEST,
563             "Trailer of '$header' not allowed"
564             )
565             ]
566             }
567              
568 2         7 $self->[ REQUEST ]->header( $header => $value );
569             }
570 1         62 $done = 1;
571             } else {
572 0         0 return [];
573             }
574              
575 6 50       21 if ( $done ) {
576             # rewrite the headers and advance our state
577 6         11 $self->[ STATE ] = STATE_PARSING_COMPLETE;
578              
579             # rewrite the headers, as appropriate
580 6         13 my $request = $self->[ REQUEST ];
581              
582 6         26 $request->content( $self->[ CHUNK_BUFFER ] );
583 6         141 $request->content_length( length $self->[ CHUNK_BUFFER ] );
584 6         267 $request->remove_header( 'trailer' );
585 6         146 $self->[ CHUNK_BUFFER ] = '';
586             }
587              
588 6         14 return;
589             }
590              
591             1;
592              
593             package HTTP::Request::Chunked;
594              
595 2     2   1625 use base qw( HTTP::Request );
  2         4  
  2         191  
596              
597             1;
598              
599             __END__