File Coverage

blib/lib/PAGI/Server/Protocol/HTTP1.pm
Criterion Covered Total %
statement 182 189 96.3
branch 76 84 90.4
condition 43 55 78.1
subroutine 16 17 94.1
pod 9 9 100.0
total 326 354 92.0


line stmt bran cond sub pod time code
1             package PAGI::Server::Protocol::HTTP1;
2 107     107   153026 use strict;
  107         178  
  107         5392  
3 107     107   930 use warnings;
  107         415  
  107         8856  
4              
5             our $VERSION = '0.002001';
6              
7 107     107   43361 use HTTP::Parser::XS qw(parse_http_request);
  107         108651  
  107         8738  
8 107     107   1259 use URI::Escape qw(uri_unescape);
  107         1837  
  107         4658  
9 107     107   925 use Encode qw(decode);
  107         13252  
  107         3728  
10 107     107   1249 use PAGI::Server ();
  107         156  
  107         286295  
11              
12              
13             # =============================================================================
14             # Header Validation (CRLF Injection Prevention)
15             # =============================================================================
16             # RFC 7230 Section 3.2.6: Field values MUST NOT contain CR or LF
17             # Additionally, null bytes are rejected as they can cause truncation attacks
18              
19             sub _validate_header_name {
20 480     480   745 my ($name) = @_;
21              
22 480 100       1296 if ($name =~ /[\r\n\0]/) {
23 2         14 die "Invalid header name: contains CR, LF, or null byte\n";
24             }
25             # RFC 7230: token = 1*tchar
26             # For simplicity, we just reject control characters and delimiters
27 478 50       1061 if ($name =~ /[[:cntrl:]]/) {
28 0         0 die "Invalid header name: contains control characters\n";
29             }
30 478         758 return $name;
31             }
32              
33             sub _validate_header_value {
34 478     478   736 my ($value) = @_;
35              
36 478 100       1047 if ($value =~ /[\r\n\0]/) {
37 14         74 die "Invalid header value: contains CR, LF, or null byte\n";
38             }
39 464         631 return $value;
40             }
41              
42             =encoding utf8
43              
44             =head1 NAME
45              
46             PAGI::Server::Protocol::HTTP1 - HTTP/1.1 protocol handler
47              
48             =head1 SYNOPSIS
49              
50             use PAGI::Server::Protocol::HTTP1;
51              
52             my $proto = PAGI::Server::Protocol::HTTP1->new;
53              
54             # Parse incoming request
55             my ($request, $consumed) = $proto->parse_request($buffer);
56              
57             # Serialize response
58             my $bytes = $proto->serialize_response_start(200, \@headers, $chunked);
59             $bytes .= $proto->serialize_response_body($chunk, $more);
60              
61             =head1 DESCRIPTION
62              
63             PAGI::Server::Protocol::HTTP1 isolates HTTP/1.1 wire-format parsing and
64             serialization from PAGI event handling. This allows clean separation of
65             protocol handling and future addition of HTTP/2 or HTTP/3 modules with
66             the same interface.
67              
68             =head1 METHODS
69              
70             =head2 new
71              
72             my $proto = PAGI::Server::Protocol::HTTP1->new(%options);
73              
74             Creates a new HTTP1 protocol handler. Accepts the following options, which
75             bound resource use while parsing untrusted input:
76              
77             =over 4
78              
79             =item * C - maximum size in bytes of the combined header
80             block. Default: 8192 (8KB). Exceeding it yields a 431 error result.
81              
82             =item * C - maximum size in bytes of the request line.
83             Default: 8192 (8KB). Exceeding it yields a 414 error result.
84              
85             =item * C - maximum number of header fields. Default: 100.
86             Exceeding it yields a 431 error result.
87              
88             =item * C - maximum size in bytes of a single chunk in a
89             chunked request body. Default: 10_485_760 (10MB).
90              
91             =back
92              
93             =head2 parse_request
94              
95             my ($request_info, $bytes_consumed) = $proto->parse_request($buffer);
96              
97             Parses an HTTP request from the buffer. The first return value is one of three
98             things:
99              
100             =over 4
101              
102             =item * C when the request is incomplete (more bytes needed). The
103             second value is 0.
104              
105             =item * an B when the request is malformed or exceeds a
106             limit:
107              
108             { error => 400, message => 'Bad Request' }
109              
110             The C key is the HTTP status code to send (400, 413, 414, 431, or 501);
111             C is a short reason. The second return value is the number of bytes
112             to discard.
113              
114             =item * a B on success:
115              
116             $request_info = {
117             method => 'GET',
118             path => '/foo',
119             raw_path => '/foo%20bar',
120             query_string => 'a=1',
121             http_version => '1.1',
122             headers => [ ['host', 'localhost'], ... ],
123             content_length => 0, # or undef if no Content-Length header
124             chunked => 0, # 1 if Transfer-Encoding: chunked
125             expect_continue => 0, # 1 if the request sent Expect: 100-continue
126             };
127              
128             =back
129              
130             =head2 serialize_response_start
131              
132             my $bytes = $proto->serialize_response_start($status, \@headers, $chunked, $http_version);
133              
134             Serializes the status line and headers. C<$chunked> (default 0) adds a
135             C header, but only when C<$http_version> (default
136             C<'1.1'>) is C<'1.1'> — chunked encoding is not emitted for HTTP/1.0
137             responses. A default C header is added when the application does not
138             provide one.
139              
140             =head2 serialize_response_body
141              
142             my $bytes = $proto->serialize_response_body($chunk, $more, $chunked);
143              
144             Serializes a body chunk. Uses chunked encoding if $chunked is true.
145              
146             =head2 serialize_chunk_end
147              
148             my $bytes = $proto->serialize_chunk_end;
149              
150             Returns the terminating zero-length chunk (C<"0\r\n\r\n">) that ends a
151             chunked response body.
152              
153             =head2 serialize_continue
154              
155             my $bytes = $proto->serialize_continue;
156              
157             Returns a C interim response, sent in reply to a
158             request that carried C.
159              
160             =head2 serialize_trailers
161              
162             my $bytes = $proto->serialize_trailers(\@headers);
163              
164             Serializes HTTP trailers.
165              
166             =head2 format_date
167              
168             my $date = $proto->format_date;
169              
170             Returns the current time formatted as an RFC 7231 IMF-fixdate string suitable
171             for a C header (e.g. C<"Sun, 06 Nov 1994 08:49:37 GMT">). The value is
172             cached and regenerated at most once per second.
173              
174             =cut
175              
176             # Cached Date header (regenerated at most once per second)
177             my $_cached_date;
178             my $_cached_date_time = 0;
179              
180             # Cached default Server header (lazy-init to ensure VERSION is loaded)
181             my $_server_header;
182              
183             # HTTP status code reason phrases
184             my %STATUS_PHRASES = (
185             100 => 'Continue',
186             101 => 'Switching Protocols',
187             200 => 'OK',
188             201 => 'Created',
189             204 => 'No Content',
190             301 => 'Moved Permanently',
191             302 => 'Found',
192             304 => 'Not Modified',
193             400 => 'Bad Request',
194             401 => 'Unauthorized',
195             403 => 'Forbidden',
196             404 => 'Not Found',
197             405 => 'Method Not Allowed',
198             413 => 'Payload Too Large',
199             414 => 'URI Too Long',
200             431 => 'Request Header Fields Too Large',
201             500 => 'Internal Server Error',
202             502 => 'Bad Gateway',
203             503 => 'Service Unavailable',
204             );
205              
206             sub new {
207 398     398 1 4524154 my ($class, %args) = @_;
208              
209             my $self = bless {
210             max_header_size => $args{max_header_size} // 8192,
211             max_request_line_size => $args{max_request_line_size} // 8192, # 8KB per RFC 7230
212             max_header_count => $args{max_header_count} // 100, # Max number of headers
213 398   100     7281 max_chunk_size => $args{max_chunk_size} // 10_485_760, # 10MB default
      100        
      100        
      100        
214             }, $class;
215 398         2390 return $self;
216             }
217              
218             sub parse_request {
219 234     234 1 9880 my ($self, $buffer_ref) = @_;
220              
221             # HTTP::Parser::XS expects a scalar, not a reference
222 234 100       671 my $buffer = ref $buffer_ref ? $$buffer_ref : $buffer_ref;
223              
224             # Check for complete headers (look for \r\n\r\n)
225 234         832 my $header_end = index($buffer, "\r\n\r\n");
226 234 100       567 return (undef, 0) if $header_end < 0;
227              
228             # Check request line length (first line before \r\n)
229 231         613 my $first_line_end = index($buffer, "\r\n");
230 231 100       1310 if ($first_line_end > $self->{max_request_line_size}) {
231 2         13 return ({ error => 414, message => 'URI Too Long' }, $header_end + 4);
232             }
233              
234             # Check max header size
235 229 100       718 if ($header_end > $self->{max_header_size}) {
236 2         12 return ({ error => 431, message => 'Request Header Fields Too Large' }, $header_end + 4);
237             }
238              
239             # Parse using HTTP::Parser::XS
240 227         444 my %env;
241 227         3859 my $ret = parse_http_request($buffer, \%env);
242              
243             # Return error for malformed request (-1)
244 227 100       711 if ($ret == -1) {
245             # Find end of malformed request line/headers
246 1         2 my $consumed = $header_end + 4;
247 1         6 return ({ error => 400, message => 'Bad Request' }, $consumed);
248             }
249              
250             # Return undef if incomplete (-2)
251 226 50       650 return (undef, 0) if $ret < 0;
252              
253             # Extract method and path
254 226         550 my $method = $env{REQUEST_METHOD};
255 226   50     753 my $raw_uri = $env{REQUEST_URI} // '/';
256              
257             # Split path and query string
258 226         1249 my ($raw_path, $query_string) = split(/\?/, $raw_uri, 2);
259 226   50     597 $raw_path //= '/';
260 226   100     1211 $query_string //= '';
261              
262             # Decode path (URL-decode, then UTF-8 decode with fallback)
263             # Mojolicious-style: try UTF-8 decode, fall back to original bytes if invalid.
264             # Fast path: a path with no percent-escapes and no high bytes is already its
265             # own decoded form (ASCII is its own UTF-8), so skip uri_unescape and the
266             # eval + Encode::decode entirely -- the common case.
267 226         466 my $path;
268 226 100       985 if ($raw_path !~ /[%\x80-\xff]/) {
269 223         463 $path = $raw_path;
270             }
271             else {
272 3         15 my $unescaped = uri_unescape($raw_path);
273 3   66     95 $path = eval { decode('UTF-8', $unescaped, Encode::FB_CROAK) } // $unescaped;
  3         34  
274             }
275              
276             # Build headers array with lowercase names
277 226         661 my @headers;
278             my $content_length;
279 226         945 my $chunked = 0;
280 226         333 my $te_bad_order = 0;
281 226         278 my $te_unsupported = 0;
282 226         585 my $expect_continue = 0;
283 226         388 my @cookie_values;
284              
285 226         913 for my $key (keys %env) {
286             # Optimized: use index() + substr() instead of regex (faster per NYTProf)
287 2044 100       3512 if (index($key, 'HTTP_') == 0) {
288 666         1267 my $header_name = lc(substr($key, 5));
289 666         1257 $header_name =~ tr/_/-/; # Optimized: tr/// is faster than s///g
290 666         1141 my $value = $env{$key};
291              
292             # Handle Cookie header normalization
293 666 100       1236 if ($header_name eq 'cookie') {
294 1         2 push @cookie_values, $value;
295 1         3 next;
296             }
297              
298             # RFC 9112 Section 6.1: chunked must be the final transfer coding
299 665 100       1179 if ($header_name eq 'transfer-encoding') {
300 6         46 my @codings = map { s/^\s+|\s+$//gr } split /,/, lc($value);
  8         45  
301 6 100 66     72 if (@codings && $codings[-1] eq 'chunked') {
    100          
302 4         9 $chunked = 1;
303 3         8 } elsif (grep { $_ eq 'chunked' } @codings) {
304             # chunked present but not final — protocol error
305 1         2 $te_bad_order = 1;
306             } else {
307 1         2 $te_unsupported = 1;
308             }
309             }
310              
311             # Check for Expect: 100-continue
312 665 100 66     1221 if ($header_name eq 'expect' && lc($value) eq '100-continue') {
313 1         2 $expect_continue = 1;
314             }
315              
316 665         1563 push @headers, [$header_name, $value];
317             }
318             }
319              
320             # Add normalized cookie header if present
321 226 100       614 if (@cookie_values) {
322 1         5 push @headers, ['cookie', join('; ', @cookie_values)];
323             }
324              
325             # Check header count limit (DoS protection)
326 226 100       697 if (@headers > $self->{max_header_count}) {
327 1         13 return ({ error => 431, message => 'Request Header Fields Too Large' }, $header_end + 4);
328             }
329              
330             # Add content-type and content-length from env
331 225 100       708 if (defined $env{CONTENT_TYPE}) {
332 6         35 push @headers, ['content-type', $env{CONTENT_TYPE}];
333             }
334 225 100       682 if (defined $env{CONTENT_LENGTH}) {
335 16         37 my $cl_value = $env{CONTENT_LENGTH};
336              
337             # RFC 7230 Section 3.3.2: Content-Length = 1*DIGIT
338             # Must be only digits, no whitespace, no negative sign
339 16 100       195 if ($cl_value !~ /^\d+$/) {
340 4         29 return ({ error => 400, message => 'Bad Request' }, $header_end + 4);
341             }
342              
343             # Check for unreasonably large values (>2GB indicates potential DoS)
344             # Using string length check to avoid Perl's numeric conversion issues
345 12 100 66     63 if (length($cl_value) > 10 || $cl_value > 2_147_483_647) {
346 1         9 return ({ error => 413, message => 'Payload Too Large' }, $header_end + 4);
347             }
348              
349 11         32 push @headers, ['content-length', $cl_value];
350 11         21 $content_length = $cl_value + 0;
351             }
352              
353             # Reject unsupported Transfer-Encoding before checking CL/TE conflict
354 220 100       448 if ($te_bad_order) {
355 1         10 return ({ error => 400, message => 'chunked must be the final Transfer-Encoding' }, $header_end + 4);
356             }
357 219 100       455 if ($te_unsupported) {
358 1         8 return ({ error => 501, message => 'Unsupported Transfer-Encoding' }, $header_end + 4);
359             }
360              
361             # RFC 9112 Section 6.3.3: reject requests with both Transfer-Encoding
362             # and Content-Length to prevent request smuggling (CL/TE desync)
363 218 100 100     662 if ($chunked && defined $content_length) {
364 1         10 return ({ error => 400, message => 'Transfer-Encoding and Content-Length are mutually exclusive' }, $header_end + 4);
365             }
366              
367             # Determine HTTP version (optimized: substr instead of regex)
368 217         648 my $http_version = '1.1';
369 217 50 33     2309 if ($env{SERVER_PROTOCOL} && index($env{SERVER_PROTOCOL}, 'HTTP/') == 0) {
370 217         1117 $http_version = substr($env{SERVER_PROTOCOL}, 5);
371             }
372              
373             # RFC 7230 Section 5.4: A client MUST send a Host header field in all
374             # HTTP/1.1 request messages. A server MUST respond with a 400 (Bad Request)
375             # status code to any HTTP/1.1 request message that lacks a Host header field.
376 217 100 100     1186 if ($http_version eq '1.1' && !defined $env{HTTP_HOST}) {
377 1         10 return ({ error => 400, message => 'Bad Request' }, $header_end + 4);
378             }
379              
380 216         3100 my $request = {
381             method => $method,
382             path => $path,
383             raw_path => $raw_path,
384             query_string => $query_string,
385             http_version => $http_version,
386             headers => \@headers,
387             content_length => $content_length,
388             chunked => $chunked,
389             expect_continue => $expect_continue,
390             };
391              
392 216         1325 return ($request, $ret);
393             }
394              
395             sub serialize_response_start {
396 216     216 1 5363 my ($self, $status, $headers, $chunked, $http_version) = @_;
397 216   100     561 $chunked //= 0;
398 216   100     567 $http_version //= '1.1';
399              
400 216   100     1002 my $phrase = $STATUS_PHRASES{$status} // 'Unknown';
401 216         625 my $response = "HTTP/$http_version $status $phrase\r\n";
402              
403             # Serialize headers and detect app-provided Server header in a single pass
404 216         350 my $has_server = 0;
405 216         497 for my $header (@$headers) {
406 476         846 my ($name, $value) = @$header;
407 476 100       965 $has_server = 1 if lc($name) eq 'server';
408 476         1100 $name = _validate_header_name($name);
409 475         984 $value = _validate_header_value($value);
410 464         1150 $response .= "$name: $value\r\n";
411             }
412              
413             # Add default Server header if app didn't provide one
414 204 100       494 unless ($has_server) {
415 203   66     661 $_server_header //= "Server: PAGI::Server/$PAGI::Server::VERSION\r\n";
416 203         354 $response .= $_server_header;
417             }
418              
419             # Add Transfer-Encoding if chunked (HTTP/1.1 only)
420 204 100 100     861 if ($chunked && $http_version eq '1.1') {
421 155         329 $response .= "Transfer-Encoding: chunked\r\n";
422             }
423              
424 204         343 $response .= "\r\n";
425 204         568 return $response;
426             }
427              
428             sub serialize_response_body {
429 1     1 1 287 my ($self, $chunk, $more, $chunked) = @_;
430 1   50     3 $chunked //= 0;
431              
432 1 50 33     7 return '' unless defined $chunk && length $chunk;
433              
434 1 50       3 if ($chunked) {
435 1         6 my $len = sprintf("%x", length($chunk));
436 1         2 my $body = "$len\r\n$chunk\r\n";
437              
438             # Add final chunk if no more data
439 1 50       3 if (!$more) {
440 1         2 $body .= "0\r\n\r\n";
441             }
442              
443 1         4 return $body;
444             } else {
445 0         0 return $chunk;
446             }
447             }
448              
449             sub serialize_chunk_end {
450 0     0 1 0 my ($self) = @_;
451              
452 0         0 return "0\r\n\r\n";
453             }
454              
455             sub serialize_continue {
456 1     1 1 739 my ($self) = @_;
457              
458 1         3 return "HTTP/1.1 100 Continue\r\n\r\n";
459             }
460              
461             sub serialize_trailers {
462 4     4 1 1220 my ($self, $headers) = @_;
463              
464 4         10 my $trailers = '';
465 4         10 for my $header (@$headers) {
466 4         10 my ($name, $value) = @$header;
467 4         82 $name = _validate_header_name($name);
468 3         9 $value = _validate_header_value($value);
469 0         0 $trailers .= "$name: $value\r\n";
470             }
471 0         0 $trailers .= "\r\n";
472 0         0 return $trailers;
473             }
474              
475             sub format_date {
476 243     243 1 1085 my ($self) = @_;
477              
478 243         555 my $now = time();
479 243 100       671 if ($now != $_cached_date_time) {
480 105         501 $_cached_date_time = $now;
481 105         820 my @days = qw(Sun Mon Tue Wed Thu Fri Sat);
482 105         1056 my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
483 105         618 my @gmt = gmtime($now);
484 105         1344 $_cached_date = sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT",
485             $days[$gmt[6]], $gmt[3], $months[$gmt[4]], $gmt[5] + 1900,
486             $gmt[2], $gmt[1], $gmt[0]);
487             }
488 243         1159 return $_cached_date;
489             }
490              
491             =head2 parse_chunked_body
492              
493             my ($data, $bytes_consumed, $complete) = $proto->parse_chunked_body($buffer);
494              
495             Parses chunked Transfer-Encoding body from the buffer. Returns:
496             - $data: decoded body data (may be empty string)
497             - $bytes_consumed: number of bytes consumed from buffer
498             - $complete: 1 if final chunk (0-length) was seen, 0 otherwise
499              
500             Returns (undef, 0, 0) if more data is needed.
501              
502             =cut
503              
504             sub parse_chunked_body {
505 16     16 1 16224 my ($self, $buffer_ref) = @_;
506              
507 16 50       31 my $buffer = ref $buffer_ref ? $$buffer_ref : $buffer_ref;
508 16         21 my $data = '';
509 16         19 my $total_consumed = 0;
510 16         20 my $complete = 0;
511              
512 16         17 while (1) {
513             # Find chunk size line
514 25         35 my $crlf = index($buffer, "\r\n", $total_consumed);
515 25 50       47 last if $crlf < 0;
516              
517             # Parse chunk size (hex)
518 25         39 my $size_line = substr($buffer, $total_consumed, $crlf - $total_consumed);
519 25         47 $size_line =~ s/;.*//; # Remove chunk extensions
520 25         77 $size_line =~ s/^\s+|\s+$//g; # Trim whitespace
521              
522             # Validate chunk size is valid hex (RFC 7230 Section 4.1)
523 25 100 100     138 if ($size_line eq '' || $size_line !~ /^[0-9a-fA-F]+$/) {
524 4         21 return ({ error => 400, message => 'Invalid chunk size' }, 0, 0);
525             }
526              
527             # Reject obviously oversized chunk sizes before hex() conversion
528             # 7 hex digits = max 268MB, 8+ digits certainly exceeds any reasonable limit
529 21 100       34 if (length($size_line) > 7) {
530 1         6 return ({ error => 413, message => 'Chunk Too Large' }, 0, 0);
531             }
532              
533 20         23 my $chunk_size = hex($size_line);
534              
535             # Reject chunks exceeding max_chunk_size (DoS protection)
536 20 100       42 if ($chunk_size > $self->{max_chunk_size}) {
537 2         11 return ({ error => 413, message => 'Chunk Too Large' }, 0, 0);
538             }
539              
540             # Check if we have the full chunk + trailing CRLF
541 18         21 my $chunk_start = $crlf + 2;
542 18         18 my $chunk_end = $chunk_start + $chunk_size + 2; # +2 for trailing CRLF
543              
544 18 100       30 if (length($buffer) < $chunk_end) {
545 1         2 last; # Need more data
546             }
547              
548             # Extract chunk data
549 17 100       26 if ($chunk_size > 0) {
550 9         19 $data .= substr($buffer, $chunk_start, $chunk_size);
551             }
552              
553 17         21 $total_consumed = $chunk_end;
554              
555             # Check for final chunk
556 17 100       26 if ($chunk_size == 0) {
557 8         9 $complete = 1;
558 8         14 last;
559             }
560             }
561              
562 9         28 return ($data, $total_consumed, $complete);
563             }
564              
565             1;
566              
567             __END__