File Coverage

blib/lib/PAGI/Server/Protocol/HTTP1.pm
Criterion Covered Total %
statement 182 189 96.3
branch 76 84 90.4
condition 43 55 78.1
subroutine 16 17 94.1
pod 9 9 100.0
total 326 354 92.0


line stmt bran cond sub pod time code
1             package PAGI::Server::Protocol::HTTP1;
2 107     107   151279 use strict;
  107         181  
  107         5075  
3 107     107   1069 use warnings;
  107         536  
  107         8974  
4              
5             our $VERSION = '0.002002';
6              
7 107     107   42980 use HTTP::Parser::XS qw(parse_http_request);
  107         109887  
  107         8553  
8 107     107   1424 use URI::Escape qw(uri_unescape);
  107         1585  
  107         4762  
9 107     107   964 use Encode qw(decode);
  107         13684  
  107         3893  
10 107     107   1322 use PAGI::Server ();
  107         370  
  107         285098  
11              
12              
13             # =============================================================================
14             # Header Validation (CRLF Injection Prevention)
15             # =============================================================================
16             # RFC 7230 Section 3.2.6: Field values MUST NOT contain CR or LF
17             # Additionally, null bytes are rejected as they can cause truncation attacks
18              
19             sub _validate_header_name {
20 480     480   746 my ($name) = @_;
21              
22 480 100       1239 if ($name =~ /[\r\n\0]/) {
23 2         11 die "Invalid header name: contains CR, LF, or null byte\n";
24             }
25             # RFC 7230: token = 1*tchar
26             # For simplicity, we just reject control characters and delimiters
27 478 50       997 if ($name =~ /[[:cntrl:]]/) {
28 0         0 die "Invalid header name: contains control characters\n";
29             }
30 478         706 return $name;
31             }
32              
33             sub _validate_header_value {
34 478     478   746 my ($value) = @_;
35              
36 478 100       1041 if ($value =~ /[\r\n\0]/) {
37 14         87 die "Invalid header value: contains CR, LF, or null byte\n";
38             }
39 464         672 return $value;
40             }
41              
42             =encoding utf8
43              
44             =head1 NAME
45              
46             PAGI::Server::Protocol::HTTP1 - HTTP/1.1 protocol handler
47              
48             =head1 SYNOPSIS
49              
50             use PAGI::Server::Protocol::HTTP1;
51              
52             my $proto = PAGI::Server::Protocol::HTTP1->new;
53              
54             # Parse incoming request
55             my ($request, $consumed) = $proto->parse_request($buffer);
56              
57             # Serialize response
58             my $bytes = $proto->serialize_response_start(200, \@headers, $chunked);
59             $bytes .= $proto->serialize_response_body($chunk, $more);
60              
61             =head1 DESCRIPTION
62              
63             PAGI::Server::Protocol::HTTP1 isolates HTTP/1.1 wire-format parsing and
64             serialization from PAGI event handling. This allows clean separation of
65             protocol handling and future addition of HTTP/2 or HTTP/3 modules with
66             the same interface.
67              
68             =head1 METHODS
69              
70             =head2 new
71              
72             my $proto = PAGI::Server::Protocol::HTTP1->new(%options);
73              
74             Creates a new HTTP1 protocol handler. Accepts the following options, which
75             bound resource use while parsing untrusted input:
76              
77             =over 4
78              
79             =item * C - maximum size in bytes of the combined header
80             block. Default: 8192 (8KB). Exceeding it yields a 431 error result.
81              
82             =item * C - maximum size in bytes of the request line.
83             Default: 8192 (8KB). Exceeding it yields a 414 error result.
84              
85             =item * C - maximum number of header fields. Default: 100.
86             Exceeding it yields a 431 error result.
87              
88             =item * C - maximum size in bytes of a single chunk in a
89             chunked request body. Default: 10_485_760 (10MB).
90              
91             =back
92              
93             =head2 parse_request
94              
95             my ($request_info, $bytes_consumed) = $proto->parse_request($buffer);
96              
97             Parses an HTTP request from the buffer. The first return value is one of three
98             things:
99              
100             =over 4
101              
102             =item * C when the request is incomplete (more bytes needed). The
103             second value is 0.
104              
105             =item * an B when the request is malformed or exceeds a
106             limit:
107              
108             { error => 400, message => 'Bad Request' }
109              
110             The C key is the HTTP status code to send (400, 413, 414, 431, or 501);
111             C is a short reason. The second return value is the number of bytes
112             to discard.
113              
114             =item * a B on success:
115              
116             $request_info = {
117             method => 'GET',
118             path => '/foo',
119             raw_path => '/foo%20bar',
120             query_string => 'a=1',
121             http_version => '1.1',
122             headers => [ ['host', 'localhost'], ... ],
123             content_length => 0, # or undef if no Content-Length header
124             chunked => 0, # 1 if Transfer-Encoding: chunked
125             expect_continue => 0, # 1 if the request sent Expect: 100-continue
126             };
127              
128             =back
129              
130             =head2 serialize_response_start
131              
132             my $bytes = $proto->serialize_response_start($status, \@headers, $chunked, $http_version);
133              
134             Serializes the status line and headers. C<$chunked> (default 0) adds a
135             C header, but only when C<$http_version> (default
136             C<'1.1'>) is C<'1.1'> — chunked encoding is not emitted for HTTP/1.0
137             responses. A default C header is added when the application does not
138             provide one.
139              
140             =head2 serialize_response_body
141              
142             my $bytes = $proto->serialize_response_body($chunk, $more, $chunked);
143              
144             Serializes a body chunk. Uses chunked encoding if $chunked is true.
145              
146             =head2 serialize_chunk_end
147              
148             my $bytes = $proto->serialize_chunk_end;
149              
150             Returns the terminating zero-length chunk (C<"0\r\n\r\n">) that ends a
151             chunked response body.
152              
153             =head2 serialize_continue
154              
155             my $bytes = $proto->serialize_continue;
156              
157             Returns a C interim response, sent in reply to a
158             request that carried C.
159              
160             =head2 serialize_trailers
161              
162             my $bytes = $proto->serialize_trailers(\@headers);
163              
164             Serializes HTTP trailers.
165              
166             =head2 format_date
167              
168             my $date = $proto->format_date;
169              
170             Returns the current time formatted as an RFC 7231 IMF-fixdate string suitable
171             for a C header (e.g. C<"Sun, 06 Nov 1994 08:49:37 GMT">). The value is
172             cached and regenerated at most once per second.
173              
174             =cut
175              
176             # Cached Date header (regenerated at most once per second)
177             my $_cached_date;
178             my $_cached_date_time = 0;
179              
180             # Cached default Server header (lazy-init to ensure VERSION is loaded)
181             my $_server_header;
182              
183             # HTTP status code reason phrases
184             my %STATUS_PHRASES = (
185             100 => 'Continue',
186             101 => 'Switching Protocols',
187             200 => 'OK',
188             201 => 'Created',
189             204 => 'No Content',
190             301 => 'Moved Permanently',
191             302 => 'Found',
192             304 => 'Not Modified',
193             400 => 'Bad Request',
194             401 => 'Unauthorized',
195             403 => 'Forbidden',
196             404 => 'Not Found',
197             405 => 'Method Not Allowed',
198             413 => 'Payload Too Large',
199             414 => 'URI Too Long',
200             431 => 'Request Header Fields Too Large',
201             500 => 'Internal Server Error',
202             502 => 'Bad Gateway',
203             503 => 'Service Unavailable',
204             );
205              
206             sub new {
207 398     398 1 4639858 my ($class, %args) = @_;
208              
209             my $self = bless {
210             max_header_size => $args{max_header_size} // 8192,
211             max_request_line_size => $args{max_request_line_size} // 8192, # 8KB per RFC 7230
212             max_header_count => $args{max_header_count} // 100, # Max number of headers
213 398   100     7490 max_chunk_size => $args{max_chunk_size} // 10_485_760, # 10MB default
      100        
      100        
      100        
214             }, $class;
215 398         2445 return $self;
216             }
217              
218             sub parse_request {
219 234     234 1 10202 my ($self, $buffer_ref) = @_;
220              
221             # HTTP::Parser::XS expects a scalar, not a reference
222 234 100       787 my $buffer = ref $buffer_ref ? $$buffer_ref : $buffer_ref;
223              
224             # Check for complete headers (look for \r\n\r\n)
225 234         887 my $header_end = index($buffer, "\r\n\r\n");
226 234 100       586 return (undef, 0) if $header_end < 0;
227              
228             # Check request line length (first line before \r\n)
229 231         589 my $first_line_end = index($buffer, "\r\n");
230 231 100       913 if ($first_line_end > $self->{max_request_line_size}) {
231 2         12 return ({ error => 414, message => 'URI Too Long' }, $header_end + 4);
232             }
233              
234             # Check max header size
235 229 100       774 if ($header_end > $self->{max_header_size}) {
236 2         11 return ({ error => 431, message => 'Request Header Fields Too Large' }, $header_end + 4);
237             }
238              
239             # Parse using HTTP::Parser::XS
240 227         412 my %env;
241 227         3731 my $ret = parse_http_request($buffer, \%env);
242              
243             # Return error for malformed request (-1)
244 227 100       634 if ($ret == -1) {
245             # Find end of malformed request line/headers
246 1         2 my $consumed = $header_end + 4;
247 1         5 return ({ error => 400, message => 'Bad Request' }, $consumed);
248             }
249              
250             # Return undef if incomplete (-2)
251 226 50       612 return (undef, 0) if $ret < 0;
252              
253             # Extract method and path
254 226         592 my $method = $env{REQUEST_METHOD};
255 226   50     2529 my $raw_uri = $env{REQUEST_URI} // '/';
256              
257             # Split path and query string
258 226         1010 my ($raw_path, $query_string) = split(/\?/, $raw_uri, 2);
259 226   50     654 $raw_path //= '/';
260 226   100     1200 $query_string //= '';
261              
262             # Decode path (URL-decode, then UTF-8 decode with fallback)
263             # Mojolicious-style: try UTF-8 decode, fall back to original bytes if invalid.
264             # Fast path: a path with no percent-escapes and no high bytes is already its
265             # own decoded form (ASCII is its own UTF-8), so skip uri_unescape and the
266             # eval + Encode::decode entirely -- the common case.
267 226         437 my $path;
268 226 100       992 if ($raw_path !~ /[%\x80-\xff]/) {
269 223         453 $path = $raw_path;
270             }
271             else {
272 3         35 my $unescaped = uri_unescape($raw_path);
273 3   66     84 $path = eval { decode('UTF-8', $unescaped, Encode::FB_CROAK) } // $unescaped;
  3         34  
274             }
275              
276             # Build headers array with lowercase names
277 226         625 my @headers;
278             my $content_length;
279 226         370 my $chunked = 0;
280 226         1129 my $te_bad_order = 0;
281 226         329 my $te_unsupported = 0;
282 226         317 my $expect_continue = 0;
283 226         386 my @cookie_values;
284              
285 226         914 for my $key (keys %env) {
286             # Optimized: use index() + substr() instead of regex (faster per NYTProf)
287 2044 100       3568 if (index($key, 'HTTP_') == 0) {
288 666         1498 my $header_name = lc(substr($key, 5));
289 666         1212 $header_name =~ tr/_/-/; # Optimized: tr/// is faster than s///g
290 666         996 my $value = $env{$key};
291              
292             # Handle Cookie header normalization
293 666 100       1126 if ($header_name eq 'cookie') {
294 1         2 push @cookie_values, $value;
295 1         3 next;
296             }
297              
298             # RFC 9112 Section 6.1: chunked must be the final transfer coding
299 665 100       1025 if ($header_name eq 'transfer-encoding') {
300 6         16 my @codings = map { s/^\s+|\s+$//gr } split /,/, lc($value);
  8         44  
301 6 100 66     31 if (@codings && $codings[-1] eq 'chunked') {
    100          
302 4         8 $chunked = 1;
303 3         7 } elsif (grep { $_ eq 'chunked' } @codings) {
304             # chunked present but not final — protocol error
305 1         3 $te_bad_order = 1;
306             } else {
307 1         4 $te_unsupported = 1;
308             }
309             }
310              
311             # Check for Expect: 100-continue
312 665 100 66     1388 if ($header_name eq 'expect' && lc($value) eq '100-continue') {
313 1         13 $expect_continue = 1;
314             }
315              
316 665         1676 push @headers, [$header_name, $value];
317             }
318             }
319              
320             # Add normalized cookie header if present
321 226 100       653 if (@cookie_values) {
322 1         4 push @headers, ['cookie', join('; ', @cookie_values)];
323             }
324              
325             # Check header count limit (DoS protection)
326 226 100       657 if (@headers > $self->{max_header_count}) {
327 1         17 return ({ error => 431, message => 'Request Header Fields Too Large' }, $header_end + 4);
328             }
329              
330             # Add content-type and content-length from env
331 225 100       768 if (defined $env{CONTENT_TYPE}) {
332 6         22 push @headers, ['content-type', $env{CONTENT_TYPE}];
333             }
334 225 100       606 if (defined $env{CONTENT_LENGTH}) {
335 16         36 my $cl_value = $env{CONTENT_LENGTH};
336              
337             # RFC 7230 Section 3.3.2: Content-Length = 1*DIGIT
338             # Must be only digits, no whitespace, no negative sign
339 16 100       88 if ($cl_value !~ /^\d+$/) {
340 4         50 return ({ error => 400, message => 'Bad Request' }, $header_end + 4);
341             }
342              
343             # Check for unreasonably large values (>2GB indicates potential DoS)
344             # Using string length check to avoid Perl's numeric conversion issues
345 12 100 66     166 if (length($cl_value) > 10 || $cl_value > 2_147_483_647) {
346 1         8 return ({ error => 413, message => 'Payload Too Large' }, $header_end + 4);
347             }
348              
349 11         31 push @headers, ['content-length', $cl_value];
350 11         23 $content_length = $cl_value + 0;
351             }
352              
353             # Reject unsupported Transfer-Encoding before checking CL/TE conflict
354 220 100       568 if ($te_bad_order) {
355 1         9 return ({ error => 400, message => 'chunked must be the final Transfer-Encoding' }, $header_end + 4);
356             }
357 219 100       555 if ($te_unsupported) {
358 1         8 return ({ error => 501, message => 'Unsupported Transfer-Encoding' }, $header_end + 4);
359             }
360              
361             # RFC 9112 Section 6.3.3: reject requests with both Transfer-Encoding
362             # and Content-Length to prevent request smuggling (CL/TE desync)
363 218 100 100     661 if ($chunked && defined $content_length) {
364 1         11 return ({ error => 400, message => 'Transfer-Encoding and Content-Length are mutually exclusive' }, $header_end + 4);
365             }
366              
367             # Determine HTTP version (optimized: substr instead of regex)
368 217         514 my $http_version = '1.1';
369 217 50 33     2118 if ($env{SERVER_PROTOCOL} && index($env{SERVER_PROTOCOL}, 'HTTP/') == 0) {
370 217         1005 $http_version = substr($env{SERVER_PROTOCOL}, 5);
371             }
372              
373             # RFC 7230 Section 5.4: A client MUST send a Host header field in all
374             # HTTP/1.1 request messages. A server MUST respond with a 400 (Bad Request)
375             # status code to any HTTP/1.1 request message that lacks a Host header field.
376 217 100 100     1235 if ($http_version eq '1.1' && !defined $env{HTTP_HOST}) {
377 1         13 return ({ error => 400, message => 'Bad Request' }, $header_end + 4);
378             }
379              
380 216         3180 my $request = {
381             method => $method,
382             path => $path,
383             raw_path => $raw_path,
384             query_string => $query_string,
385             http_version => $http_version,
386             headers => \@headers,
387             content_length => $content_length,
388             chunked => $chunked,
389             expect_continue => $expect_continue,
390             };
391              
392 216         1440 return ($request, $ret);
393             }
394              
395             sub serialize_response_start {
396 216     216 1 6424 my ($self, $status, $headers, $chunked, $http_version) = @_;
397 216   100     550 $chunked //= 0;
398 216   100     586 $http_version //= '1.1';
399              
400 216   100     996 my $phrase = $STATUS_PHRASES{$status} // 'Unknown';
401 216         525 my $response = "HTTP/$http_version $status $phrase\r\n";
402              
403             # Serialize headers and detect app-provided Server header in a single pass
404 216         407 my $has_server = 0;
405 216         527 for my $header (@$headers) {
406 476         815 my ($name, $value) = @$header;
407 476 100       1174 $has_server = 1 if lc($name) eq 'server';
408 476         1098 $name = _validate_header_name($name);
409 475         1093 $value = _validate_header_value($value);
410 464         1035 $response .= "$name: $value\r\n";
411             }
412              
413             # Add default Server header if app didn't provide one
414 204 100       518 unless ($has_server) {
415 203   66     638 $_server_header //= "Server: PAGI::Server/$PAGI::Server::VERSION\r\n";
416 203         382 $response .= $_server_header;
417             }
418              
419             # Add Transfer-Encoding if chunked (HTTP/1.1 only)
420 204 100 100     898 if ($chunked && $http_version eq '1.1') {
421 155         318 $response .= "Transfer-Encoding: chunked\r\n";
422             }
423              
424 204         353 $response .= "\r\n";
425 204         510 return $response;
426             }
427              
428             sub serialize_response_body {
429 1     1 1 291 my ($self, $chunk, $more, $chunked) = @_;
430 1   50     4 $chunked //= 0;
431              
432 1 50 33     5 return '' unless defined $chunk && length $chunk;
433              
434 1 50       3 if ($chunked) {
435 1         6 my $len = sprintf("%x", length($chunk));
436 1         2 my $body = "$len\r\n$chunk\r\n";
437              
438             # Add final chunk if no more data
439 1 50       2 if (!$more) {
440 1         2 $body .= "0\r\n\r\n";
441             }
442              
443 1         3 return $body;
444             } else {
445 0         0 return $chunk;
446             }
447             }
448              
449             sub serialize_chunk_end {
450 0     0 1 0 my ($self) = @_;
451              
452 0         0 return "0\r\n\r\n";
453             }
454              
455             sub serialize_continue {
456 1     1 1 650 my ($self) = @_;
457              
458 1         3 return "HTTP/1.1 100 Continue\r\n\r\n";
459             }
460              
461             sub serialize_trailers {
462 4     4 1 849 my ($self, $headers) = @_;
463              
464 4         10 my $trailers = '';
465 4         10 for my $header (@$headers) {
466 4         6 my ($name, $value) = @$header;
467 4         84 $name = _validate_header_name($name);
468 3         9 $value = _validate_header_value($value);
469 0         0 $trailers .= "$name: $value\r\n";
470             }
471 0         0 $trailers .= "\r\n";
472 0         0 return $trailers;
473             }
474              
475             sub format_date {
476 243     243 1 830 my ($self) = @_;
477              
478 243         646 my $now = time();
479 243 100       763 if ($now != $_cached_date_time) {
480 101         313 $_cached_date_time = $now;
481 101         992 my @days = qw(Sun Mon Tue Wed Thu Fri Sat);
482 101         1232 my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
483 101         478 my @gmt = gmtime($now);
484 101         1255 $_cached_date = sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT",
485             $days[$gmt[6]], $gmt[3], $months[$gmt[4]], $gmt[5] + 1900,
486             $gmt[2], $gmt[1], $gmt[0]);
487             }
488 243         1028 return $_cached_date;
489             }
490              
491             =head2 parse_chunked_body
492              
493             my ($data, $bytes_consumed, $complete) = $proto->parse_chunked_body($buffer);
494              
495             Parses chunked Transfer-Encoding body from the buffer. Returns:
496             - $data: decoded body data (may be empty string)
497             - $bytes_consumed: number of bytes consumed from buffer
498             - $complete: 1 if final chunk (0-length) was seen, 0 otherwise
499              
500             Returns (undef, 0, 0) if more data is needed.
501              
502             =cut
503              
504             sub parse_chunked_body {
505 16     16 1 18047 my ($self, $buffer_ref) = @_;
506              
507 16 50       39 my $buffer = ref $buffer_ref ? $$buffer_ref : $buffer_ref;
508 16         26 my $data = '';
509 16         18 my $total_consumed = 0;
510 16         19 my $complete = 0;
511              
512 16         18 while (1) {
513             # Find chunk size line
514 25         37 my $crlf = index($buffer, "\r\n", $total_consumed);
515 25 50       42 last if $crlf < 0;
516              
517             # Parse chunk size (hex)
518 25         45 my $size_line = substr($buffer, $total_consumed, $crlf - $total_consumed);
519 25         51 $size_line =~ s/;.*//; # Remove chunk extensions
520 25         73 $size_line =~ s/^\s+|\s+$//g; # Trim whitespace
521              
522             # Validate chunk size is valid hex (RFC 7230 Section 4.1)
523 25 100 100     117 if ($size_line eq '' || $size_line !~ /^[0-9a-fA-F]+$/) {
524 4         23 return ({ error => 400, message => 'Invalid chunk size' }, 0, 0);
525             }
526              
527             # Reject obviously oversized chunk sizes before hex() conversion
528             # 7 hex digits = max 268MB, 8+ digits certainly exceeds any reasonable limit
529 21 100       59 if (length($size_line) > 7) {
530 1         7 return ({ error => 413, message => 'Chunk Too Large' }, 0, 0);
531             }
532              
533 20         28 my $chunk_size = hex($size_line);
534              
535             # Reject chunks exceeding max_chunk_size (DoS protection)
536 20 100       38 if ($chunk_size > $self->{max_chunk_size}) {
537 2         10 return ({ error => 413, message => 'Chunk Too Large' }, 0, 0);
538             }
539              
540             # Check if we have the full chunk + trailing CRLF
541 18         21 my $chunk_start = $crlf + 2;
542 18         17 my $chunk_end = $chunk_start + $chunk_size + 2; # +2 for trailing CRLF
543              
544 18 100       31 if (length($buffer) < $chunk_end) {
545 1         3 last; # Need more data
546             }
547              
548             # Extract chunk data
549 17 100       40 if ($chunk_size > 0) {
550 9         17 $data .= substr($buffer, $chunk_start, $chunk_size);
551             }
552              
553 17         19 $total_consumed = $chunk_end;
554              
555             # Check for final chunk
556 17 100       25 if ($chunk_size == 0) {
557 8         9 $complete = 1;
558 8         13 last;
559             }
560             }
561              
562 9         30 return ($data, $total_consumed, $complete);
563             }
564              
565             1;
566              
567             __END__