File Coverage

blib/lib/PAGI/Server/Protocol/HTTP1.pm
Criterion Covered Total %
statement 179 186 96.2
branch 74 82 90.2
condition 43 55 78.1
subroutine 16 17 94.1
pod 6 9 66.6
total 318 349 91.1


line stmt bran cond sub pod time code
1             package PAGI::Server::Protocol::HTTP1;
2 86     86   152125 use strict;
  86         141  
  86         3212  
3 86     86   306 use warnings;
  86         118  
  86         5222  
4 86     86   32521 use HTTP::Parser::XS qw(parse_http_request);
  86         87025  
  86         7101  
5 86     86   970 use URI::Escape qw(uri_unescape);
  86         1580  
  86         4161  
6 86     86   840 use Encode qw(decode);
  86         14112  
  86         3148  
7 86     86   1221 use PAGI::Server ();
  86         133  
  86         229029  
8              
9              
10             # =============================================================================
11             # Header Validation (CRLF Injection Prevention)
12             # =============================================================================
13             # RFC 7230 Section 3.2.6: Field values MUST NOT contain CR or LF
14             # Additionally, null bytes are rejected as they can cause truncation attacks
15              
16             sub _validate_header_name {
17 732     732   1189 my ($name) = @_;
18              
19 732 100       1825 if ($name =~ /[\r\n\0]/) {
20 2         23 die "Invalid header name: contains CR, LF, or null byte\n";
21             }
22             # RFC 7230: token = 1*tchar
23             # For simplicity, we just reject control characters and delimiters
24 730 50       1337 if ($name =~ /[[:cntrl:]]/) {
25 0         0 die "Invalid header name: contains control characters\n";
26             }
27 730         1082 return $name;
28             }
29              
30             sub _validate_header_value {
31 730     730   1057 my ($value) = @_;
32              
33 730 100       1451 if ($value =~ /[\r\n\0]/) {
34 14         68 die "Invalid header value: contains CR, LF, or null byte\n";
35             }
36 716         1016 return $value;
37             }
38              
39             =head1 NAME
40              
41             PAGI::Server::Protocol::HTTP1 - HTTP/1.1 protocol handler
42              
43             =head1 SYNOPSIS
44              
45             use PAGI::Server::Protocol::HTTP1;
46              
47             my $proto = PAGI::Server::Protocol::HTTP1->new;
48              
49             # Parse incoming request
50             my ($request, $consumed) = $proto->parse_request($buffer);
51              
52             # Serialize response
53             my $bytes = $proto->serialize_response_start(200, \@headers, $chunked);
54             $bytes .= $proto->serialize_response_body($chunk, $more);
55              
56             =head1 DESCRIPTION
57              
58             PAGI::Server::Protocol::HTTP1 isolates HTTP/1.1 wire-format parsing and
59             serialization from PAGI event handling. This allows clean separation of
60             protocol handling and future addition of HTTP/2 or HTTP/3 modules with
61             the same interface.
62              
63             =head1 METHODS
64              
65             =head2 new
66              
67             my $proto = PAGI::Server::Protocol::HTTP1->new;
68              
69             Creates a new HTTP1 protocol handler.
70              
71             =head2 parse_request
72              
73             my ($request_info, $bytes_consumed) = $proto->parse_request($buffer);
74              
75             Parses an HTTP request from the buffer. Returns undef if the request
76             is incomplete. On success, returns:
77              
78             $request_info = {
79             method => 'GET',
80             path => '/foo',
81             raw_path => '/foo%20bar',
82             query_string => 'a=1',
83             http_version => '1.1',
84             headers => [ ['host', 'localhost'], ... ],
85             content_length => 0, # or undef if not present
86             chunked => 0, # 1 if Transfer-Encoding: chunked
87             };
88              
89             =head2 serialize_response_start
90              
91             my $bytes = $proto->serialize_response_start($status, \@headers, $chunked);
92              
93             Serializes the response line and headers.
94              
95             =head2 serialize_response_body
96              
97             my $bytes = $proto->serialize_response_body($chunk, $more, $chunked);
98              
99             Serializes a body chunk. Uses chunked encoding if $chunked is true.
100              
101             =head2 serialize_trailers
102              
103             my $bytes = $proto->serialize_trailers(\@headers);
104              
105             Serializes HTTP trailers.
106              
107             =cut
108              
109             # Cached Date header (regenerated at most once per second)
110             my $_cached_date;
111             my $_cached_date_time = 0;
112              
113             # Cached default Server header (lazy-init to ensure VERSION is loaded)
114             my $_server_header;
115              
116             # HTTP status code reason phrases
117             my %STATUS_PHRASES = (
118             100 => 'Continue',
119             101 => 'Switching Protocols',
120             200 => 'OK',
121             201 => 'Created',
122             204 => 'No Content',
123             301 => 'Moved Permanently',
124             302 => 'Found',
125             304 => 'Not Modified',
126             400 => 'Bad Request',
127             401 => 'Unauthorized',
128             403 => 'Forbidden',
129             404 => 'Not Found',
130             405 => 'Method Not Allowed',
131             413 => 'Payload Too Large',
132             414 => 'URI Too Long',
133             431 => 'Request Header Fields Too Large',
134             500 => 'Internal Server Error',
135             502 => 'Bad Gateway',
136             503 => 'Service Unavailable',
137             );
138              
139             sub new {
140 438     438 1 3633163 my ($class, %args) = @_;
141              
142             my $self = bless {
143             max_header_size => $args{max_header_size} // 8192,
144             max_request_line_size => $args{max_request_line_size} // 8192, # 8KB per RFC 7230
145             max_header_count => $args{max_header_count} // 100, # Max number of headers
146 438   100     7702 max_chunk_size => $args{max_chunk_size} // 10_485_760, # 10MB default
      100        
      100        
      100        
147             }, $class;
148 438         1975 return $self;
149             }
150              
151             sub parse_request {
152 299     299 1 10340 my ($self, $buffer_ref) = @_;
153              
154             # HTTP::Parser::XS expects a scalar, not a reference
155 299 100       957 my $buffer = ref $buffer_ref ? $$buffer_ref : $buffer_ref;
156              
157             # Check for complete headers (look for \r\n\r\n)
158 299         1101 my $header_end = index($buffer, "\r\n\r\n");
159 299 100       921 return (undef, 0) if $header_end < 0;
160              
161             # Check request line length (first line before \r\n)
162 296         756 my $first_line_end = index($buffer, "\r\n");
163 296 100       1099 if ($first_line_end > $self->{max_request_line_size}) {
164 2         14 return ({ error => 414, message => 'URI Too Long' }, $header_end + 4);
165             }
166              
167             # Check max header size
168 294 100       934 if ($header_end > $self->{max_header_size}) {
169 2         14 return ({ error => 431, message => 'Request Header Fields Too Large' }, $header_end + 4);
170             }
171              
172             # Parse using HTTP::Parser::XS
173 292         468 my %env;
174 292         4544 my $ret = parse_http_request($buffer, \%env);
175              
176             # Return error for malformed request (-1)
177 292 100       806 if ($ret == -1) {
178             # Find end of malformed request line/headers
179 1         3 my $consumed = $header_end + 4;
180 1         6 return ({ error => 400, message => 'Bad Request' }, $consumed);
181             }
182              
183             # Return undef if incomplete (-2)
184 291 50       686 return (undef, 0) if $ret < 0;
185              
186             # Extract method and path
187 291         767 my $method = $env{REQUEST_METHOD};
188 291   50     928 my $raw_uri = $env{REQUEST_URI} // '/';
189              
190             # Split path and query string
191 291         1216 my ($raw_path, $query_string) = split(/\?/, $raw_uri, 2);
192 291   50     720 $raw_path //= '/';
193 291   100     1550 $query_string //= '';
194              
195             # Decode path (URL-decode, then UTF-8 decode with fallback)
196             # Mojolicious-style: try UTF-8 decode, fall back to original bytes if invalid
197 291         1531 my $unescaped = uri_unescape($raw_path);
198 291   66     2755 my $path = eval { decode('UTF-8', $unescaped, Encode::FB_CROAK) } // $unescaped;
  291         3595  
199              
200             # Build headers array with lowercase names
201 291         16705 my @headers;
202             my $content_length;
203 291         527 my $chunked = 0;
204 291         481 my $te_bad_order = 0;
205 291         524 my $te_unsupported = 0;
206 291         582 my $expect_continue = 0;
207 291         508 my @cookie_values;
208              
209 291         1145 for my $key (keys %env) {
210             # Optimized: use index() + substr() instead of regex (faster per NYTProf)
211 2638 100       5085 if (index($key, 'HTTP_') == 0) {
212 866         1677 my $header_name = lc(substr($key, 5));
213 866         1430 $header_name =~ tr/_/-/; # Optimized: tr/// is faster than s///g
214 866         1382 my $value = $env{$key};
215              
216             # Handle Cookie header normalization
217 866 100       1492 if ($header_name eq 'cookie') {
218 1         3 push @cookie_values, $value;
219 1         2 next;
220             }
221              
222             # RFC 9112 Section 6.1: chunked must be the final transfer coding
223 865 100       1393 if ($header_name eq 'transfer-encoding') {
224 6         18 my @codings = map { s/^\s+|\s+$//gr } split /,/, lc($value);
  8         46  
225 6 100 66     29 if (@codings && $codings[-1] eq 'chunked') {
    100          
226 4         6 $chunked = 1;
227 3         9 } elsif (grep { $_ eq 'chunked' } @codings) {
228             # chunked present but not final — protocol error
229 1         2 $te_bad_order = 1;
230             } else {
231 1         2 $te_unsupported = 1;
232             }
233             }
234              
235             # Check for Expect: 100-continue
236 865 100 66     1743 if ($header_name eq 'expect' && lc($value) eq '100-continue') {
237 1         2 $expect_continue = 1;
238             }
239              
240 865         2016 push @headers, [$header_name, $value];
241             }
242             }
243              
244             # Add normalized cookie header if present
245 291 100       836 if (@cookie_values) {
246 1         5 push @headers, ['cookie', join('; ', @cookie_values)];
247             }
248              
249             # Check header count limit (DoS protection)
250 291 100       895 if (@headers > $self->{max_header_count}) {
251 1         12 return ({ error => 431, message => 'Request Header Fields Too Large' }, $header_end + 4);
252             }
253              
254             # Add content-type and content-length from env
255 290 100       946 if (defined $env{CONTENT_TYPE}) {
256 8         28 push @headers, ['content-type', $env{CONTENT_TYPE}];
257             }
258 290 100       872 if (defined $env{CONTENT_LENGTH}) {
259 18         44 my $cl_value = $env{CONTENT_LENGTH};
260              
261             # RFC 7230 Section 3.3.2: Content-Length = 1*DIGIT
262             # Must be only digits, no whitespace, no negative sign
263 18 100       118 if ($cl_value !~ /^\d+$/) {
264 4         30 return ({ error => 400, message => 'Bad Request' }, $header_end + 4);
265             }
266              
267             # Check for unreasonably large values (>2GB indicates potential DoS)
268             # Using string length check to avoid Perl's numeric conversion issues
269 14 100 66     145 if (length($cl_value) > 10 || $cl_value > 2_147_483_647) {
270 1         8 return ({ error => 413, message => 'Payload Too Large' }, $header_end + 4);
271             }
272              
273 13         39 push @headers, ['content-length', $cl_value];
274 13         31 $content_length = $cl_value + 0;
275             }
276              
277             # Reject unsupported Transfer-Encoding before checking CL/TE conflict
278 285 100       748 if ($te_bad_order) {
279 1         9 return ({ error => 400, message => 'chunked must be the final Transfer-Encoding' }, $header_end + 4);
280             }
281 284 100       618 if ($te_unsupported) {
282 1         8 return ({ error => 501, message => 'Unsupported Transfer-Encoding' }, $header_end + 4);
283             }
284              
285             # RFC 9112 Section 6.3.3: reject requests with both Transfer-Encoding
286             # and Content-Length to prevent request smuggling (CL/TE desync)
287 283 100 100     848 if ($chunked && defined $content_length) {
288 1         12 return ({ error => 400, message => 'Transfer-Encoding and Content-Length are mutually exclusive' }, $header_end + 4);
289             }
290              
291             # Determine HTTP version (optimized: substr instead of regex)
292 282         709 my $http_version = '1.1';
293 282 50 33     1988 if ($env{SERVER_PROTOCOL} && index($env{SERVER_PROTOCOL}, 'HTTP/') == 0) {
294 282         793 $http_version = substr($env{SERVER_PROTOCOL}, 5);
295             }
296              
297             # RFC 7230 Section 5.4: A client MUST send a Host header field in all
298             # HTTP/1.1 request messages. A server MUST respond with a 400 (Bad Request)
299             # status code to any HTTP/1.1 request message that lacks a Host header field.
300 282 100 100     1466 if ($http_version eq '1.1' && !defined $env{HTTP_HOST}) {
301 1         16 return ({ error => 400, message => 'Bad Request' }, $header_end + 4);
302             }
303              
304 281         3728 my $request = {
305             method => $method,
306             path => $path,
307             raw_path => $raw_path,
308             query_string => $query_string,
309             http_version => $http_version,
310             headers => \@headers,
311             content_length => $content_length,
312             chunked => $chunked,
313             expect_continue => $expect_continue,
314             };
315              
316 281         1759 return ($request, $ret);
317             }
318              
319             sub serialize_response_start {
320 281     281 1 5726 my ($self, $status, $headers, $chunked, $http_version) = @_;
321 281   100     704 $chunked //= 0;
322 281   100     648 $http_version //= '1.1';
323              
324 281   100     1307 my $phrase = $STATUS_PHRASES{$status} // 'Unknown';
325 281         733 my $response = "HTTP/$http_version $status $phrase\r\n";
326              
327             # Serialize headers and detect app-provided Server header in a single pass
328 281         422 my $has_server = 0;
329 281         707 for my $header (@$headers) {
330 728         1182 my ($name, $value) = @$header;
331 728 100       1397 $has_server = 1 if lc($name) eq 'server';
332 728         1559 $name = _validate_header_name($name);
333 727         1355 $value = _validate_header_value($value);
334 716         1579 $response .= "$name: $value\r\n";
335             }
336              
337             # Add default Server header if app didn't provide one
338 269 100       597 unless ($has_server) {
339 268   66     786 $_server_header //= "Server: PAGI::Server/$PAGI::Server::VERSION\r\n";
340 268         552 $response .= $_server_header;
341             }
342              
343             # Add Transfer-Encoding if chunked (HTTP/1.1 only)
344 269 100 100     1055 if ($chunked && $http_version eq '1.1') {
345 176         405 $response .= "Transfer-Encoding: chunked\r\n";
346             }
347              
348 269         469 $response .= "\r\n";
349 269         695 return $response;
350             }
351              
352             sub serialize_response_body {
353 1     1 1 315 my ($self, $chunk, $more, $chunked) = @_;
354 1   50     4 $chunked //= 0;
355              
356 1 50 33     5 return '' unless defined $chunk && length $chunk;
357              
358 1 50       3 if ($chunked) {
359 1         5 my $len = sprintf("%x", length($chunk));
360 1         4 my $body = "$len\r\n$chunk\r\n";
361              
362             # Add final chunk if no more data
363 1 50       2 if (!$more) {
364 1         2 $body .= "0\r\n\r\n";
365             }
366              
367 1         3 return $body;
368             } else {
369 0         0 return $chunk;
370             }
371             }
372              
373             sub serialize_chunk_end {
374 0     0 0 0 my ($self) = @_;
375              
376 0         0 return "0\r\n\r\n";
377             }
378              
379             sub serialize_continue {
380 1     1 0 805 my ($self) = @_;
381              
382 1         4 return "HTTP/1.1 100 Continue\r\n\r\n";
383             }
384              
385             sub serialize_trailers {
386 4     4 1 658 my ($self, $headers) = @_;
387              
388 4         9 my $trailers = '';
389 4         9 for my $header (@$headers) {
390 4         7 my ($name, $value) = @$header;
391 4         10 $name = _validate_header_name($name);
392 3         7 $value = _validate_header_value($value);
393 0         0 $trailers .= "$name: $value\r\n";
394             }
395 0         0 $trailers .= "\r\n";
396 0         0 return $trailers;
397             }
398              
399             sub format_date {
400 263     263 0 833 my ($self) = @_;
401              
402 263         573 my $now = time();
403 263 100       666 if ($now != $_cached_date_time) {
404 67         189 $_cached_date_time = $now;
405 67         545 my @days = qw(Sun Mon Tue Wed Thu Fri Sat);
406 67         547 my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
407 67         385 my @gmt = gmtime($now);
408 67         817 $_cached_date = sprintf("%s, %02d %s %04d %02d:%02d:%02d GMT",
409             $days[$gmt[6]], $gmt[3], $months[$gmt[4]], $gmt[5] + 1900,
410             $gmt[2], $gmt[1], $gmt[0]);
411             }
412 263         909 return $_cached_date;
413             }
414              
415             =head2 parse_chunked_body
416              
417             my ($data, $bytes_consumed, $complete) = $proto->parse_chunked_body($buffer);
418              
419             Parses chunked Transfer-Encoding body from the buffer. Returns:
420             - $data: decoded body data (may be empty string)
421             - $bytes_consumed: number of bytes consumed from buffer
422             - $complete: 1 if final chunk (0-length) was seen, 0 otherwise
423              
424             Returns (undef, 0, 0) if more data is needed.
425              
426             =cut
427              
428             sub parse_chunked_body {
429 16     16 1 17753 my ($self, $buffer_ref) = @_;
430              
431 16 50       39 my $buffer = ref $buffer_ref ? $$buffer_ref : $buffer_ref;
432 16         25 my $data = '';
433 16         19 my $total_consumed = 0;
434 16         18 my $complete = 0;
435              
436 16         17 while (1) {
437             # Find chunk size line
438 25         45 my $crlf = index($buffer, "\r\n", $total_consumed);
439 25 50       42 last if $crlf < 0;
440              
441             # Parse chunk size (hex)
442 25         63 my $size_line = substr($buffer, $total_consumed, $crlf - $total_consumed);
443 25         43 $size_line =~ s/;.*//; # Remove chunk extensions
444 25         81 $size_line =~ s/^\s+|\s+$//g; # Trim whitespace
445              
446             # Validate chunk size is valid hex (RFC 7230 Section 4.1)
447 25 100 100     225 if ($size_line eq '' || $size_line !~ /^[0-9a-fA-F]+$/) {
448 4         22 return ({ error => 400, message => 'Invalid chunk size' }, 0, 0);
449             }
450              
451             # Reject obviously oversized chunk sizes before hex() conversion
452             # 7 hex digits = max 268MB, 8+ digits certainly exceeds any reasonable limit
453 21 100       36 if (length($size_line) > 7) {
454 1         8 return ({ error => 413, message => 'Chunk Too Large' }, 0, 0);
455             }
456              
457 20         31 my $chunk_size = hex($size_line);
458              
459             # Reject chunks exceeding max_chunk_size (DoS protection)
460 20 100       39 if ($chunk_size > $self->{max_chunk_size}) {
461 2         12 return ({ error => 413, message => 'Chunk Too Large' }, 0, 0);
462             }
463              
464             # Check if we have the full chunk + trailing CRLF
465 18         1404 my $chunk_start = $crlf + 2;
466 18         25 my $chunk_end = $chunk_start + $chunk_size + 2; # +2 for trailing CRLF
467              
468 18 100       59 if (length($buffer) < $chunk_end) {
469 1         2 last; # Need more data
470             }
471              
472             # Extract chunk data
473 17 100       24 if ($chunk_size > 0) {
474 9         21 $data .= substr($buffer, $chunk_start, $chunk_size);
475             }
476              
477 17         17 $total_consumed = $chunk_end;
478              
479             # Check for final chunk
480 17 100       29 if ($chunk_size == 0) {
481 8         10 $complete = 1;
482 8         16 last;
483             }
484             }
485              
486 9         31 return ($data, $total_consumed, $complete);
487             }
488              
489             1;
490              
491             __END__