| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package HTTP::MultiPartParser; | 
| 2 | 4 |  |  | 4 |  | 40442 | use strict; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 121 |  | 
| 3 | 4 |  |  | 4 |  | 20 | use warnings; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 183 |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | BEGIN { | 
| 6 | 4 |  |  | 4 |  | 96 | our $VERSION = '0.02'; | 
| 7 |  |  |  |  |  |  | } | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 4 |  |  | 4 |  | 18 | use Carp         qw[]; | 
|  | 4 |  |  |  |  | 12 |  | 
|  | 4 |  |  |  |  | 85 |  | 
| 10 | 4 |  |  | 4 |  | 19 | use Scalar::Util qw[]; | 
|  | 4 |  |  |  |  | 4 |  | 
|  | 4 |  |  |  |  | 6447 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | my $_mk_parser; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | # RFC2046 | 
| 15 |  |  |  |  |  |  | my $ValidBoundary = qr<\A [0-9A-Za-z'()+_,-./:=?]+ \z>x; | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | sub new { | 
| 18 | 27 |  |  | 27 | 1 | 166140 | my ($class, %params) = @_; | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 27 |  |  |  |  | 137 | my $self = { | 
| 21 |  |  |  |  |  |  | on_error          => \&Carp::croak, | 
| 22 |  |  |  |  |  |  | max_header_size   => 32 * 1024, | 
| 23 |  |  |  |  |  |  | max_preamble_size => 32 * 1024, | 
| 24 |  |  |  |  |  |  | on_header_as      => 'lines', | 
| 25 |  |  |  |  |  |  | }; | 
| 26 |  |  |  |  |  |  |  | 
| 27 | 27 |  |  |  |  | 129 | while (my ($p, $v) = each %params) { | 
| 28 | 96 | 100 | 100 |  |  | 377 | if ($p eq 'boundary') { | 
|  |  | 50 | 66 |  |  |  |  | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 29 | 27 | 50 | 33 |  |  | 346 | Carp::croak(q/Parameter 'boundary' is not a valid boundary value/) | 
|  |  |  | 33 |  |  |  |  | 
| 30 |  |  |  |  |  |  | unless ref \$v eq 'SCALAR' && defined $v && $v =~ $ValidBoundary; | 
| 31 | 27 |  |  |  |  | 97 | $self->{boundary} = $v; | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  | elsif (   $p eq 'on_header' | 
| 34 |  |  |  |  |  |  | || $p eq 'on_body' | 
| 35 |  |  |  |  |  |  | || $p eq 'on_error') { | 
| 36 | 69 | 50 |  |  |  | 128 | Carp::croak(qq/Parameter '$p' is not a CODE reference/) | 
| 37 |  |  |  |  |  |  | unless ref $v eq 'CODE'; | 
| 38 | 69 |  |  |  |  | 170 | $self->{$p} = $v; | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  | elsif (   $p eq 'max_header_size' | 
| 41 |  |  |  |  |  |  | || $p eq 'max_preamble_size') { | 
| 42 | 0 | 0 | 0 |  |  | 0 | Carp::croak(qq/Parameter '$p' is not a positive integer/) | 
|  |  |  | 0 |  |  |  |  | 
| 43 |  |  |  |  |  |  | unless ref \$v eq 'SCALAR' && defined $v && $v =~ /\A [1-9][0-9]* \z/x; | 
| 44 | 0 |  |  |  |  | 0 | $self->{$p} = $v; | 
| 45 |  |  |  |  |  |  | } | 
| 46 |  |  |  |  |  |  | elsif ($p eq 'on_header_as') { | 
| 47 | 0 | 0 | 0 |  |  | 0 | Carp::croak(q/Parameter 'on_header_as' must be either 'unparsed' or 'lines'/) | 
|  |  |  | 0 |  |  |  |  | 
| 48 |  |  |  |  |  |  | unless ref \$v eq 'SCALAR' && defined $v && $v =~ /\A (?: unparsed | lines) \z/x; | 
| 49 | 0 |  |  |  |  | 0 | $self->{on_header_as} = $v; | 
| 50 |  |  |  |  |  |  | } | 
| 51 |  |  |  |  |  |  | else { | 
| 52 | 0 |  |  |  |  | 0 | Carp::croak(qq/Unknown parameter '$p' passed to constructor/); | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  | } | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 27 |  |  |  |  | 50 | for my $p (qw(boundary on_header on_body)) { | 
| 57 |  |  |  |  |  |  | Carp::croak(qq/Mandatory parameter '$p' is missing/) | 
| 58 | 81 | 50 |  |  |  | 152 | unless exists $self->{$p}; | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  |  | 
| 61 | 27 |  |  |  |  | 43 | bless $self, $class; | 
| 62 | 27 |  |  |  |  | 70 | $self->{parser} = $_mk_parser->($self); | 
| 63 | 27 |  |  |  |  | 74 | return $self; | 
| 64 |  |  |  |  |  |  | } | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | sub parse { | 
| 67 | 38 | 50 |  | 38 | 1 | 775 | @_ == 2 || Carp::croak(q/Usage: $parser->parse($octets)/); | 
| 68 | 38 |  |  |  |  | 76 | return $_[0]->{parser}->($_[1]); | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | sub finish { | 
| 72 | 27 | 50 |  | 27 | 1 | 158 | @_ == 1 || Carp::croak(q/Usage: $parser->finish()/); | 
| 73 | 27 |  |  |  |  | 63 | return $_[0]->{parser}->('', 1); | 
| 74 |  |  |  |  |  |  | } | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | sub reset { | 
| 77 | 0 | 0 |  | 0 | 1 |  | @_ == 1 || Carp::croak(q/Usage: $parser->reset()/); | 
| 78 | 0 |  |  |  |  |  | $_[0]->{parser}  = $_mk_parser->($_[0]); | 
| 79 | 0 |  |  |  |  |  | $_[0]->{aborted} = !!0; | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | sub is_aborted { | 
| 83 | 0 | 0 |  | 0 | 1 |  | @_ == 1 || Carp::croak(q/Usage: $parser->is_aborted()/); | 
| 84 | 0 |  |  |  |  |  | return $_[0]->{aborted}; | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | sub CRLF  () { "\x0D\x0A" } | 
| 88 |  |  |  |  |  |  | sub TRUE  () { !!1 } | 
| 89 |  |  |  |  |  |  | sub FALSE () { !!0 } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | sub STATE_PREAMBLE () { 1 } | 
| 92 |  |  |  |  |  |  | sub STATE_BOUNDARY () { 2 } | 
| 93 |  |  |  |  |  |  | sub STATE_HEADER   () { 3 } | 
| 94 |  |  |  |  |  |  | sub STATE_BODY     () { 4 } | 
| 95 |  |  |  |  |  |  | sub STATE_EPILOGUE () { 5 } | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | $_mk_parser = sub { | 
| 98 |  |  |  |  |  |  | Scalar::Util::weaken(my $self = $_[0]); | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | # RFC 2616 3.7.2 Multipart Types | 
| 101 |  |  |  |  |  |  | # The message body is itself a protocol element and MUST therefore use only | 
| 102 |  |  |  |  |  |  | # CRLF to represent line breaks between body-parts. | 
| 103 |  |  |  |  |  |  | my $boundary           = $self->{boundary}; | 
| 104 |  |  |  |  |  |  | my $boundary_preamble  =        '--' . $boundary; | 
| 105 |  |  |  |  |  |  | my $boundary_delimiter = CRLF . '--' . $boundary; | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | my $chunk   = ''; | 
| 108 |  |  |  |  |  |  | my $buffer  = ''; | 
| 109 |  |  |  |  |  |  | my $state   = STATE_PREAMBLE; | 
| 110 |  |  |  |  |  |  | my $finish  = FALSE; | 
| 111 |  |  |  |  |  |  | my $aborted = FALSE; | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | my $on_header = $self->{on_header}; | 
| 114 |  |  |  |  |  |  | my $on_body   = $self->{on_body}; | 
| 115 |  |  |  |  |  |  | my $on_error  = sub { | 
| 116 |  |  |  |  |  |  | $aborted = $self->{aborted} = TRUE; | 
| 117 |  |  |  |  |  |  | goto $self->{on_error}; | 
| 118 |  |  |  |  |  |  | }; | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | if ($self->{on_header_as} eq 'lines') { | 
| 121 |  |  |  |  |  |  | $on_header = sub { | 
| 122 |  |  |  |  |  |  | my @headers; | 
| 123 |  |  |  |  |  |  | for (split /\x0D\x0A/, $_[0]) { | 
| 124 |  |  |  |  |  |  | if (/\A [^\x00-\x1F\x7F:]+ : /x) { | 
| 125 |  |  |  |  |  |  | push @headers, $_; | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  | elsif (s/\A [\x09\x20]+ //x) { | 
| 128 |  |  |  |  |  |  | if (!@headers) { | 
| 129 |  |  |  |  |  |  | $on_error->(q/Continuation line seen before first header/); | 
| 130 |  |  |  |  |  |  | return; | 
| 131 |  |  |  |  |  |  | } | 
| 132 |  |  |  |  |  |  | next unless length; | 
| 133 |  |  |  |  |  |  | $headers[-1] .= ' ' unless $headers[-1] =~ /[\x09\x20]\z/; | 
| 134 |  |  |  |  |  |  | $headers[-1] .= $_; | 
| 135 |  |  |  |  |  |  | } | 
| 136 |  |  |  |  |  |  | else { | 
| 137 |  |  |  |  |  |  | $on_error->(q/Malformed header line/); | 
| 138 |  |  |  |  |  |  | return; | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  | $self->{on_header}->(\@headers); | 
| 142 |  |  |  |  |  |  | }; | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | return sub { | 
| 146 |  |  |  |  |  |  | $buffer .= $_[0]; | 
| 147 |  |  |  |  |  |  | $finish  = $_[1]; | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | while (!$aborted) { | 
| 150 |  |  |  |  |  |  | if ($state == STATE_PREAMBLE) { | 
| 151 |  |  |  |  |  |  | my $pos = index($buffer, $boundary_preamble); | 
| 152 |  |  |  |  |  |  | if ($pos < 0) { | 
| 153 |  |  |  |  |  |  | if (length $buffer > $self->{max_preamble_size}) { | 
| 154 |  |  |  |  |  |  | $on_error->(q/Size of preamble exceeds maximum allowed/); | 
| 155 |  |  |  |  |  |  | last; | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  | $finish && $on_error->(q/End of stream encountered while parsing preamble/); | 
| 158 |  |  |  |  |  |  | last; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  | substr($buffer, 0, $pos + 2 + length $boundary, ''); | 
| 161 |  |  |  |  |  |  | $state = STATE_BOUNDARY; | 
| 162 |  |  |  |  |  |  | } | 
| 163 |  |  |  |  |  |  | elsif ($state == STATE_BOUNDARY) { | 
| 164 |  |  |  |  |  |  | if (length $buffer < 2) { | 
| 165 |  |  |  |  |  |  | $finish && $on_error->(q/End of stream encountered while parsing boundary/); | 
| 166 |  |  |  |  |  |  | last; | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  | elsif (substr($buffer, 0, 2) eq CRLF) { | 
| 169 |  |  |  |  |  |  | substr($buffer, 0, 2, ''); | 
| 170 |  |  |  |  |  |  | $state = STATE_HEADER; | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  | elsif (substr($buffer, 0, 2) eq '--') { | 
| 173 |  |  |  |  |  |  | if (length $buffer < 4) { | 
| 174 |  |  |  |  |  |  | $finish && $on_error->(q/End of stream encountered while parsing closing boundary/); | 
| 175 |  |  |  |  |  |  | last; | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  | elsif (substr($buffer, 2, 2) eq CRLF) { | 
| 178 |  |  |  |  |  |  | substr($buffer, 0, 4, ''); | 
| 179 |  |  |  |  |  |  | $state = STATE_EPILOGUE; | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  | else { | 
| 182 |  |  |  |  |  |  | $on_error->(q/Closing boundary does not terminate with CRLF/); | 
| 183 |  |  |  |  |  |  | last; | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  | else { | 
| 187 |  |  |  |  |  |  | $on_error->(q/Boundary does not terminate with CRLF or hyphens/); | 
| 188 |  |  |  |  |  |  | last; | 
| 189 |  |  |  |  |  |  | } | 
| 190 |  |  |  |  |  |  | } | 
| 191 |  |  |  |  |  |  | elsif ($state == STATE_HEADER) { | 
| 192 |  |  |  |  |  |  | my $pos = index($buffer, CRLF . CRLF); | 
| 193 |  |  |  |  |  |  | if ($pos < 0) { | 
| 194 |  |  |  |  |  |  | if (length $buffer > $self->{max_header_size}) { | 
| 195 |  |  |  |  |  |  | $on_error->(q/Size of part header exceeds maximum allowed/); | 
| 196 |  |  |  |  |  |  | last; | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  | $finish && $on_error->(q/End of stream encountered while parsing part header/); | 
| 199 |  |  |  |  |  |  | last; | 
| 200 |  |  |  |  |  |  | } | 
| 201 |  |  |  |  |  |  |  | 
| 202 |  |  |  |  |  |  | $chunk = substr($buffer, 0, $pos + 4, ''); | 
| 203 |  |  |  |  |  |  | $state = STATE_BODY; | 
| 204 |  |  |  |  |  |  | $on_header->($chunk); | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  | elsif ($state == STATE_BODY) { | 
| 207 |  |  |  |  |  |  | my $take = index($buffer, $boundary_delimiter); | 
| 208 |  |  |  |  |  |  | if ($take < 0) { | 
| 209 |  |  |  |  |  |  | $take = length($buffer) - (6 + length $boundary); | 
| 210 |  |  |  |  |  |  | if ($take <= 0) { | 
| 211 |  |  |  |  |  |  | $finish && $on_error->(q/End of stream encountered while parsing part body/); | 
| 212 |  |  |  |  |  |  | last; | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  | else { | 
| 216 |  |  |  |  |  |  | $state = STATE_BOUNDARY; | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | $chunk = substr($buffer, 0, $take, ''); | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | if ($state == STATE_BOUNDARY) { | 
| 222 |  |  |  |  |  |  | substr($buffer, 0, 4 + length $boundary, ''); | 
| 223 |  |  |  |  |  |  | } | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | $on_body->($chunk, $state == STATE_BOUNDARY); | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  | # RFC 2616 3.7.2 Multipart Types | 
| 228 |  |  |  |  |  |  | # Unlike in RFC 2046, the epilogue of any multipart message MUST be | 
| 229 |  |  |  |  |  |  | # empty; HTTP applications MUST NOT transmit the epilogue (even if the | 
| 230 |  |  |  |  |  |  | # original multipart contains an epilogue). These restrictions exist in | 
| 231 |  |  |  |  |  |  | # order to preserve the self-delimiting nature of a multipart message- | 
| 232 |  |  |  |  |  |  | # body, wherein the "end" of the message-body is indicated by the | 
| 233 |  |  |  |  |  |  | # ending multipart boundary. | 
| 234 |  |  |  |  |  |  | elsif ($state == STATE_EPILOGUE) { | 
| 235 |  |  |  |  |  |  | (length $buffer == 0) | 
| 236 |  |  |  |  |  |  | || $on_error->(q/Nonempty epilogue/); | 
| 237 |  |  |  |  |  |  | last; | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  | else { | 
| 240 |  |  |  |  |  |  | Carp::croak(qq/panic: unknown state: $state/); | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  | } | 
| 243 |  |  |  |  |  |  | return !$aborted; | 
| 244 |  |  |  |  |  |  | }; | 
| 245 |  |  |  |  |  |  | }; | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | 1; |