File Coverage

blib/lib/MIME/Parser/Reader.pm
Criterion Covered Total %
statement 83 122 68.0
branch 23 56 41.0
condition 17 27 62.9
subroutine 12 12 100.0
pod 0 11 0.0
total 135 228 59.2


line stmt bran cond sub pod time code
1             package MIME::Parser::Reader;
2              
3             =head1 NAME
4              
5             MIME::Parser::Reader - a line-oriented reader for a MIME::Parser
6              
7              
8             =head1 SYNOPSIS
9              
10             This module is used internally by MIME::Parser; you probably
11             don't need to be looking at it at all. But just in case...
12              
13             ### Create a top-level reader, where chunks end at EOF:
14             $rdr = MIME::Parser::Reader->new();
15              
16             ### Spawn a child reader, where chunks also end at a boundary:
17             $subrdr = $rdr->spawn->add_boundary($bound);
18              
19             ### Spawn a child reader, where chunks also end at a given string:
20             $subrdr = $rdr->spawn->add_terminator($string);
21              
22             ### Read until boundary or terminator:
23             $subrdr->read_chunk($in, $out);
24              
25              
26             =head1 DESCRIPTION
27              
28             A line-oriented reader which can deal with virtual end-of-stream
29             defined by a collection of boundaries.
30              
31             B this is a private class solely for use by MIME::Parser.
32             This class has no official public interface
33              
34             =cut
35              
36 17     17   63 use strict;
  17         25  
  17         19823  
37              
38             ### All possible end-of-line sequences.
39             ### Note that "" is included because last line of stream may have no newline!
40             my @EOLs = ("", "\r", "\n", "\r\n", "\n\r");
41              
42             ### Long line:
43             my $LONGLINE = ' ' x 1000;
44              
45              
46             #------------------------------
47             #
48             # new
49             #
50             # I
51             # Construct an empty (top-level) reader.
52             #
53             sub new {
54 54     54 0 97 my ($class) = @_;
55 54         78 my $eos;
56 54         430 return bless {
57             Bounds => [],
58             BH => {},
59             TH => {},
60             EOS => \$eos,
61             }, $class;
62             }
63              
64             #------------------------------
65             #
66             # spawn
67             #
68             # I
69             # Return a reader which is mostly a duplicate, except that the EOS
70             # accumulator is shared.
71             #
72             sub spawn {
73 212     212 0 207 my $self = shift;
74 212         549 my $dup = bless {}, ref($self);
75 212         188 $dup->{Bounds} = [ @{$self->{Bounds}} ]; ### deep copy
  212         468  
76 212         186 $dup->{BH} = { %{$self->{BH}} }; ### deep copy
  212         510  
77 212         210 $dup->{TH} = { %{$self->{TH}} }; ### deep copy
  212         299  
78 212         263 $dup->{EOS} = $self->{EOS}; ### shallow copy; same ref!
79 212         335 $dup;
80             }
81              
82             #------------------------------
83             #
84             # add_boundary BOUND
85             #
86             # I
87             # Let BOUND be the new innermost boundary. Returns self.
88             #
89             sub add_boundary {
90 40     40 0 58 my ($self, $bound) = @_;
91 40         50 unshift @{$self->{Bounds}}, $bound; ### now at index 0
  40         103  
92 40         148 $self->{BH}{"--$bound"} = "DELIM $bound";
93 40         130 $self->{BH}{"--$bound--"} = "CLOSE $bound";
94 40         82 $self;
95             }
96              
97             #------------------------------
98             #
99             # add_terminator LINE
100             #
101             # I
102             # Let LINE be another terminator. Returns self.
103             #
104             sub add_terminator {
105 344     344 0 348 my ($self, $line) = @_;
106 344         451 foreach (@EOLs) {
107 1720         2763 $self->{TH}{"$line$_"} = "DONE $line";
108             }
109 344         416 $self;
110             }
111              
112             #------------------------------
113             #
114             # has_bounds
115             #
116             # I
117             # Are there boundaries to contend with?
118             #
119             sub has_bounds {
120 214     214 0 169 scalar(@{shift->{Bounds}});
  214         629  
121             }
122              
123             #------------------------------
124             #
125             # depth
126             #
127             # I
128             # How many levels are there?
129             #
130             sub depth {
131 40     40 0 43 scalar(@{shift->{Bounds}});
  40         148  
132             }
133              
134             #------------------------------
135             #
136             # eos [EOS]
137             #
138             # I
139             # Return the last end-of-stream token seen.
140             # See read_chunk() for what these might be.
141             #
142             sub eos {
143 603     603 0 480 my $self = shift;
144 603 100       854 ${$self->{EOS}} = $_[0] if @_;
  20         36  
145 603         448 ${$self->{EOS}};
  603         933  
146             }
147              
148             #------------------------------
149             #
150             # eos_type [EOSTOKEN]
151             #
152             # I
153             # Return the high-level type of the given token (defaults to our token).
154             #
155             # DELIM saw an innermost boundary like --xyz
156             # CLOSE saw an innermost boundary like --xyz--
157             # DONE callback returned false
158             # EOF end of file
159             # EXT saw boundary of some higher-level
160             #
161             sub eos_type {
162 583     583 0 602 my ($self, $eos) = @_;
163 583 50       1218 $eos = $self->eos if (@_ == 1);
164              
165 583 100       2149 if ($eos =~ /^(DONE|EOF)/) {
    50          
166 342         1165 return $1;
167             }
168             elsif ($eos =~ /^(DELIM|CLOSE) (.*)$/) {
169 241 50       1330 return (($2 eq $self->{Bounds}[0]) ? $1 : 'EXT');
170             }
171             else {
172 0         0 die("internal error: unable to classify boundary token ($eos)");
173             }
174             }
175              
176             #------------------------------
177             #
178             # native_handle HANDLE
179             #
180             # I
181             # Can we do native i/o on HANDLE? If true, returns the handle
182             # that will respond to native I/O calls; else, returns undef.
183             #
184             sub native_handle {
185 697     697 0 1528 my $fh = shift;
186 697 100 100     3190 return $fh if ($fh->isa('IO::File') || $fh->isa('IO::Handle'));
187 348 50       815 return $fh if (ref $fh eq 'GLOB');
188 0         0 undef;
189             }
190              
191             #------------------------------
192             #
193             # read_chunk INHANDLE, OUTHANDLE
194             #
195             # I
196             # Get lines until end-of-stream.
197             # Returns the terminating-condition token:
198             #
199             # DELIM xyz saw boundary line "--xyz"
200             # CLOSE xyz saw boundary line "--xyz--"
201             # DONE xyz saw terminator line "xyz"
202             # EOF end of file
203              
204             # Parse up to (and including) the boundary, and dump output.
205             # Follows the RFC 2046 specification, that the CRLF immediately preceding
206             # the boundary is part of the boundary, NOT part of the input!
207             #
208             # NOTE: while parsing bodies, we take care to remember the EXACT end-of-line
209             # sequence. This is because we *may* be handling 'binary' encoded data, and
210             # in that case we can't just massage \r\n into \n! Don't worry... if the
211             # data is styled as '7bit' or '8bit', the "decoder" will massage the CRLF
212             # for us. For now, we're just trying to chop up the data stream.
213              
214             # NBK - Oct 12, 1999
215             # The CRLF at the end of the current line is considered part
216             # of the boundary. I buffer the current line and output the
217             # last. I strip the last CRLF when I hit the boundary.
218              
219             sub read_chunk {
220 347     347 0 414 my ($self, $in, $out, $keep_newline, $normalize_newlines) = @_;
221              
222             # If we're parsing a preamble or epilogue, we need to keep the blank line
223             # that precedes the boundary line.
224 347   100     1032 $keep_newline ||= 0;
225              
226 347   100     661 $normalize_newlines ||= 0;
227             ### Init:
228 347         291 my %bh = %{$self->{BH}};
  347         967  
229 347         327 my %th = %{$self->{TH}}; my $thx = keys %th;
  347         986  
  347         434  
230 347         353 local $_ = $LONGLINE;
231 347         294 my $maybe;
232 347         321 my $last = '';
233 347         328 my $eos = '';
234              
235             ### Determine types:
236 347         481 my $n_in = native_handle($in);
237 347         489 my $n_out = native_handle($out);
238              
239             ### Handle efficiently by type:
240 347 50       581 if ($n_in) {
241 347 50       519 if ($n_out) { ### native input, native output [fastest]
242 347         2109 while (<$n_in>) {
243             # Normalize line ending
244 3347 100       4954 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
245 3347 100       4429 if (substr($_, 0, 2) eq '--') {
246 159         799 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
247 159 100       349 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  144         177  
  144         155  
248             }
249 3203 100 66     4434 $thx and $th{$_} and do { $eos = $th{$_}; last };
  169         211  
  169         285  
250 3034         3022 print $n_out $last; $last = $_;
  3034         4759  
251             }
252             }
253             else { ### native input, OO output [slower]
254 0         0 while (<$n_in>) {
255             # Normalize line ending
256 0 0       0 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
257 0 0       0 if (substr($_, 0, 2) eq '--') {
258 0         0 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
259 0 0       0 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  0         0  
  0         0  
260             }
261 0 0 0     0 $thx and $th{$_} and do { $eos = $th{$_}; last };
  0         0  
  0         0  
262 0         0 $out->print($last); $last = $_;
  0         0  
263             }
264             }
265             }
266             else {
267 0 0       0 if ($n_out) { ### OO input, native output [even slower]
268 0         0 while (defined($_ = $in->getline)) {
269             # Normalize line ending
270 0 0       0 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
271 0 0       0 if (substr($_, 0, 2) eq '--') {
272 0         0 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
273 0 0       0 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  0         0  
  0         0  
274             }
275 0 0 0     0 $thx and $th{$_} and do { $eos = $th{$_}; last };
  0         0  
  0         0  
276 0         0 print $n_out $last; $last = $_;
  0         0  
277             }
278             }
279             else { ### OO input, OO output [slowest]
280 0         0 while (defined($_ = $in->getline)) {
281             # Normalize line ending
282 0 0       0 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
283 0 0       0 if (substr($_, 0, 2) eq '--') {
284 0         0 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
285 0 0       0 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  0         0  
  0         0  
286             }
287 0 0 0     0 $thx and $th{$_} and do { $eos = $th{$_}; last };
  0         0  
  0         0  
288 0         0 $out->print($last); $last = $_;
  0         0  
289             }
290             }
291             }
292              
293             # Write out last held line, removing terminating CRLF if ended on bound,
294             # unless the line consists only of CRLF and we're wanting to keep the
295             # preceding blank line (as when parsing a preamble)
296 347 100 100     1677 $last =~ s/[\r\n]+\Z// if ($eos =~ /^(DELIM|CLOSE)/ && !($keep_newline && $last =~ m/^[\r\n]\z/));
      100        
297 347         1116 $out->print($last);
298              
299             ### Save and return what we finished on:
300 347   100     1817 ${$self->{EOS}} = ($eos || 'EOF');
  347         524  
301 347         1053 1;
302             }
303              
304             #------------------------------
305             #
306             # read_lines INHANDLE, \@OUTLINES
307             #
308             # I
309             # Read lines into the given array.
310             #
311             sub read_lines {
312 38     38 0 53 my ($self, $in, $outlines) = @_;
313              
314 38         58 my $data = '';
315 38 50       419 open(my $fh, '>', \$data) or die $!;
316 38         87 $self->read_chunk($in, $fh);
317 38         116 @$outlines = split(/^/, $data);
318 38         71 close $fh;
319              
320 38         102 1;
321             }
322              
323             1;
324             __END__