File Coverage

blib/lib/MIME/Parser/Reader.pm
Criterion Covered Total %
statement 83 122 68.0
branch 23 56 41.0
condition 17 27 62.9
subroutine 12 12 100.0
pod 0 11 0.0
total 135 228 59.2


line stmt bran cond sub pod time code
1             package MIME::Parser::Reader;
2              
3             =head1 NAME
4              
5             MIME::Parser::Reader - a line-oriented reader for a MIME::Parser
6              
7              
8             =head1 SYNOPSIS
9              
10             This module is used internally by MIME::Parser; you probably
11             don't need to be looking at it at all. But just in case...
12              
13             ### Create a top-level reader, where chunks end at EOF:
14             $rdr = MIME::Parser::Reader->new();
15              
16             ### Spawn a child reader, where chunks also end at a boundary:
17             $subrdr = $rdr->spawn->add_boundary($bound);
18              
19             ### Spawn a child reader, where chunks also end at a given string:
20             $subrdr = $rdr->spawn->add_terminator($string);
21              
22             ### Read until boundary or terminator:
23             $subrdr->read_chunk($in, $out);
24              
25              
26             =head1 DESCRIPTION
27              
28             A line-oriented reader which can deal with virtual end-of-stream
29             defined by a collection of boundaries.
30              
31             B this is a private class solely for use by MIME::Parser.
32             This class has no official public interface
33              
34             =cut
35              
36 15     15   54 use strict;
  15         17  
  15         17172  
37              
38             ### All possible end-of-line sequences.
39             ### Note that "" is included because last line of stream may have no newline!
40             my @EOLs = ("", "\r", "\n", "\r\n", "\n\r");
41              
42             ### Long line:
43             my $LONGLINE = ' ' x 1000;
44              
45              
46             #------------------------------
47             #
48             # new
49             #
50             # I
51             # Construct an empty (top-level) reader.
52             #
53             sub new {
54 53     53 0 77 my ($class) = @_;
55 53         52 my $eos;
56 53         389 return bless {
57             Bounds => [],
58             BH => {},
59             TH => {},
60             EOS => \$eos,
61             }, $class;
62             }
63              
64             #------------------------------
65             #
66             # spawn
67             #
68             # I
69             # Return a reader which is mostly a duplicate, except that the EOS
70             # accumulator is shared.
71             #
72             sub spawn {
73 208     208 0 191 my $self = shift;
74 208         443 my $dup = bless {}, ref($self);
75 208         220 $dup->{Bounds} = [ @{$self->{Bounds}} ]; ### deep copy
  208         424  
76 208         164 $dup->{BH} = { %{$self->{BH}} }; ### deep copy
  208         550  
77 208         197 $dup->{TH} = { %{$self->{TH}} }; ### deep copy
  208         318  
78 208         222 $dup->{EOS} = $self->{EOS}; ### shallow copy; same ref!
79 208         322 $dup;
80             }
81              
82             #------------------------------
83             #
84             # add_boundary BOUND
85             #
86             # I
87             # Let BOUND be the new innermost boundary. Returns self.
88             #
89             sub add_boundary {
90 39     39 0 49 my ($self, $bound) = @_;
91 39         39 unshift @{$self->{Bounds}}, $bound; ### now at index 0
  39         91  
92 39         141 $self->{BH}{"--$bound"} = "DELIM $bound";
93 39         116 $self->{BH}{"--$bound--"} = "CLOSE $bound";
94 39         67 $self;
95             }
96              
97             #------------------------------
98             #
99             # add_terminator LINE
100             #
101             # I
102             # Let LINE be another terminator. Returns self.
103             #
104             sub add_terminator {
105 338     338 0 305 my ($self, $line) = @_;
106 338         416 foreach (@EOLs) {
107 1690         2533 $self->{TH}{"$line$_"} = "DONE $line";
108             }
109 338         422 $self;
110             }
111              
112             #------------------------------
113             #
114             # has_bounds
115             #
116             # I
117             # Are there boundaries to contend with?
118             #
119             sub has_bounds {
120 210     210 0 148 scalar(@{shift->{Bounds}});
  210         668  
121             }
122              
123             #------------------------------
124             #
125             # depth
126             #
127             # I
128             # How many levels are there?
129             #
130             sub depth {
131 39     39 0 40 scalar(@{shift->{Bounds}});
  39         126  
132             }
133              
134             #------------------------------
135             #
136             # eos [EOS]
137             #
138             # I
139             # Return the last end-of-stream token seen.
140             # See read_chunk() for what these might be.
141             #
142             sub eos {
143 592     592 0 480 my $self = shift;
144 592 100       829 ${$self->{EOS}} = $_[0] if @_;
  20         38  
145 592         417 ${$self->{EOS}};
  592         822  
146             }
147              
148             #------------------------------
149             #
150             # eos_type [EOSTOKEN]
151             #
152             # I
153             # Return the high-level type of the given token (defaults to our token).
154             #
155             # DELIM saw an innermost boundary like --xyz
156             # CLOSE saw an innermost boundary like --xyz--
157             # DONE callback returned false
158             # EOF end of file
159             # EXT saw boundary of some higher-level
160             #
161             sub eos_type {
162 572     572 0 536 my ($self, $eos) = @_;
163 572 50       1248 $eos = $self->eos if (@_ == 1);
164              
165 572 100       1829 if ($eos =~ /^(DONE|EOF)/) {
    50          
166 336         1042 return $1;
167             }
168             elsif ($eos =~ /^(DELIM|CLOSE) (.*)$/) {
169 236 50       1251 return (($2 eq $self->{Bounds}[0]) ? $1 : 'EXT');
170             }
171             else {
172 0         0 die("internal error: unable to classify boundary token ($eos)");
173             }
174             }
175              
176             #------------------------------
177             #
178             # native_handle HANDLE
179             #
180             # I
181             # Can we do native i/o on HANDLE? If true, returns the handle
182             # that will respond to native I/O calls; else, returns undef.
183             #
184             sub native_handle {
185 683     683 0 1950 my $fh = shift;
186 683 100 100     3032 return $fh if ($fh->isa('IO::File') || $fh->isa('IO::Handle'));
187 343 50       1552 return $fh if (ref $fh eq 'GLOB');
188 0         0 undef;
189             }
190              
191             #------------------------------
192             #
193             # read_chunk INHANDLE, OUTHANDLE
194             #
195             # I
196             # Get lines until end-of-stream.
197             # Returns the terminating-condition token:
198             #
199             # DELIM xyz saw boundary line "--xyz"
200             # CLOSE xyz saw boundary line "--xyz--"
201             # DONE xyz saw terminator line "xyz"
202             # EOF end of file
203              
204             # Parse up to (and including) the boundary, and dump output.
205             # Follows the RFC 2046 specification, that the CRLF immediately preceding
206             # the boundary is part of the boundary, NOT part of the input!
207             #
208             # NOTE: while parsing bodies, we take care to remember the EXACT end-of-line
209             # sequence. This is because we *may* be handling 'binary' encoded data, and
210             # in that case we can't just massage \r\n into \n! Don't worry... if the
211             # data is styled as '7bit' or '8bit', the "decoder" will massage the CRLF
212             # for us. For now, we're just trying to chop up the data stream.
213              
214             # NBK - Oct 12, 1999
215             # The CRLF at the end of the current line is considered part
216             # of the boundary. I buffer the current line and output the
217             # last. I strip the last CRLF when I hit the boundary.
218              
219             sub read_chunk {
220 340     340 0 372 my ($self, $in, $out, $keep_newline, $normalize_newlines) = @_;
221              
222             # If we're parsing a preamble or epilogue, we need to keep the blank line
223             # that precedes the boundary line.
224 340   100     913 $keep_newline ||= 0;
225              
226 340   100     670 $normalize_newlines ||= 0;
227             ### Init:
228 340         237 my %bh = %{$self->{BH}};
  340         946  
229 340         285 my %th = %{$self->{TH}}; my $thx = keys %th;
  340         994  
  340         400  
230 340         408 local $_ = $LONGLINE;
231 340         227 my $maybe;
232 340         288 my $last = '';
233 340         235 my $eos = '';
234              
235             ### Determine types:
236 340         525 my $n_in = native_handle($in);
237 340         480 my $n_out = native_handle($out);
238              
239             ### Handle efficiently by type:
240 340 50       501 if ($n_in) {
241 340 50       578 if ($n_out) { ### native input, native output [fastest]
242 340         1785 while (<$n_in>) {
243             # Normalize line ending
244 3326 100       4698 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
245 3326 100       4145 if (substr($_, 0, 2) eq '--') {
246 156         757 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
247 156 100       582 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  141         158  
  141         176  
248             }
249 3185 100 66     4411 $thx and $th{$_} and do { $eos = $th{$_}; last };
  166         180  
  166         209  
250 3019         2616 print $n_out $last; $last = $_;
  3019         4394  
251             }
252             }
253             else { ### native input, OO output [slower]
254 0         0 while (<$n_in>) {
255             # Normalize line ending
256 0 0       0 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
257 0 0       0 if (substr($_, 0, 2) eq '--') {
258 0         0 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
259 0 0       0 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  0         0  
  0         0  
260             }
261 0 0 0     0 $thx and $th{$_} and do { $eos = $th{$_}; last };
  0         0  
  0         0  
262 0         0 $out->print($last); $last = $_;
  0         0  
263             }
264             }
265             }
266             else {
267 0 0       0 if ($n_out) { ### OO input, native output [even slower]
268 0         0 while (defined($_ = $in->getline)) {
269             # Normalize line ending
270 0 0       0 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
271 0 0       0 if (substr($_, 0, 2) eq '--') {
272 0         0 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
273 0 0       0 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  0         0  
  0         0  
274             }
275 0 0 0     0 $thx and $th{$_} and do { $eos = $th{$_}; last };
  0         0  
  0         0  
276 0         0 print $n_out $last; $last = $_;
  0         0  
277             }
278             }
279             else { ### OO input, OO output [slowest]
280 0         0 while (defined($_ = $in->getline)) {
281             # Normalize line ending
282 0 0       0 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
283 0 0       0 if (substr($_, 0, 2) eq '--') {
284 0         0 ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
285 0 0       0 $bh{$maybe} and do { $eos = $bh{$maybe}; last };
  0         0  
  0         0  
286             }
287 0 0 0     0 $thx and $th{$_} and do { $eos = $th{$_}; last };
  0         0  
  0         0  
288 0         0 $out->print($last); $last = $_;
  0         0  
289             }
290             }
291             }
292              
293             # Write out last held line, removing terminating CRLF if ended on bound,
294             # unless the line consists only of CRLF and we're wanting to keep the
295             # preceding blank line (as when parsing a preamble)
296 340 100 100     1616 $last =~ s/[\r\n]+\Z// if ($eos =~ /^(DELIM|CLOSE)/ && !($keep_newline && $last =~ m/^[\r\n]\z/));
      100        
297 340         894 $out->print($last);
298              
299             ### Save and return what we finished on:
300 340   100     1599 ${$self->{EOS}} = ($eos || 'EOF');
  340         535  
301 340         1097 1;
302             }
303              
304             #------------------------------
305             #
306             # read_lines INHANDLE, \@OUTLINES
307             #
308             # I
309             # Read lines into the given array.
310             #
311             sub read_lines {
312 37     37 0 50 my ($self, $in, $outlines) = @_;
313              
314 37         46 my $data = '';
315 37 50       390 open(my $fh, '>', \$data) or die $!;
316 37         79 $self->read_chunk($in, $fh);
317 37         103 @$outlines = split(/^/, $data);
318 37         81 close $fh;
319              
320 37         95 1;
321             }
322              
323             1;
324             __END__