File Coverage

blib/lib/MIME/Parser.pm
Criterion Covered Total %
statement 389 442 88.0
branch 127 194 65.4
condition 25 40 62.5
subroutine 51 62 82.2
pod 32 46 69.5
total 624 784 79.5


line stmt bran cond sub pod time code
1             package MIME::Parser;
2              
3              
4             =head1 NAME
5              
6             MIME::Parser - experimental class for parsing MIME streams
7              
8              
9             =head1 SYNOPSIS
10              
11             Before reading further, you should see L to make sure that
12             you understand where this module fits into the grand scheme of things.
13             Go on, do it now. I'll wait.
14              
15             Ready? Ok...
16              
17             =head2 Basic usage examples
18              
19             ### Create a new parser object:
20             my $parser = new MIME::Parser;
21              
22             ### Tell it where to put things:
23             $parser->output_under("/tmp");
24              
25             ### Parse an input filehandle:
26             $entity = $parser->parse(\*STDIN);
27              
28             ### Congratulations: you now have a (possibly multipart) MIME entity!
29             $entity->dump_skeleton; # for debugging
30              
31              
32             =head2 Examples of input
33              
34             ### Parse from filehandles:
35             $entity = $parser->parse(\*STDIN);
36             $entity = $parser->parse(IO::File->new("some command|");
37              
38             ### Parse from any object that supports getline() and read():
39             $entity = $parser->parse($myHandle);
40              
41             ### Parse an in-core MIME message:
42             $entity = $parser->parse_data($message);
43              
44             ### Parse an MIME message in a file:
45             $entity = $parser->parse_open("/some/file.msg");
46              
47             ### Parse an MIME message out of a pipeline:
48             $entity = $parser->parse_open("gunzip - < file.msg.gz |");
49              
50             ### Parse already-split input (as "deliver" would give it to you):
51             $entity = $parser->parse_two("msg.head", "msg.body");
52              
53              
54             =head2 Examples of output control
55              
56             ### Keep parsed message bodies in core (default outputs to disk):
57             $parser->output_to_core(1);
58              
59             ### Output each message body to a one-per-message directory:
60             $parser->output_under("/tmp");
61              
62             ### Output each message body to the same directory:
63             $parser->output_dir("/tmp");
64              
65             ### Change how nameless message-component files are named:
66             $parser->output_prefix("msg");
67              
68             ### Put temporary files somewhere else
69             $parser->tmp_dir("/var/tmp/mytmpdir");
70              
71             =head2 Examples of error recovery
72              
73             ### Normal mechanism:
74             eval { $entity = $parser->parse(\*STDIN) };
75             if ($@) {
76             $results = $parser->results;
77             $decapitated = $parser->last_head; ### get last top-level head
78             }
79              
80             ### Ultra-tolerant mechanism:
81             $parser->ignore_errors(1);
82             $entity = eval { $parser->parse(\*STDIN) };
83             $error = ($@ || $parser->last_error);
84              
85             ### Cleanup all files created by the parse:
86             eval { $entity = $parser->parse(\*STDIN) };
87             ...
88             $parser->filer->purge;
89              
90              
91             =head2 Examples of parser options
92              
93             ### Automatically attempt to RFC 2047-decode the MIME headers?
94             $parser->decode_headers(1); ### default is false
95              
96             ### Parse contained "message/rfc822" objects as nested MIME streams?
97             $parser->extract_nested_messages(0); ### default is true
98              
99             ### Look for uuencode in "text" messages, and extract it?
100             $parser->extract_uuencode(1); ### default is false
101              
102             ### Should we forgive normally-fatal errors?
103             $parser->ignore_errors(0); ### default is true
104              
105              
106             =head2 Miscellaneous examples
107              
108             ### Convert a Mail::Internet object to a MIME::Entity:
109             my $data = join('', (@{$mail->header}, "\n", @{$mail->body}));
110             $entity = $parser->parse_data(\$data);
111              
112              
113              
114             =head1 DESCRIPTION
115              
116             You can inherit from this class to create your own subclasses
117             that parse MIME streams into MIME::Entity objects.
118              
119              
120             =head1 PUBLIC INTERFACE
121              
122             =cut
123              
124             #------------------------------
125              
126             require 5.004;
127              
128             ### Pragmas:
129 15     15   344018 use strict;
  15         34  
  15         393  
130 15     15   44 use vars (qw($VERSION $CAT $CRLF));
  15         17  
  15         663  
131              
132             ### core Perl modules
133 15     15   4249 use IO::File;
  15         61253  
  15         1477  
134 15     15   79 use File::Spec;
  15         17  
  15         265  
135 15     15   47 use File::Path;
  15         14  
  15         633  
136 15     15   55 use Config qw(%Config);
  15         20  
  15         434  
137 15     15   50 use Carp;
  15         19  
  15         678  
138              
139             ### Kit modules:
140 15     15   4595 use MIME::Tools qw(:config :utils :msgtypes usage tmpopen );
  15         23  
  15         2330  
141 15     15   5409 use MIME::Head;
  15         32  
  15         371  
142 15     15   4978 use MIME::Body;
  15         27  
  15         281  
143 15     15   7787 use MIME::Entity;
  15         39  
  15         408  
144 15     15   76 use MIME::Decoder;
  15         19  
  15         234  
145 15     15   6107 use MIME::Parser::Reader;
  15         23  
  15         386  
146 15     15   6314 use MIME::Parser::Filer;
  15         30  
  15         337  
147 15     15   5447 use MIME::Parser::Results;
  15         28  
  15         53885  
148              
149             #------------------------------
150             #
151             # Globals
152             #
153             #------------------------------
154              
155             ### The package version, both in 1.23 style *and* usable by MakeMaker:
156             $VERSION = "5.508";
157              
158             ### How to catenate:
159             $CAT = '/bin/cat';
160              
161             ### The CRLF sequence:
162             $CRLF = "\015\012";
163              
164             ### Who am I?
165             my $ME = 'MIME::Parser';
166              
167              
168              
169             #------------------------------------------------------------
170              
171             =head2 Construction
172              
173             =over 4
174              
175             =cut
176              
177             #------------------------------
178              
179             =item new ARGS...
180              
181             I
182             Create a new parser object.
183             Once you do this, you can then set up various parameters
184             before doing the actual parsing. For example:
185              
186             my $parser = new MIME::Parser;
187             $parser->output_dir("/tmp");
188             $parser->output_prefix("msg1");
189             my $entity = $parser->parse(\*STDIN);
190              
191             Any arguments are passed into C.
192             Don't override this in your subclasses; override init() instead.
193              
194             =cut
195              
196             sub new {
197 42     42 1 12791825 my $self = bless {}, shift;
198 42         162 $self->init(@_);
199             }
200              
201             #------------------------------
202              
203             =item init ARGS...
204              
205             I
206             Initiallize a new MIME::Parser object.
207             This is automatically sent to a new object; you may want to override it.
208             If you override this, be sure to invoke the inherited method.
209              
210             =cut
211              
212             sub init {
213 42     42 1 117 my $self = shift;
214              
215 42         138 $self->{MP5_DecodeHeaders} = 0;
216 42         98 $self->{MP5_DecodeBodies} = 1;
217 42         75 $self->{MP5_Interface} = {};
218 42         85 $self->{MP5_ParseNested} = 'NEST';
219 42         59 $self->{MP5_TmpToCore} = 0;
220 42         73 $self->{MP5_IgnoreErrors} = 1;
221 42         51 $self->{MP5_UUDecode} = 0;
222 42         81 $self->{MP5_MaxParts} = -1;
223 42         57 $self->{MP5_TmpDir} = undef;
224              
225 42         123 $self->interface(ENTITY_CLASS => 'MIME::Entity');
226 42         82 $self->interface(HEAD_CLASS => 'MIME::Head');
227              
228 42         104 $self->output_dir(".");
229              
230 42         203 $self;
231             }
232              
233             #------------------------------
234              
235             =item init_parse
236              
237             I
238             Invoked automatically whenever one of the top-level parse() methods
239             is called, to reset the parser to a "ready" state.
240              
241             =cut
242              
243             sub init_parse {
244 53     53 1 67 my $self = shift;
245              
246 53         340 $self->{MP5_Results} = new MIME::Parser::Results;
247              
248 53         158 $self->{MP5_Filer}->results($self->{MP5_Results});
249 53         203 $self->{MP5_Filer}->purgeable([]);
250 53         222 $self->{MP5_Filer}->init_parse();
251 53         81 $self->{MP5_NumParts} = 0;
252 53         51 1;
253             }
254              
255             =back
256              
257             =cut
258              
259              
260              
261              
262              
263             #------------------------------------------------------------
264              
265             =head2 Altering how messages are parsed
266              
267             =over 4
268              
269             =cut
270              
271             #------------------------------
272              
273             =item decode_headers [YESNO]
274              
275             I
276             Controls whether the parser will attempt to decode all the MIME headers
277             (as per RFC 2047) the moment it sees them. B
278             for two very important reasons:>
279              
280             =over
281              
282             =item *
283              
284             B
285             If you fully decode the headers into bytes, you can inadvertently
286             transform a parseable MIME header like this:
287              
288             Content-type: text/plain; filename="=?ISO-8859-1?Q?Hi=22Ho?="
289              
290             into unparseable gobbledygook; in this case:
291              
292             Content-type: text/plain; filename="Hi"Ho"
293              
294             =item *
295              
296             B An encoded string which contains
297             both Latin-1 and Cyrillic characters will be turned into a binary
298             mishmosh which simply can't be rendered.
299              
300             =back
301              
302             B
303             This method was once the only out-of-the-box way to deal with attachments
304             whose filenames had non-ASCII characters. However, since MIME-tools 5.4xx
305             this is no longer necessary.
306              
307             B
308             If YESNO is true, decoding is done. However, you will get a warning
309             unless you use one of the special "true" values:
310              
311             "I_NEED_TO_FIX_THIS"
312             Just shut up and do it. Not recommended.
313             Provided only for those who need to keep old scripts functioning.
314              
315             "I_KNOW_WHAT_I_AM_DOING"
316             Just shut up and do it. Not recommended.
317             Provided for those who REALLY know what they are doing.
318              
319             If YESNO is false (the default), no attempt at decoding will be done.
320             With no argument, just returns the current setting.
321             B you can always decode the headers I the parsing
322             has completed (see L), or
323             decode the words on demand (see L).
324              
325             =cut
326              
327             sub decode_headers {
328 0     0 1 0 my ($self, $yesno) = @_;
329 0 0       0 if (@_ > 1) {
330 0         0 $self->{MP5_DecodeHeaders} = $yesno;
331 0 0       0 if ($yesno) {
332 0 0 0     0 if (($yesno eq "I_KNOW_WHAT_I_AM_DOING") ||
333             ($yesno eq "I_NEED_TO_FIX_THIS")) {
334             ### ok
335             }
336             else {
337 0         0 $self->whine("as of 5.4xx, decode_headers() should NOT be ".
338             "set true... if you are doing this to make sure ".
339             "that non-ASCII filenames are translated, ".
340             "that's now done automatically; for all else, ".
341             "use MIME::Words.");
342             }
343             }
344             }
345 0         0 $self->{MP5_DecodeHeaders};
346             }
347              
348             #------------------------------
349              
350             =item extract_nested_messages OPTION
351              
352             I
353             Some MIME messages will contain a part of type C
354             ,C or C:
355             literally, the text of an embedded mail/news/whatever message.
356             This option controls whether (and how) we parse that embedded message.
357              
358             If the OPTION is false, we treat such a message just as if it were a
359             C document, without attempting to decode its contents.
360              
361             If the OPTION is true (the default), the body of the C
362             or C part is parsed by this parser, creating an
363             entity object. What happens then is determined by the actual OPTION:
364              
365             =over 4
366              
367             =item NEST or 1
368              
369             The default setting.
370             The contained message becomes the sole "part" of the C
371             entity (as if the containing message were a special kind of
372             "multipart" message).
373             You can recover the sub-entity by invoking the L
374             method on the C entity.
375              
376             =item REPLACE
377              
378             The contained message replaces the C entity, as though
379             the C "container" never existed.
380              
381             B notice that, with this option, all the header information
382             in the C header is lost. This might seriously bother
383             you if you're dealing with a top-level message, and you've just lost
384             the sender's address and the subject line. C<:-/>.
385              
386             =back
387              
388             I
389              
390             =cut
391              
392             sub extract_nested_messages {
393 51     51 1 130 my ($self, $option) = @_;
394 51 100       124 $self->{MP5_ParseNested} = $option if (@_ > 1);
395 51         119 $self->{MP5_ParseNested};
396             }
397              
398             sub parse_nested_messages {
399 0     0 0 0 usage "parse_nested_messages() is now extract_nested_messages()";
400 0         0 shift->extract_nested_messages(@_);
401             }
402              
403             #------------------------------
404              
405             =item extract_uuencode [YESNO]
406              
407             I
408             If set true, then whenever we are confronted with a message
409             whose effective content-type is "text/plain" and whose encoding
410             is 7bit/8bit/binary, we scan the encoded body to see if it contains
411             uuencoded data (generally given away by a "begin XXX" line).
412              
413             If it does, we explode the uuencoded message into a multipart,
414             where the text before the first "begin XXX" becomes the first part,
415             and all "begin...end" sections following become the subsequent parts.
416             The filename (if given) is accessible through the normal means.
417              
418             =cut
419              
420             sub extract_uuencode {
421 255     255 1 369 my ($self, $yesno) = @_;
422 255 100       390 $self->{MP5_UUDecode} = $yesno if @_ > 1;
423 255         553 $self->{MP5_UUDecode};
424             }
425              
426             #------------------------------
427              
428             =item ignore_errors [YESNO]
429              
430             I
431             Controls whether the parser will attempt to ignore normally-fatal
432             errors, treating them as warnings and continuing with the parse.
433              
434             If YESNO is true (the default), many syntax errors are tolerated.
435             If YESNO is false, fatal errors throw exceptions.
436             With no argument, just returns the current setting.
437              
438             =cut
439              
440             sub ignore_errors {
441 25     25 1 71 my ($self, $yesno) = @_;
442 25 50       59 $self->{MP5_IgnoreErrors} = $yesno if (@_ > 1);
443 25         31 $self->{MP5_IgnoreErrors};
444             }
445              
446              
447             #------------------------------
448              
449             =item decode_bodies [YESNO]
450              
451             I
452             Controls whether the parser should decode entity bodies or not.
453             If this is set to a false value (default is true), all entity bodies
454             will be kept as-is in the original content-transfer encoding.
455              
456             To prevent double encoding on the output side MIME::Body->is_encoded
457             is set, which tells MIME::Body not to encode the data again, if encoded
458             data was requested. This is in particular useful, when it's important that
459             the content B be modified, e.g. if you want to calculate
460             OpenPGP signatures from it.
461              
462             B: the semantics change significantly if you parse MIME
463             messages with this option set, because MIME::Entity resp. MIME::Body
464             *always* see encoded data now, while the default behaviour is
465             working with *decoded* data (and encoding it only if you request it).
466             You need to decode the data yourself, if you want to have it decoded.
467              
468             So use this option only if you exactly know, what you're doing, and
469             that you're sure, that you really need it.
470              
471             =cut
472              
473             sub decode_bodies {
474 258     258 1 524 my ($self, $yesno) = @_;
475 258 100       488 $self->{MP5_DecodeBodies} = $yesno if (@_ > 1);
476 258         615 $self->{MP5_DecodeBodies};
477             }
478              
479             #------------------------------
480             #
481             # MESSAGES...
482             #
483              
484             #------------------------------
485             #
486             # debug MESSAGE...
487             #
488             sub debug {
489 1243     1243 0 999 my $self = shift;
490 1243 50       3035 if (MIME::Tools->debugging()) {
491 0 0       0 if (my $r = $self->{MP5_Results}) {
492 0         0 unshift @_, $r->indent;
493 0         0 $r->msg($M_DEBUG, @_);
494             }
495 0         0 MIME::Tools::debug(@_);
496             }
497             }
498              
499             #------------------------------
500             #
501             # whine PROBLEM...
502             #
503             sub whine {
504 6     6 0 7 my $self = shift;
505 6 50       14 if (my $r = $self->{MP5_Results}) {
506 6         21 unshift @_, $r->indent;
507 6         18 $r->msg($M_WARNING, @_);
508             }
509 6         21 &MIME::Tools::whine(@_);
510             }
511              
512             #------------------------------
513             #
514             # error PROBLEM...
515             #
516             # Possibly-forgivable parse error occurred.
517             # Raises a fatal exception unless we are ignoring errors.
518             #
519             sub error {
520 5     5 1 7 my $self = shift;
521 5 50       11 if (my $r = $self->{MP5_Results}) {
522 5         13 unshift @_, $r->indent;
523 5         14 $r->msg($M_ERROR, @_);
524             }
525 5         14 &MIME::Tools::error(@_);
526 5 50       13 $self->{MP5_IgnoreErrors} ? return undef : die @_;
527             }
528              
529              
530              
531              
532             #------------------------------
533             #
534             # PARSING...
535             #
536              
537             #------------------------------
538             #
539             # process_preamble IN, READER, ENTITY
540             #
541             # I
542             # Dispose of a multipart message's preamble.
543             #
544             sub process_preamble {
545 39     39 0 47 my ($self, $in, $rdr, $ent) = @_;
546              
547             ### Sanity:
548 39 50       92 ($rdr->depth > 0) or die "$ME: internal logic error";
549              
550             ### Parse preamble:
551 39         44 my @saved;
552 39         42 my $data = '';
553 39 50       361 open(my $fh, '>', \$data) or die $!;
554 39         108 $rdr->read_chunk($in, $fh, 1);
555 39         61 close $fh;
556              
557             # Ugh. Horrible. If the preamble consists only of CRLF, squash it down
558             # to the empty string. Else, remove the trailing CRLF.
559 39 100       102 if( $data =~ m/^[\r\n]\z/ ) {
560 2         5 @saved = ('');
561             } else {
562 37         75 $data =~ s/[\r\n]\z//;
563 37         97 @saved = split(/^/, $data);
564             }
565 39         145 $ent->preamble(\@saved);
566 39         97 1;
567             }
568              
569             #------------------------------
570             #
571             # process_epilogue IN, READER, ENTITY
572             #
573             # I
574             # Dispose of a multipart message's epilogue.
575             #
576             sub process_epilogue {
577 37     37 0 57 my ($self, $in, $rdr, $ent) = @_;
578 37         68 $self->debug("process_epilogue");
579              
580             ### Parse epilogue:
581 37         34 my @saved;
582 37         107 $rdr->read_lines($in, \@saved);
583 37         112 $ent->epilogue(\@saved);
584 37         45 1;
585             }
586              
587             #------------------------------
588             #
589             # process_to_bound IN, READER, OUT
590             #
591             # I
592             # Dispose of the next chunk into the given output stream OUT.
593             #
594             sub process_to_bound {
595 95     95 0 105 my ($self, $in, $rdr, $out) = @_;
596              
597             ### Parse:
598 95         253 $rdr->read_chunk($in, $out);
599 95         96 1;
600             }
601              
602             #------------------------------
603             #
604             # process_header IN, READER
605             #
606             # I
607             # Process and return the next header.
608             # Return undef if, instead of a header, the encapsulation boundary is found.
609             # Fatal exception on failure.
610             #
611             sub process_header {
612 169     169 0 195 my ($self, $in, $rdr) = @_;
613 169         262 $self->debug("process_header");
614              
615             ### Parse and save the (possibly empty) header, up to and including the
616             ### blank line that terminates it:
617 169         257 my $head = $self->interface('HEAD_CLASS')->new;
618              
619             ### Read the lines of the header.
620             ### We localize IO inside here, so that we can support the IO:: interface
621 169         2812 my @headlines;
622 169         390 my $hdr_rdr = $rdr->spawn;
623 169         338 $hdr_rdr->add_terminator("");
624 169         292 $hdr_rdr->add_terminator("\r"); ### sigh
625              
626 169         157 my $headstr = '';
627 9 50   9   54 open(my $outfh, '>:scalar', \$headstr) or die $!;
  9         11  
  9         52  
  169         1869  
628 169         7155 $hdr_rdr->read_chunk($in, $outfh, 0, 1);
629 169         271 close $outfh;
630              
631             ### How did we do?
632 169 100       328 if ($hdr_rdr->eos_type eq 'DELIM') {
633 2         4 $self->whine("bogus part, without CRLF before body");
634 2         11 return undef;
635             }
636 167 100       316 ($hdr_rdr->eos_type eq 'DONE') or
637             $self->error("unexpected end of header\n");
638              
639             ### Extract the header (note that zero-size headers are admissible!):
640 167 50       2061 open(my $readfh, '<:scalar', \$headstr) or die $!;
641 167         450 $head->read( $readfh );
642              
643 167 50       51497 unless( $readfh->eof() ) {
644             # Not entirely correct, since ->read consumes the line it gives up on.
645             # it's actually the line /before/ the one we get with ->getline
646 0         0 $self->error("couldn't parse head; error near:\n", $readfh->getline());
647             }
648              
649              
650             ### If desired, auto-decode the header as per RFC 2047
651             ### This shouldn't affect non-encoded headers; however, it will decode
652             ### headers with international characters. WARNING: currently, the
653             ### character-set information is LOST after decoding.
654 167 50       1189 $head->decode($self->{MP5_DecodeHeaders}) if $self->{MP5_DecodeHeaders};
655              
656             ### If this is the top-level head, save it:
657 167 100       282 $self->results->top_head($head) if !$self->results->top_head;
658              
659 167         1260 return $head;
660             }
661              
662             #------------------------------
663             #
664             # process_multipart IN, READER, ENTITY
665             #
666             # I
667             # Process the multipart body, and return the state.
668             # Fatal exception on failure.
669             # Invoked by process_part().
670             #
671             sub process_multipart {
672 39     39 0 47 my ($self, $in, $rdr, $ent) = @_;
673 39         92 my $head = $ent->head;
674              
675 39         63 $self->debug("process_multipart...");
676              
677             ### Get actual type and subtype from the header:
678 39         84 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
679              
680             ### If this was a type "multipart/digest", then the RFCs say we
681             ### should default the parts to have type "message/rfc822".
682             ### Thanks to Carsten Heyl for suggesting this...
683 39 100       103 my $retype = (($subtype eq 'digest') ? 'message/rfc822' : '');
684              
685             ### Get the boundaries for the parts:
686 39         92 my $bound = $head->multipart_boundary;
687 39 50 33     206 if (!defined($bound) || ($bound =~ /[\r\n]/)) {
688 0         0 $self->error("multipart boundary is missing, or contains CR or LF\n");
689 0         0 $ent->effective_type("application/x-unparseable-multipart");
690 0         0 return $self->process_singlepart($in, $rdr, $ent);
691             }
692 39         122 my $part_rdr = $rdr->spawn->add_boundary($bound);
693              
694             ### Prepare to parse:
695 39         43 my $eos_type;
696             my $more_parts;
697              
698             ### Parse preamble...
699 39         93 $self->process_preamble($in, $part_rdr, $ent);
700              
701             ### ...and look at how we finished up:
702 39         84 $eos_type = $part_rdr->eos_type;
703 39 100       81 if ($eos_type eq 'DELIM'){ $more_parts = 1 }
  38 50       41  
704 1         3 elsif ($eos_type eq 'CLOSE'){ $self->whine("empty multipart message\n");
705 1         1 $more_parts = 0; }
706 0         0 else { $self->error("unexpected end of preamble\n");
707 0         0 return 1; }
708              
709             ### Parse parts:
710 39         41 my $partno = 0;
711 39         32 my $part;
712 39         73 while ($more_parts) {
713 103         96 ++$partno;
714 103         253 $self->debug("parsing part $partno...");
715              
716             ### Parse the next part, and add it to the entity...
717 103         241 my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
718 103 50       180 return undef unless defined($part);
719              
720 103         236 $ent->add_part($part);
721              
722             ### ...and look at how we finished up:
723 103         246 $eos_type = $part_rdr->eos_type;
724 103 100       228 if ($eos_type eq 'DELIM') { $more_parts = 1 }
  65 100       110  
725 36         83 elsif ($eos_type eq 'CLOSE') { $more_parts = 0; }
726 2         7 else { $self->error("unexpected end of parts ".
727             "before epilogue\n");
728 2         13 return 1; }
729             }
730              
731             ### Parse epilogue...
732             ### (note that we use the *parent's* reader here, which does not
733             ### know about the boundaries in this multipart!)
734 37         91 $self->process_epilogue($in, $rdr, $ent);
735              
736             ### ...and there's no need to look at how we finished up!
737 37         188 1;
738             }
739              
740             #------------------------------
741             #
742             # process_singlepart IN, READER, ENTITY
743             #
744             # I
745             # Process the singlepart body. Returns true.
746             # Fatal exception on failure.
747             # Invoked by process_part().
748             #
749             sub process_singlepart {
750 115     115 0 129 my ($self, $in, $rdr, $ent) = @_;
751 115         241 my $head = $ent->head;
752              
753 115         166 $self->debug("process_singlepart...");
754              
755             ### Obtain a filehandle for reading the encoded information:
756             ### We have two different approaches, based on whether or not we
757             ### have to contend with boundaries.
758 115         99 my $ENCODED; ### handle
759 115   100     264 my $can_shortcut = (!$rdr->has_bounds and !$self->{MP5_UUDecode});
760 115 100       163 if ($can_shortcut) {
761 20         36 $self->debug("taking shortcut");
762              
763 20         23 $ENCODED = $in;
764 20         47 $rdr->eos('EOF'); ### be sure to bogus-up the reader state to EOF:
765             }
766             else {
767              
768 95         123 $self->debug("using temp file");
769 95         196 $ENCODED = $self->new_tmpfile();
770              
771             ### Read encoded body until boundary (or EOF)...
772 95         208 $self->process_to_bound($in, $rdr, $ENCODED);
773              
774             ### ...and look at how we finished up.
775             ### If we have bounds, we want DELIM or CLOSE.
776             ### Otherwise, we want EOF (and that's all we'd get, anyway!).
777 95 100       173 if ($rdr->has_bounds) {
778 94 100       166 ($rdr->eos_type =~ /^(DELIM|CLOSE)$/) or
779             $self->error("part did not end with expected boundary\n");
780             }
781              
782             ### Flush and rewind encoded buffer, so we can read it:
783 95 50       2604 $ENCODED->flush or die "$ME: can't flush: $!";
784 95 50       460 $ENCODED->seek(0, 0) or die "$ME: can't seek: $!";
785             }
786              
787             ### Get a content-decoder to decode this part's encoding:
788 115         777 my $encoding = $head->mime_encoding;
789 115         513 my $decoder = new MIME::Decoder $encoding;
790 115 50       282 if (!$decoder) {
791 0         0 $self->whine("Unsupported encoding '$encoding': using 'binary'... \n".
792             "The entity will have an effective MIME type of \n".
793             "application/octet-stream."); ### as per RFC-2045
794 0         0 $ent->effective_type('application/octet-stream');
795 0         0 $decoder = new MIME::Decoder 'binary';
796 0         0 $encoding = 'binary';
797             }
798              
799             ### Data should be stored encoded / as-is?
800 115 100       236 if ( !$self->decode_bodies ) {
801 7         15 $decoder = new MIME::Decoder 'binary';
802 7         29 $encoding = 'binary';
803             }
804              
805             ### If desired, sidetrack to troll for UUENCODE:
806 115         229 $self->debug("extract uuencode? ", $self->extract_uuencode);
807 115         192 $self->debug("encoding? ", $encoding);
808 115         293 $self->debug("effective type? ", $ent->effective_type);
809              
810 115 50 66     203 if ($self->extract_uuencode and
      66        
811             ($encoding =~ /^(7bit|8bit|binary)\Z/) and
812             ($ent->effective_type =~
813             m{^(?:text/plain|application/mac-binhex40|application/mac-binhex)\Z})) {
814             ### Hunt for it:
815 3         4 my $uu_ent = eval { $self->hunt_for_uuencode($ENCODED, $ent) };
  3         10  
816 3 100       8 if ($uu_ent) { ### snark
817 2         9 %$ent = %$uu_ent;
818 2         13 return 1;
819             }
820             else { ### boojum
821 1         4 $self->whine("while hunting for uuencode: $@");
822 1 50       3 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
823             }
824             }
825              
826             ### Open a new bodyhandle for outputting the data:
827 113 50       212 my $body = $self->new_body_for($head) or die "$ME: no body"; # gotta die
828 113 100 50     258 $body->binmode(1) or die "$ME: can't set to binmode: $!"
      100        
829             unless textual_type($ent->effective_type) or !$self->decode_bodies;
830 113 100       227 $body->is_encoded(1) if !$self->decode_bodies;
831              
832             ### Decode and save the body (using the decoder):
833 113 50       291 my $DECODED = $body->open("w") or die "$ME: body not opened: $!";
834 113         1272 eval { $decoder->decode($ENCODED, $DECODED); };
  113         425  
835 113 50       178 $@ and $self->error($@);
836 113 50       267 $DECODED->close or die "$ME: can't close: $!";
837              
838             ### Success! Remember where we put stuff:
839 113         3024 $ent->bodyhandle($body);
840              
841             ### Done!
842 113         604 1;
843             }
844              
845             #------------------------------
846             #
847             # hunt_for_uuencode ENCODED, ENTITY
848             #
849             # I
850             # Try to detect and dispatch embedded uuencode as a fake multipart message.
851             # Returns new entity or undef.
852             #
853             sub hunt_for_uuencode {
854 3     3 0 4 my ($self, $ENCODED, $ent) = @_;
855 3         3 my ($good, $how_encoded);
856 3         3 local $_;
857 3         6 $self->debug("sniffing around for UUENCODE");
858              
859             ### Heuristic:
860 3 50       9 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
861 3         79 while (defined($_ = $ENCODED->getline)) {
862 18 100       326 if ($good = /^begin [0-7]{3}/) {
863 2         4 $how_encoded = 'uu';
864 2         4 last;
865             }
866 16 50       214 if ($good = /^\(This file must be converted with/i) {
867 0         0 $how_encoded = 'binhex';
868 0         0 last;
869             }
870             }
871 3 100       24 $good or do { $self->debug("no one made the cut"); return 0 };
  1         2  
  1         3  
872              
873             # If a decoder doesn't exist for this type, forget it!
874 2 50       10 my $decoder = MIME::Decoder->new(($how_encoded eq 'uu')?'x-uuencode'
875             :'binhex');
876 2 50       7 unless (defined($decoder)) {
877 0         0 $self->debug("No decoder for $how_encoded attachments");
878 0         0 return 0;
879             }
880              
881             ### New entity:
882 2         10 my $top_ent = $ent->dup; ### no data yet
883 2         6 $top_ent->make_multipart;
884 2         4 my @parts;
885              
886             ### Made the first cut; on to the real stuff:
887 2 50       6 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
888 2         22 $self->whine("Found a $how_encoded attachment");
889 2         4 my $pre;
890 2         3 while (1) {
891 6         8 my $bin_data = '';
892              
893             ### Try next part:
894 6         68 my $out = IO::File->new(\$bin_data, '>:');
895 6 100       235 eval { $decoder->decode($ENCODED, $out) }; last if $@;
  6         25  
  6         19  
896 4         10 my $preamble = $decoder->last_preamble;
897 4         9 my $filename = $decoder->last_filename;
898 4         9 my $mode = $decoder->last_mode;
899              
900             ### Get probable type:
901 4         4 my $type = 'application/octet-stream';
902 4   50     15 my ($ext) = $filename =~ /\.(\w+)\Z/; $ext = lc($ext || '');
  4         12  
903 4 50       16 if ($ext =~ /^(gif|jpe?g|xbm|xpm|png)\Z/) { $type = "image/$1" }
  4         9  
904              
905             ### If we got our first preamble, create the text portion:
906 4 100 66     40 if (@$preamble and
      66        
907             (grep /\S/, @$preamble) and
908             !@parts) {
909 2         6 my $txt_ent = $self->interface('ENTITY_CLASS')->new;
910              
911 2         10 MIME::Entity->build(Type => "text/plain",
912             Data => "");
913 2         5 $txt_ent->bodyhandle($self->new_body_for($txt_ent->head));
914 2 50       50 my $io = $txt_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
915 2 50       10 $io->print(@$preamble) or die "$ME: can't print: $!";
916 2 50       28 $io->close or die "$ME: can't close: $!";
917 2         75 push @parts, $txt_ent;
918             }
919              
920             ### Create the attachment:
921             ### We use the x-unix-mode convention from "dtmail 1.2.1 SunOS 5.6".
922 4         7 if (1) {
923 4         24 my $bin_ent = MIME::Entity->build(Type=>$type,
924             Filename=>$filename,
925             Data=>"");
926 4         13 $bin_ent->head->mime_attr('Content-type.x-unix-mode' => "0$mode");
927 4         13 $bin_ent->bodyhandle($self->new_body_for($bin_ent->head));
928 4 50       8 $bin_ent->bodyhandle->binmode(1) or die "$ME: can't set to binmode: $!";
929 4 50       9 my $io = $bin_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
930 4 50       11 $io->print($bin_data) or die "$ME: can't print: $!";
931 4 50       58 $io->close or die "$ME: can't close: $!";
932 4         176 push @parts, $bin_ent;
933             }
934             }
935              
936             ### Did we get anything?
937 2 50       6 @parts or return undef;
938             ### Set the parts and a nice preamble:
939 2         9 $top_ent->parts(\@parts);
940 2         18 $top_ent->preamble
941             (["The following is a multipart MIME message which was extracted\n",
942             "from a $how_encoded-encoded message.\n"]);
943 2         19 $top_ent;
944             }
945              
946             #------------------------------
947             #
948             # process_message IN, READER, ENTITY
949             #
950             # I
951             # Process the singlepart body, and return true.
952             # Fatal exception on failure.
953             # Invoked by process_part().
954             #
955             sub process_message {
956 13     13 0 19 my ($self, $in, $rdr, $ent) = @_;
957 13         29 my $head = $ent->head;
958              
959 13         26 $self->debug("process_message");
960              
961             ### Verify the encoding restrictions:
962 13         31 my $encoding = $head->mime_encoding;
963 13 50       59 if ($encoding !~ /^(7bit|8bit|binary)$/) {
964 0         0 $self->error("illegal encoding [$encoding] for MIME type ".
965             $head->mime_type."\n");
966 0         0 $encoding = 'binary';
967             }
968              
969             ### Parse the message:
970 13         36 my $msg = $self->process_part($in, $rdr);
971 13 50       39 return undef unless defined($msg);
972              
973             ### How to handle nested messages?
974 13 100       24 if ($self->extract_nested_messages eq 'REPLACE') {
975 1         6 %$ent = %$msg; ### shallow replace
976 1         3 %$msg = ();
977             }
978             else { ### "NEST" or generic 1:
979 12         34 $ent->bodyhandle(undef);
980 12         30 $ent->add_part($msg);
981             }
982 13         741 1;
983             }
984              
985             #------------------------------
986             #
987             # process_part IN, READER, [OPTSHASH...]
988             #
989             # I
990             # The real back-end engine.
991             # See the documentation up top for the overview of the algorithm.
992             # The OPTSHASH can contain:
993             #
994             # Retype => retype this part to the given content-type
995             #
996             # Return the entity.
997             # Fatal exception on failure. Returns undef if message to complex
998             #
999             sub process_part {
1000 169     169 0 307 my ($self, $in, $rdr, %p) = @_;
1001              
1002 169 50       348 if ($self->{MP5_MaxParts} > 0) {
1003 0         0 $self->{MP5_NumParts}++;
1004 0 0       0 if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
1005             # Return UNDEF if msg too complex
1006 0         0 return undef;
1007             }
1008             }
1009              
1010 169   66     592 $rdr ||= MIME::Parser::Reader->new;
1011             #debug "process_part";
1012 169         285 $self->results->level(+1);
1013              
1014             ### Create a new entity:
1015 169         268 my $ent = $self->interface('ENTITY_CLASS')->new;
1016              
1017             ### Parse and add the header:
1018 169         327 my $head = $self->process_header($in, $rdr);
1019 169 100       283 if (not defined $head) {
1020 2         4 $self->debug("bogus empty part");
1021 2         4 $head = $self->interface('HEAD_CLASS')->new;
1022 2         41 $head->mime_type('text/plain');
1023 2         5 $ent->head($head);
1024 2         5 $ent->bodyhandle($self->new_body_for($head));
1025 2 50       5 $ent->bodyhandle->open("w")->close or die "$ME: can't close: $!";
1026 2         25 $self->results->level(-1);
1027 2         4 return $ent;
1028             }
1029 167         401 $ent->head($head);
1030              
1031             ### Tweak the content-type based on context from our parent...
1032             ### For example, multipart/digest messages default to type message/rfc822:
1033 167 100       316 $head->mime_type($p{Retype}) if $p{Retype};
1034              
1035             ### Get the MIME type and subtype:
1036 167         430 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
1037 167         462 $self->debug("type = $type, subtype = $subtype");
1038              
1039             ### Handle, according to the MIME type:
1040 167 100 66     1027 if ($type eq 'multipart') {
    100 33        
1041 39 50       104 return undef unless defined($self->process_multipart($in, $rdr, $ent));
1042             }
1043             elsif (("$type/$subtype" eq "message/rfc822" ||
1044             "$type/$subtype" eq "message/external-body" ||
1045             ("$type/$subtype" eq "message/partial" && defined($head->mime_attr("content-type.number")) && $head->mime_attr("content-type.number") == 1)) &&
1046             $self->extract_nested_messages) {
1047 13         20 $self->debug("attempting to process a nested message");
1048 13 50       35 return undef unless defined($self->process_message($in, $rdr, $ent));
1049             }
1050             else {
1051 115         227 $self->process_singlepart($in, $rdr, $ent);
1052             }
1053              
1054             ### Done (we hope!):
1055 167         16663 $self->results->level(-1);
1056 167         412 return $ent;
1057             }
1058              
1059              
1060              
1061             =back
1062              
1063             =head2 Parsing an input source
1064              
1065             =over 4
1066              
1067             =cut
1068              
1069             #------------------------------
1070              
1071             =item parse_data DATA
1072              
1073             I
1074             Parse a MIME message that's already in core. This internally creates an "in
1075             memory" filehandle on a Perl scalar value using PerlIO
1076              
1077             You may supply the DATA in any of a number of ways...
1078              
1079             =over 4
1080              
1081             =item *
1082              
1083             B which holds the message. A reference to this scalar will be used
1084             internally.
1085              
1086             =item *
1087              
1088             B which holds the message. This reference will be used
1089             internally.
1090              
1091             =item *
1092              
1093             B
1094              
1095             B The array is internally concatenated into a
1096             temporary string, and a reference to the new string is used internally.
1097              
1098             It is much more efficient to pass in a scalar reference, so please consider
1099             refactoring your code to use that interface instead. If you absolutely MUST
1100             pass an array, you may be better off using IO::ScalarArray in the calling code
1101             to generate a filehandle, and passing that filehandle to I
1102              
1103             =back
1104              
1105             Returns the parsed MIME::Entity on success.
1106              
1107             =cut
1108              
1109             sub parse_data {
1110 11     11 1 927 my ($self, $data) = @_;
1111              
1112             ### Get data as a scalar:
1113 11         15 my $io;
1114              
1115 11 100       28 if (! ref $data ) {
    100          
    50          
1116 9         61 $io = IO::File->new(\$data, '<:');
1117             } elsif( ref $data eq 'SCALAR' ) {
1118 1         5 $io = IO::File->new($data, '<:');
1119             } elsif( ref $data eq 'ARRAY' ) {
1120             # Passing arrays is deprecated now that we've nuked IO::ScalarArray
1121             # but for backwards compatibility we still support it by joining the
1122             # array lines to a scalar and doing scalar IO on it.
1123 1         3 my $tmp_data = join('', @$data);
1124 1         5 $io = IO::File->new(\$tmp_data, '<:');
1125             } else {
1126 0         0 croak "parse_data: wrong argument ref type: ", ref($data);
1127             }
1128              
1129             ### Parse!
1130 11         4075 return $self->parse($io);
1131             }
1132              
1133             #------------------------------
1134              
1135             =item parse INSTREAM
1136              
1137             I
1138             Takes a MIME-stream and splits it into its component entities.
1139              
1140             The INSTREAM can be given as an IO::File, a globref filehandle (like
1141             C<\*STDIN>), or as I blessed object conforming to the IO::
1142             interface (which minimally implements getline() and read()).
1143              
1144             Returns the parsed MIME::Entity on success.
1145             Throws exception on failure. If the message contained too many
1146             parts (as set by I), returns undef.
1147              
1148             =cut
1149              
1150             sub parse {
1151 53     53 1 2229 my $self = shift;
1152 53         83 my $in = shift;
1153 53         53 my $entity;
1154 53         189 local $/ = "\n"; ### just to be safe
1155              
1156 53         120 local $\ = undef; # CPAN ticket #71041
1157 53         134 $self->init_parse;
1158 53         125 $entity = $self->process_part($in, undef); ### parse!
1159              
1160 53         249 $entity;
1161             }
1162              
1163             ### Backcompat:
1164             sub read {
1165 0     0 1 0 shift->parse(@_);
1166             }
1167             sub parse_FH {
1168 0     0 0 0 shift->parse(@_);
1169             }
1170              
1171             #------------------------------
1172              
1173             =item parse_open EXPR
1174              
1175             I
1176             Convenience front-end onto C.
1177             Simply give this method any expression that may be sent as the second
1178             argument to open() to open a filehandle for reading.
1179              
1180             Returns the parsed MIME::Entity on success.
1181             Throws exception on failure.
1182              
1183             =cut
1184              
1185             sub parse_open {
1186 30     30 1 4456 my ($self, $expr) = @_;
1187 30         34 my $ent;
1188              
1189 30 50       155 my $io = IO::File->new($expr) or die "$ME: couldn't open $expr: $!";
1190 30         1680 $ent = $self->parse($io);
1191 30 50       71 $io->close or die "$ME: can't close: $!";
1192 30         368 $ent;
1193             }
1194              
1195             ### Backcompat:
1196             sub parse_in {
1197 0     0 0 0 usage "parse_in() is now parse_open()";
1198 0         0 shift->parse_open(@_);
1199             }
1200              
1201             #------------------------------
1202              
1203             =item parse_two HEADFILE, BODYFILE
1204              
1205             I
1206             Convenience front-end onto C, intended for programs
1207             running under mail-handlers like B, which splits the incoming
1208             mail message into a header file and a body file.
1209             Simply give this method the paths to the respective files.
1210              
1211             B it is assumed that, once the files are cat'ed together,
1212             there will be a blank line separating the head part and the body part.
1213              
1214             B new implementation slurps files into line array
1215             for portability, instead of using 'cat'. May be an issue if
1216             your messages are large.
1217              
1218             Returns the parsed MIME::Entity on success.
1219             Throws exception on failure.
1220              
1221             =cut
1222              
1223             sub parse_two {
1224 1     1 1 5 my ($self, $headfile, $bodyfile) = @_;
1225 1         1 my $data;
1226 1         2 foreach ($headfile, $bodyfile) {
1227 2 50       45 open IN, "<$_" or die "$ME: open $_: $!";
1228 2         3 $data .= do { local $/; };
  2         5  
  2         22  
1229 2 50       12 close IN or die "$ME: can't close: $!";
1230             }
1231 1         4 return $self->parse_data($data);
1232             }
1233              
1234             =back
1235              
1236             =cut
1237              
1238              
1239              
1240              
1241             #------------------------------------------------------------
1242              
1243             =head2 Specifying output destination
1244              
1245             B in 5.212 and before, this was done by methods
1246             of MIME::Parser. However, since many users have requested
1247             fine-tuned control over how this is done, the logic has been split
1248             off from the parser into its own class, MIME::Parser::Filer
1249             Every MIME::Parser maintains an instance of a MIME::Parser::Filer
1250             subclass to manage disk output (see L for details.)
1251              
1252             The benefit to this is that the MIME::Parser code won't be
1253             confounded with a lot of garbage related to disk output.
1254             The drawback is that the way you override the default behavior
1255             will change.
1256              
1257             For now, all the normal public-interface methods are still provided,
1258             but many are only stubs which create or delegate to the underlying
1259             MIME::Parser::Filer object.
1260              
1261             =over 4
1262              
1263             =cut
1264              
1265             #------------------------------
1266              
1267             =item filer [FILER]
1268              
1269             I
1270             Get/set the FILER object used to manage the output of files to disk.
1271             This will be some subclass of L.
1272              
1273             =cut
1274              
1275             sub filer {
1276 315     315 1 16875 my ($self, $filer) = @_;
1277 315 100       579 if (@_ > 1) {
1278 74         104 $self->{MP5_Filer} = $filer;
1279 74         330 $filer->results($self->results); ### but we still need in init_parse
1280             }
1281 315         870 $self->{MP5_Filer};
1282             }
1283              
1284             #------------------------------
1285              
1286             =item output_dir DIRECTORY
1287              
1288             I
1289             Causes messages to be filed directly into the given DIRECTORY.
1290             It does this by setting the underlying L to
1291             a new instance of MIME::Parser::FileInto, and passing the arguments
1292             into that class' new() method.
1293              
1294             B Since this method replaces the underlying
1295             filer, you must invoke it I doing changing any attributes
1296             of the filer, like the output prefix; otherwise those changes
1297             will be lost.
1298              
1299             =cut
1300              
1301             sub output_dir {
1302 78     78 1 273 my ($self, @init) = @_;
1303 78 100       159 if (@_ > 1) {
1304 72         403 $self->filer(MIME::Parser::FileInto->new(@init));
1305             }
1306             else {
1307 6         11 &MIME::Tools::whine("0-arg form of output_dir is deprecated.");
1308 6         9 return $self->filer->output_dir;
1309             }
1310             }
1311              
1312             #------------------------------
1313              
1314             =item output_under BASEDIR, OPTS...
1315              
1316             I
1317             Causes messages to be filed directly into subdirectories of the given
1318             BASEDIR, one subdirectory per message. It does this by setting the
1319             underlying L to a new instance of MIME::Parser::FileUnder,
1320             and passing the arguments into that class' new() method.
1321              
1322             B Since this method replaces the underlying
1323             filer, you must invoke it I doing changing any attributes
1324             of the filer, like the output prefix; otherwise those changes
1325             will be lost.
1326              
1327             =cut
1328              
1329             sub output_under {
1330 2     2 1 8 my ($self, @init) = @_;
1331 2 50       7 if (@_ > 1) {
1332 2         17 $self->filer(MIME::Parser::FileUnder->new(@init));
1333             }
1334             else {
1335 0         0 &MIME::Tools::whine("0-arg form of output_under is deprecated.");
1336 0         0 return $self->filer->output_dir;
1337             }
1338             }
1339              
1340             #------------------------------
1341              
1342             =item output_path HEAD
1343              
1344             I
1345             Given a MIME head for a file to be extracted, come up with a good
1346             output pathname for the extracted file.
1347             Identical to the preferred form:
1348              
1349             $parser->filer->output_path(...args...);
1350              
1351             We just delegate this to the underlying L object.
1352              
1353             =cut
1354              
1355             sub output_path {
1356 88     88 1 82 my $self = shift;
1357             ### We use it, so don't warn!
1358             ### &MIME::Tools::whine("output_path deprecated in MIME::Parser");
1359 88         126 $self->filer->output_path(@_);
1360             }
1361              
1362             #------------------------------
1363              
1364             =item output_prefix [PREFIX]
1365              
1366             I
1367             Get/set the short string that all filenames for extracted body-parts
1368             will begin with (assuming that there is no better "recommended filename").
1369             Identical to the preferred form:
1370              
1371             $parser->filer->output_prefix(...args...);
1372              
1373             We just delegate this to the underlying L object.
1374              
1375             =cut
1376              
1377             sub output_prefix {
1378 0     0 1 0 my $self = shift;
1379 0         0 &MIME::Tools::whine("output_prefix deprecated in MIME::Parser");
1380 0         0 $self->filer->output_prefix(@_);
1381             }
1382              
1383             #------------------------------
1384              
1385             =item evil_filename NAME
1386              
1387             I
1388             Identical to the preferred form:
1389              
1390             $parser->filer->evil_filename(...args...);
1391              
1392             We just delegate this to the underlying L object.
1393              
1394             =cut
1395              
1396             sub evil_filename {
1397 2     2 1 12 my $self = shift;
1398 2         5 &MIME::Tools::whine("evil_filename deprecated in MIME::Parser");
1399 2         4 $self->filer->evil_filename(@_);
1400             }
1401              
1402             #------------------------------
1403              
1404             =item max_parts NUM
1405              
1406             I
1407             Limits the number of MIME parts we will parse.
1408              
1409             Normally, instances of this class parse a message to the bitter end.
1410             Messages with many MIME parts can cause excessive memory consumption.
1411             If you invoke this method, parsing will abort with a die() if a message
1412             contains more than NUM parts.
1413              
1414             If NUM is set to -1 (the default), then no maximum limit is enforced.
1415              
1416             With no argument, returns the current setting as an integer
1417              
1418             =cut
1419              
1420             sub max_parts {
1421 0     0 1 0 my($self, $num) = @_;
1422 0 0       0 if (@_ > 1) {
1423 0         0 $self->{MP5_MaxParts} = $num;
1424             }
1425 0         0 return $self->{MP5_MaxParts};
1426             }
1427              
1428             #------------------------------
1429              
1430             =item output_to_core YESNO
1431              
1432             I
1433             Normally, instances of this class output all their decoded body
1434             data to disk files (via MIME::Body::File). However, you can change
1435             this behaviour by invoking this method before parsing:
1436              
1437             If YESNO is false (the default), then all body data goes
1438             to disk files.
1439              
1440             If YESNO is true, then all body data goes to in-core data structures
1441             This is a little risky (what if someone emails you an MPEG or a tar
1442             file, hmmm?) but people seem to want this bit of noose-shaped rope,
1443             so I'm providing it.
1444             Note that setting this attribute true I mean that parser-internal
1445             temporary files are avoided! Use L for that.
1446              
1447             With no argument, returns the current setting as a boolean.
1448              
1449             =cut
1450              
1451             sub output_to_core {
1452 162     162 1 2719 my ($self, $yesno) = @_;
1453 162 100       312 if (@_ > 1) {
1454 41 100 100     149 $yesno = 0 if ($yesno and $yesno eq 'NONE');
1455 41         71 $self->{MP5_FilerToCore} = $yesno;
1456             }
1457 162         258 $self->{MP5_FilerToCore};
1458             }
1459              
1460              
1461             =item tmp_recycling
1462              
1463             I
1464              
1465             This method is a no-op to preserve the pre-5.421 API.
1466              
1467             The tmp_recycling() feature was removed in 5.421 because it had never actually
1468             worked. Please update your code to stop using it.
1469              
1470             =cut
1471              
1472             sub tmp_recycling
1473             {
1474 1     1 1 271 return;
1475             }
1476              
1477              
1478              
1479             #------------------------------
1480              
1481             =item tmp_to_core [YESNO]
1482              
1483             I
1484             Should L create real temp files, or
1485             use fake in-core ones? Normally we allow the creation of temporary
1486             disk files, since this allows us to handle huge attachments even when
1487             core is limited.
1488              
1489             If YESNO is true, we implement new_tmpfile() via in-core handles.
1490             If YESNO is false (the default), we use real tmpfiles.
1491             With no argument, just returns the current setting.
1492              
1493             =cut
1494              
1495             sub tmp_to_core {
1496 0     0 1 0 my ($self, $yesno) = @_;
1497 0 0       0 $self->{MP5_TmpToCore} = $yesno if (@_ > 1);
1498 0         0 $self->{MP5_TmpToCore};
1499             }
1500              
1501             #------------------------------
1502              
1503             =item use_inner_files [YESNO]
1504              
1505             I.
1506              
1507             I
1508              
1509             MIME::Parser no longer supports IO::InnerFile, but this method is retained for
1510             backwards compatibility. It does nothing.
1511              
1512             The original reasoning for IO::InnerFile was that inner files were faster than
1513             "in-core" temp files. At the time, the "in-core" tempfile support was
1514             implemented with IO::Scalar from the IO-Stringy distribution, which used the
1515             tie() interface to wrap a scalar with the appropriate IO::Handle operations.
1516             The penalty for this was fairly hefty, and IO::InnerFile actually was faster.
1517              
1518             Nowadays, MIME::Parser uses Perl's built in ability to open a filehandle on an
1519             in-memory scalar variable via PerlIO. Benchmarking shows that IO::InnerFile is
1520             slightly slower than using in-memory temporary files, and is slightly faster
1521             than on-disk temporary files. Both measurements are within a few percent of
1522             each other. Since there's no real benefit, and since the IO::InnerFile abuse
1523             was fairly hairy and evil ("writes" to it were faked by extending the size of
1524             the inner file with the assumption that the only data you'd ever ->print() to
1525             it would be the line from the "outer" file, for example) it's been removed.
1526              
1527             =cut
1528              
1529             sub use_inner_files {
1530 0     0 1 0 return 0;
1531             }
1532              
1533             =back
1534              
1535             =cut
1536              
1537              
1538             #------------------------------------------------------------
1539              
1540             =head2 Specifying classes to be instantiated
1541              
1542             =over 4
1543              
1544             =cut
1545              
1546             #------------------------------
1547              
1548             =item interface ROLE,[VALUE]
1549              
1550             I
1551             During parsing, the parser normally creates instances of certain classes,
1552             like MIME::Entity. However, you may want to create a parser subclass
1553             that uses your own experimental head, entity, etc. classes (for example,
1554             your "head" class may provide some additional MIME-field-oriented methods).
1555              
1556             If so, then this is the method that your subclass should invoke during
1557             init. Use it like this:
1558              
1559             package MyParser;
1560             @ISA = qw(MIME::Parser);
1561             ...
1562             sub init {
1563             my $self = shift;
1564             $self->SUPER::init(@_); ### do my parent's init
1565             $self->interface(ENTITY_CLASS => 'MIME::MyEntity');
1566             $self->interface(HEAD_CLASS => 'MIME::MyHead');
1567             $self; ### return
1568             }
1569              
1570             With no VALUE, returns the VALUE currently associated with that ROLE.
1571              
1572             =cut
1573              
1574             sub interface {
1575 426     426 1 421 my ($self, $role, $value) = @_;
1576 426 100       731 $self->{MP5_Interface}{$role} = $value if (defined($value));
1577 426         1237 $self->{MP5_Interface}{$role};
1578             }
1579              
1580             #------------------------------
1581              
1582             =item new_body_for HEAD
1583              
1584             I
1585             Based on the HEAD of a part we are parsing, return a new
1586             body object (any desirable subclass of MIME::Body) for
1587             receiving that part's data.
1588              
1589             If you set the C option to false before parsing
1590             (the default), then we call C and create a
1591             new MIME::Body::File on that filename.
1592              
1593             If you set the C option to true before parsing,
1594             then you get a MIME::Body::InCore instead.
1595              
1596             If you want the parser to do something else entirely, you can
1597             override this method in a subclass.
1598              
1599             =cut
1600              
1601             sub new_body_for {
1602 121     121 1 132 my ($self, $head) = @_;
1603              
1604 121 100       210 if ($self->output_to_core) {
1605 27         43 $self->debug("outputting body to core");
1606 27         186 return (new MIME::Body::InCore);
1607             }
1608             else {
1609 94         166 my $outpath = $self->output_path($head);
1610 94         317 $self->debug("outputting body to disk file: $outpath");
1611 94         165 $self->filer->purgeable($outpath); ### we plan to use it
1612 94         468 return (new MIME::Body::File $outpath);
1613             }
1614             }
1615              
1616             #------------------------------
1617              
1618             =pod
1619              
1620             =back
1621              
1622             =head2 Temporary File Creation
1623              
1624             =over
1625              
1626             =item tmp_dir DIRECTORY
1627              
1628             I
1629             Causes any temporary files created by this parser to be created in the
1630             given DIRECTORY.
1631              
1632             If called without arguments, returns current value.
1633              
1634             The default value is undef, which will cause new_tmpfile() to use the
1635             system default temporary directory.
1636              
1637             =cut
1638              
1639             sub tmp_dir
1640             {
1641 98     98 1 94 my ($self, $dirname) = @_;
1642 98 50       160 if ( $dirname ) {
1643 0         0 $self->{MP5_TmpDir} = $dirname;
1644             }
1645              
1646 98         171 return $self->{MP5_TmpDir};
1647             }
1648              
1649             =item new_tmpfile
1650              
1651             I
1652             Return an IO handle to be used to hold temporary data during a parse.
1653              
1654             The default uses MIME::Tools::tmpopen() to create a new temporary file,
1655             unless L dictates otherwise, but you can
1656             override this. You shouldn't need to.
1657              
1658             The location for temporary files can be changed on a per-parser basis
1659             with L.
1660              
1661             If you do override this, make certain that the object you return is
1662             set for binmode(), and is able to handle the following methods:
1663              
1664             read(BUF, NBYTES)
1665             getline()
1666             getlines()
1667             print(@ARGS)
1668             flush()
1669             seek(0, 0)
1670              
1671             Fatal exception if the stream could not be established.
1672              
1673             =cut
1674              
1675             sub new_tmpfile {
1676 98     98 1 3390 my ($self) = @_;
1677              
1678 98         83 my $io;
1679 98 100       162 if ($self->{MP5_TmpToCore}) {
1680 1         1 my $var;
1681 1 50       5 $io = IO::File->new(\$var, '+>:') or die "$ME: Can't open in-core tmpfile: $!";
1682             } else {
1683 97         107 my $args = {};
1684 97 100       166 if( $self->tmp_dir ) {
1685 1         2 $args->{DIR} = $self->tmp_dir;
1686             }
1687 97 50       187 $io = tmpopen( $args ) or die "$ME: can't open tmpfile: $!\n";
1688 97 50       29702 binmode($io) or die "$ME: can't set to binmode: $!";
1689             }
1690 98         181 return $io;
1691             }
1692              
1693             =back
1694              
1695             =cut
1696              
1697              
1698              
1699              
1700              
1701              
1702             #------------------------------------------------------------
1703              
1704             =head2 Parse results and error recovery
1705              
1706             =over 4
1707              
1708             =cut
1709              
1710             #------------------------------
1711              
1712             =item last_error
1713              
1714             I
1715             Return the error (if any) that we ignored in the last parse.
1716              
1717             =cut
1718              
1719             sub last_error {
1720 0     0 1 0 join '', shift->results->errors;
1721             }
1722              
1723              
1724             #------------------------------
1725              
1726             =item last_head
1727              
1728             I
1729             Return the top-level MIME header of the last stream we attempted to parse.
1730             This is useful for replying to people who sent us bad MIME messages.
1731              
1732             ### Parse an input stream:
1733             eval { $entity = $parser->parse(\*STDIN) };
1734             if (!$entity) { ### parse failed!
1735             my $decapitated = $parser->last_head;
1736             ...
1737             }
1738              
1739             =cut
1740              
1741             sub last_head {
1742 0     0 1 0 shift->results->top_head;
1743             }
1744              
1745             #------------------------------
1746              
1747             =item results
1748              
1749             I
1750             Return an object containing lots of info from the last entity parsed.
1751             This will be an instance of class
1752             L.
1753              
1754             =cut
1755              
1756             sub results {
1757 657     657 1 2017 shift->{MP5_Results};
1758             }
1759              
1760              
1761             =back
1762              
1763             =cut
1764              
1765              
1766             #------------------------------
1767             1;
1768             __END__