File Coverage

blib/lib/MIME/Parser.pm
Criterion Covered Total %
statement 398 454 87.6
branch 135 206 65.5
condition 26 40 65.0
subroutine 51 62 82.2
pod 33 47 70.2
total 643 809 79.4


line stmt bran cond sub pod time code
1             package MIME::Parser;
2              
3              
4             =head1 NAME
5              
6             MIME::Parser - experimental class for parsing MIME streams
7              
8              
9             =head1 SYNOPSIS
10              
11             Before reading further, you should see L to make sure that
12             you understand where this module fits into the grand scheme of things.
13             Go on, do it now. I'll wait.
14              
15             Ready? Ok...
16              
17             =head2 Basic usage examples
18              
19             ### Create a new parser object:
20             my $parser = new MIME::Parser;
21              
22             ### Tell it where to put things:
23             $parser->output_under("/tmp");
24              
25             ### Parse an input filehandle:
26             $entity = $parser->parse(\*STDIN);
27              
28             ### Congratulations: you now have a (possibly multipart) MIME entity!
29             $entity->dump_skeleton; # for debugging
30              
31              
32             =head2 Examples of input
33              
34             ### Parse from filehandles:
35             $entity = $parser->parse(\*STDIN);
36             $entity = $parser->parse(IO::File->new("some command|");
37              
38             ### Parse from any object that supports getline() and read():
39             $entity = $parser->parse($myHandle);
40              
41             ### Parse an in-core MIME message:
42             $entity = $parser->parse_data($message);
43              
44             ### Parse an MIME message in a file:
45             $entity = $parser->parse_open("/some/file.msg");
46              
47             ### Parse an MIME message out of a pipeline:
48             $entity = $parser->parse_open("gunzip - < file.msg.gz |");
49              
50             ### Parse already-split input (as "deliver" would give it to you):
51             $entity = $parser->parse_two("msg.head", "msg.body");
52              
53              
54             =head2 Examples of output control
55              
56             ### Keep parsed message bodies in core (default outputs to disk):
57             $parser->output_to_core(1);
58              
59             ### Output each message body to a one-per-message directory:
60             $parser->output_under("/tmp");
61              
62             ### Output each message body to the same directory:
63             $parser->output_dir("/tmp");
64              
65             ### Change how nameless message-component files are named:
66             $parser->output_prefix("msg");
67              
68             ### Put temporary files somewhere else
69             $parser->tmp_dir("/var/tmp/mytmpdir");
70              
71             =head2 Examples of error recovery
72              
73             ### Normal mechanism:
74             eval { $entity = $parser->parse(\*STDIN) };
75             if ($@) {
76             $results = $parser->results;
77             $decapitated = $parser->last_head; ### get last top-level head
78             }
79              
80             ### Ultra-tolerant mechanism:
81             $parser->ignore_errors(1);
82             $entity = eval { $parser->parse(\*STDIN) };
83             $error = ($@ || $parser->last_error);
84              
85             ### Cleanup all files created by the parse:
86             eval { $entity = $parser->parse(\*STDIN) };
87             ...
88             $parser->filer->purge;
89              
90              
91             =head2 Examples of parser options
92              
93             ### Automatically attempt to RFC 2047-decode the MIME headers?
94             $parser->decode_headers(1); ### default is false
95              
96             ### Parse contained "message/rfc822" objects as nested MIME streams?
97             $parser->extract_nested_messages(0); ### default is true
98              
99             ### Look for uuencode in "text" messages, and extract it?
100             $parser->extract_uuencode(1); ### default is false
101              
102             ### Should we forgive normally-fatal errors?
103             $parser->ignore_errors(0); ### default is true
104              
105              
106             =head2 Miscellaneous examples
107              
108             ### Convert a Mail::Internet object to a MIME::Entity:
109             my $data = join('', (@{$mail->header}, "\n", @{$mail->body}));
110             $entity = $parser->parse_data(\$data);
111              
112              
113              
114             =head1 DESCRIPTION
115              
116             You can inherit from this class to create your own subclasses
117             that parse MIME streams into MIME::Entity objects.
118              
119              
120             =head1 PUBLIC INTERFACE
121              
122             =cut
123              
124             #------------------------------
125              
126             require 5.004;
127              
128             ### Pragmas:
129 19     19   2199106 use strict;
  19         48  
  19         1137  
130 19     19   161 use vars (qw($VERSION $CAT $CRLF));
  19         35  
  19         1660  
131              
132             ### core Perl modules
133 19     19   8055 use IO::File;
  19         121839  
  19         2857  
134 19     19   211 use File::Spec;
  19         52  
  19         701  
135 19     19   135 use File::Path;
  19         37  
  19         1378  
136 19     19   125 use Config qw(%Config);
  19         54  
  19         888  
137 19     19   115 use Carp;
  19         47  
  19         1550  
138              
139             ### Kit modules:
140 19     19   7965 use MIME::Tools qw(:config :utils :msgtypes usage tmpopen );
  19         69  
  19         4116  
141 19     19   9068 use MIME::Head;
  19         75  
  19         786  
142 19     19   9204 use MIME::Body;
  19         66  
  19         683  
143 19     19   12403 use MIME::Entity;
  19         91  
  19         810  
144 19     19   138 use MIME::Decoder;
  19         35  
  19         499  
145 19     19   12045 use MIME::Parser::Reader;
  19         70  
  19         844  
146 19     19   12121 use MIME::Parser::Filer;
  19         76  
  19         769  
147 19     19   9869 use MIME::Parser::Results;
  19         59  
  19         134309  
148              
149             #------------------------------
150             #
151             # Globals
152             #
153             #------------------------------
154              
155             ### The package version, both in 1.23 style *and* usable by MakeMaker:
156             $VERSION = "5.517";
157              
158             ### How to catenate:
159             $CAT = '/bin/cat';
160              
161             ### The CRLF sequence:
162             $CRLF = "\015\012";
163              
164             ### Who am I?
165             my $ME = 'MIME::Parser';
166              
167             #------------------------------------------------------------
168              
169             =head2 Construction
170              
171             =over 4
172              
173             =cut
174              
175             #------------------------------
176              
177             =item new ARGS...
178              
179             I
180             Create a new parser object.
181             Once you do this, you can then set up various parameters
182             before doing the actual parsing. For example:
183              
184             my $parser = new MIME::Parser;
185             $parser->output_dir("/tmp");
186             $parser->output_prefix("msg1");
187             my $entity = $parser->parse(\*STDIN);
188              
189             Any arguments are passed into C.
190             Don't override this in your subclasses; override init() instead.
191              
192             =cut
193              
194             sub new {
195 45     45 1 2675518 my $self = bless {}, shift;
196 45         228 $self->init(@_);
197             }
198              
199             #------------------------------
200              
201             =item init ARGS...
202              
203             I
204             Initiallize a new MIME::Parser object.
205             This is automatically sent to a new object; you may want to override it.
206             If you override this, be sure to invoke the inherited method.
207              
208             =cut
209              
210             sub init {
211 45     45 1 104 my $self = shift;
212              
213 45         219 $self->{MP5_DecodeHeaders} = 0;
214 45         128 $self->{MP5_DecodeBodies} = 1;
215 45         134 $self->{MP5_Interface} = {};
216 45         118 $self->{MP5_ParseNested} = 'NEST';
217 45         141 $self->{MP5_TmpToCore} = 0;
218 45         133 $self->{MP5_IgnoreErrors} = 1;
219 45         148 $self->{MP5_UUDecode} = 0;
220 45         108 $self->{MP5_MaxParts} = -1;
221 45         121 $self->{MP5_TmpDir} = undef;
222 45         98 $self->{MP5_AmbiguousContent} = 0;
223              
224 45         227 $self->interface(ENTITY_CLASS => 'MIME::Entity');
225 45         160 $self->interface(HEAD_CLASS => 'MIME::Head');
226              
227 45         275 $self->output_dir(".");
228              
229 45         221 $self;
230             }
231              
232             #------------------------------
233              
234             =item init_parse
235              
236             I
237             Invoked automatically whenever one of the top-level parse() methods
238             is called, to reset the parser to a "ready" state.
239              
240             =cut
241              
242             sub init_parse {
243 65     65 1 142 my $self = shift;
244              
245 65         643 $self->{MP5_Results} = new MIME::Parser::Results;
246              
247 65         417 $self->{MP5_Filer}->results($self->{MP5_Results});
248 65         470 $self->{MP5_Filer}->purgeable([]);
249 65         322 $self->{MP5_Filer}->init_parse();
250 65         219 $self->{MP5_NumParts} = 0;
251 65         154 $self->{MP5_AmbiguousContent} = 0;
252 65         140 1;
253             }
254              
255             =back
256              
257             =cut
258              
259              
260              
261              
262              
263             #------------------------------------------------------------
264              
265             =head2 Altering how messages are parsed
266              
267             =over 4
268              
269             =cut
270              
271             #------------------------------
272              
273             =item decode_headers [YESNO]
274              
275             I
276             Controls whether the parser will attempt to decode all the MIME headers
277             (as per RFC 2047) the moment it sees them. B
278             for two very important reasons:>
279              
280             =over
281              
282             =item *
283              
284             B
285             If you fully decode the headers into bytes, you can inadvertently
286             transform a parseable MIME header like this:
287              
288             Content-type: text/plain; filename="=?ISO-8859-1?Q?Hi=22Ho?="
289              
290             into unparseable gobbledygook; in this case:
291              
292             Content-type: text/plain; filename="Hi"Ho"
293              
294             =item *
295              
296             B An encoded string which contains
297             both Latin-1 and Cyrillic characters will be turned into a binary
298             mishmosh which simply can't be rendered.
299              
300             =back
301              
302             B
303             This method was once the only out-of-the-box way to deal with attachments
304             whose filenames had non-ASCII characters. However, since MIME-tools 5.4xx
305             this is no longer necessary.
306              
307             B
308             If YESNO is true, decoding is done. However, you will get a warning
309             unless you use one of the special "true" values:
310              
311             "I_NEED_TO_FIX_THIS"
312             Just shut up and do it. Not recommended.
313             Provided only for those who need to keep old scripts functioning.
314              
315             "I_KNOW_WHAT_I_AM_DOING"
316             Just shut up and do it. Not recommended.
317             Provided for those who REALLY know what they are doing.
318              
319             If YESNO is false (the default), no attempt at decoding will be done.
320             With no argument, just returns the current setting.
321             B you can always decode the headers I the parsing
322             has completed (see L), or
323             decode the words on demand (see L).
324              
325             =cut
326              
327             sub decode_headers {
328 0     0 1 0 my ($self, $yesno) = @_;
329 0 0       0 if (@_ > 1) {
330 0         0 $self->{MP5_DecodeHeaders} = $yesno;
331 0 0       0 if ($yesno) {
332 0 0 0     0 if (($yesno eq "I_KNOW_WHAT_I_AM_DOING") ||
333             ($yesno eq "I_NEED_TO_FIX_THIS")) {
334             ### ok
335             }
336             else {
337 0         0 $self->whine("as of 5.4xx, decode_headers() should NOT be ".
338             "set true... if you are doing this to make sure ".
339             "that non-ASCII filenames are translated, ".
340             "that's now done automatically; for all else, ".
341             "use MIME::Words.");
342             }
343             }
344             }
345 0         0 $self->{MP5_DecodeHeaders};
346             }
347              
348             #------------------------------
349              
350             =item extract_nested_messages OPTION
351              
352             I
353             Some MIME messages will contain a part of type C
354             ,C or C:
355             literally, the text of an embedded mail/news/whatever message.
356             This option controls whether (and how) we parse that embedded message.
357              
358             If the OPTION is false, we treat such a message just as if it were a
359             C document, without attempting to decode its contents.
360              
361             If the OPTION is true (the default), the body of the C
362             or C part is parsed by this parser, creating an
363             entity object. What happens then is determined by the actual OPTION:
364              
365             =over 4
366              
367             =item NEST or 1
368              
369             The default setting.
370             The contained message becomes the sole "part" of the C
371             entity (as if the containing message were a special kind of
372             "multipart" message).
373             You can recover the sub-entity by invoking the L
374             method on the C entity.
375              
376             =item REPLACE
377              
378             The contained message replaces the C entity, as though
379             the C "container" never existed.
380              
381             B notice that, with this option, all the header information
382             in the C header is lost. This might seriously bother
383             you if you're dealing with a top-level message, and you've just lost
384             the sender's address and the subject line. C<:-/>.
385              
386             =back
387              
388             I
389              
390             =cut
391              
392             sub extract_nested_messages {
393 51     51 1 221 my ($self, $option) = @_;
394 51 100       170 $self->{MP5_ParseNested} = $option if (@_ > 1);
395 51         176 $self->{MP5_ParseNested};
396             }
397              
398             sub parse_nested_messages {
399 0     0 0 0 usage "parse_nested_messages() is now extract_nested_messages()";
400 0         0 shift->extract_nested_messages(@_);
401             }
402              
403             #------------------------------
404              
405             =item extract_uuencode [YESNO]
406              
407             I
408             If set true, then whenever we are confronted with a message
409             whose effective content-type is "text/plain" and whose encoding
410             is 7bit/8bit/binary, we scan the encoded body to see if it contains
411             uuencoded data (generally given away by a "begin XXX" line).
412              
413             If it does, we explode the uuencoded message into a multipart,
414             where the text before the first "begin XXX" becomes the first part,
415             and all "begin...end" sections following become the subsequent parts.
416             The filename (if given) is accessible through the normal means.
417              
418             =cut
419              
420             sub extract_uuencode {
421 299     299 1 758 my ($self, $yesno) = @_;
422 299 100       668 $self->{MP5_UUDecode} = $yesno if @_ > 1;
423 299         1146 $self->{MP5_UUDecode};
424             }
425              
426             #------------------------------
427              
428             =item ignore_errors [YESNO]
429              
430             I
431             Controls whether the parser will attempt to ignore normally-fatal
432             errors, treating them as warnings and continuing with the parse.
433              
434             If YESNO is true (the default), many syntax errors are tolerated.
435             If YESNO is false, fatal errors throw exceptions.
436             With no argument, just returns the current setting.
437              
438             =cut
439              
440             sub ignore_errors {
441 25     25 1 133 my ($self, $yesno) = @_;
442 25 50       76 $self->{MP5_IgnoreErrors} = $yesno if (@_ > 1);
443 25         54 $self->{MP5_IgnoreErrors};
444             }
445              
446              
447             #------------------------------
448              
449             =item decode_bodies [YESNO]
450              
451             I
452             Controls whether the parser should decode entity bodies or not.
453             If this is set to a false value (default is true), all entity bodies
454             will be kept as-is in the original content-transfer encoding.
455              
456             To prevent double encoding on the output side MIME::Body->is_encoded
457             is set, which tells MIME::Body not to encode the data again, if encoded
458             data was requested. This is in particular useful, when it's important that
459             the content B be modified, e.g. if you want to calculate
460             OpenPGP signatures from it.
461              
462             B: the semantics change significantly if you parse MIME
463             messages with this option set, because MIME::Entity resp. MIME::Body
464             *always* see encoded data now, while the default behaviour is
465             working with *decoded* data (and encoding it only if you request it).
466             You need to decode the data yourself, if you want to have it decoded.
467              
468             So use this option only if you exactly know, what you're doing, and
469             that you're sure, that you really need it.
470              
471             =cut
472              
473             sub decode_bodies {
474 310     310 1 1127 my ($self, $yesno) = @_;
475 310 100       743 $self->{MP5_DecodeBodies} = $yesno if (@_ > 1);
476 310         1238 $self->{MP5_DecodeBodies};
477             }
478              
479             #------------------------------
480             #
481             # MESSAGES...
482             #
483              
484             #------------------------------
485             #
486             # debug MESSAGE...
487             #
488             sub debug {
489 1489     1489 0 2411 my $self = shift;
490 1489 50       6677 if (MIME::Tools->debugging()) {
491 0 0       0 if (my $r = $self->{MP5_Results}) {
492 0         0 unshift @_, $r->indent;
493 0         0 $r->msg($M_DEBUG, @_);
494             }
495 0         0 MIME::Tools::debug(@_);
496             }
497             }
498              
499             #------------------------------
500             #
501             # whine PROBLEM...
502             #
503             sub whine {
504 6     6 0 27 my $self = shift;
505 6 50       22 if (my $r = $self->{MP5_Results}) {
506 6         24 unshift @_, $r->indent;
507 6         20 $r->msg($M_WARNING, @_);
508             }
509 6         28 &MIME::Tools::whine(@_);
510             }
511              
512             #------------------------------
513             #
514             # error PROBLEM...
515             #
516             # Possibly-forgivable parse error occurred.
517             # Raises a fatal exception unless we are ignoring errors.
518             #
519             sub error {
520 5     5 1 10 my $self = shift;
521 5 50       82 if (my $r = $self->{MP5_Results}) {
522 5         21 unshift @_, $r->indent;
523 5         17 $r->msg($M_ERROR, @_);
524             }
525 5         38 &MIME::Tools::error(@_);
526 5 50       48 $self->{MP5_IgnoreErrors} ? return undef : die @_;
527             }
528              
529              
530              
531              
532             #------------------------------
533             #
534             # PARSING...
535             #
536              
537             #------------------------------
538             #
539             # process_preamble IN, READER, ENTITY
540             #
541             # I
542             # Dispose of a multipart message's preamble.
543             #
544             sub process_preamble {
545 51     51 0 166 my ($self, $in, $rdr, $ent) = @_;
546              
547             ### Sanity:
548 51 50       206 ($rdr->depth > 0) or die "$ME: internal logic error";
549              
550             ### Parse preamble:
551 51         157 my @saved;
552 51         127 my $data = '';
553 51 50       669 open(my $fh, '>', \$data) or die $!;
554 51         248 $rdr->read_chunk($in, $fh, 1);
555 51         163 close $fh;
556              
557             # Ugh. Horrible. If the preamble consists only of CRLF, squash it down
558             # to the empty string. Else, remove the trailing CRLF.
559 51 100       183 if( $data =~ m/^[\r\n]\z/ ) {
560 2         9 @saved = ('');
561             } else {
562 49         175 $data =~ s/[\r\n]\z//;
563 49         233 @saved = split(/^/, $data);
564             }
565 51         347 $ent->preamble(\@saved);
566 51         230 1;
567             }
568              
569             #------------------------------
570             #
571             # process_epilogue IN, READER, ENTITY
572             #
573             # I
574             # Dispose of a multipart message's epilogue.
575             #
576             sub process_epilogue {
577 49     49 0 190 my ($self, $in, $rdr, $ent) = @_;
578 49         225 $self->debug("process_epilogue");
579              
580             ### Parse epilogue:
581 49         94 my @saved;
582 49         279 $rdr->read_lines($in, \@saved);
583 49         263 $ent->epilogue(\@saved);
584 49         104 1;
585             }
586              
587             #------------------------------
588             #
589             # process_to_bound IN, READER, OUT
590             #
591             # I
592             # Dispose of the next chunk into the given output stream OUT.
593             #
594             sub process_to_bound {
595 117     117 0 308 my ($self, $in, $rdr, $out) = @_;
596              
597             ### Parse:
598 117         603 $rdr->read_chunk($in, $out);
599 117         218 1;
600             }
601              
602             #------------------------------
603             #
604             # process_header IN, READER
605             #
606             # I
607             # Process and return the next header.
608             # Return undef if, instead of a header, the encapsulation boundary is found.
609             # Fatal exception on failure.
610             #
611             sub process_header {
612 203     203 0 646 my ($self, $in, $rdr) = @_;
613 203         672 $self->debug("process_header");
614              
615             ### Parse and save the (possibly empty) header, up to and including the
616             ### blank line that terminates it:
617 203         603 my $head = $self->interface('HEAD_CLASS')->new;
618              
619             ### Read the lines of the header.
620             ### We localize IO inside here, so that we can support the IO:: interface
621 203         7147 my @headlines;
622 203         842 my $hdr_rdr = $rdr->spawn;
623 203         752 $hdr_rdr->add_terminator("");
624 203         626 $hdr_rdr->add_terminator("\r"); ### sigh
625              
626 203         411 my $headstr = '';
627 203 50       3181 open(my $outfh, '>:scalar', \$headstr) or die $!;
628 203         1007 $hdr_rdr->read_chunk($in, $outfh, 0, 1);
629 203         587 close $outfh;
630              
631             ### How did we do?
632 203 100       640 if ($hdr_rdr->eos_type eq 'DELIM') {
633 2         6 $self->whine("bogus part, without CRLF before body");
634 2         14 return undef;
635             }
636 201 100       566 ($hdr_rdr->eos_type eq 'DONE') or
637             $self->error("unexpected end of header\n");
638              
639              
640             ### If header line begins with a UTF-8 Byte-Order mark, remove it.
641 201         539 $headstr =~ s/^\x{EF}\x{BB}\x{BF}//;
642              
643             ### Extract the header (note that zero-size headers are admissible!):
644 201 50       2311 open(my $readfh, '<:scalar', \$headstr) or die $!;
645 201         1133 $head->read( $readfh );
646              
647 201 50       132341 unless( $readfh->eof() ) {
648             # Not entirely correct, since ->read consumes the line it gives up on.
649             # it's actually the line /before/ the one we get with ->getline
650 0         0 $self->error("couldn't parse head; error near:\n", $readfh->getline());
651             }
652              
653              
654             ### If desired, auto-decode the header as per RFC 2047
655             ### This shouldn't affect non-encoded headers; however, it will decode
656             ### headers with international characters. WARNING: currently, the
657             ### character-set information is LOST after decoding.
658 201 50       2261 $head->decode($self->{MP5_DecodeHeaders}) if $self->{MP5_DecodeHeaders};
659              
660             ### If this is the top-level head, save it:
661 201 100       724 $self->results->top_head($head) if !$self->results->top_head;
662              
663 201         2759 return $head;
664             }
665              
666             #------------------------------
667             #
668             # process_multipart IN, READER, ENTITY
669             #
670             # I
671             # Process the multipart body, and return the state.
672             # Fatal exception on failure.
673             # Invoked by process_part().
674             #
675             sub process_multipart {
676 51     51 0 172 my ($self, $in, $rdr, $ent) = @_;
677 51         265 my $head = $ent->head;
678              
679 51         173 $self->debug("process_multipart...");
680              
681             ### Get actual type and subtype from the header:
682 51         162 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
683              
684             ### If this was a type "multipart/digest", then the RFCs say we
685             ### should default the parts to have type "message/rfc822".
686             ### Thanks to Carsten Heyl for suggesting this...
687 51 100       228 my $retype = (($subtype eq 'digest') ? 'message/rfc822' : '');
688              
689             ### Get the boundaries for the parts:
690 51         214 my $bound = $head->multipart_boundary;
691 51 50 33     403 if (!defined($bound) || ($bound =~ /[\r\n]/)) {
692 0         0 $self->error("multipart boundary is missing, or contains CR or LF\n");
693 0         0 $ent->effective_type("application/x-unparseable-multipart");
694 0         0 return $self->process_singlepart($in, $rdr, $ent);
695             }
696 51         231 my $part_rdr = $rdr->spawn->add_boundary($bound);
697              
698             ### Prepare to parse:
699 51         124 my $eos_type;
700             my $more_parts;
701              
702             ### Parse preamble...
703 51         243 $self->process_preamble($in, $part_rdr, $ent);
704              
705             ### ...and look at how we finished up:
706 51         192 $eos_type = $part_rdr->eos_type;
707 51 100       155 if ($eos_type eq 'DELIM'){ $more_parts = 1 }
  50 50       94  
708 1         5 elsif ($eos_type eq 'CLOSE'){ $self->whine("empty multipart message\n");
709 1         3 $more_parts = 0; }
710 0         0 else { $self->error("unexpected end of preamble\n");
711 0         0 return 1; }
712              
713             ### Parse parts:
714 51         93 my $partno = 0;
715 51         99 my $part;
716 51         173 while ($more_parts) {
717 125         222 ++$partno;
718 125         573 $self->debug("parsing part $partno...");
719              
720             ### Parse the next part, and add it to the entity...
721 125         616 my $part = $self->process_part($in, $part_rdr, Retype=>$retype);
722 125 50       348 return undef unless defined($part);
723              
724 125         611 $ent->add_part($part);
725              
726             ### ...and look at how we finished up:
727 125         741 $eos_type = $part_rdr->eos_type;
728 125 100       472 if ($eos_type eq 'DELIM') { $more_parts = 1 }
  75 100       237  
729 48         270 elsif ($eos_type eq 'CLOSE') { $more_parts = 0; }
730 2         22 else { $self->error("unexpected end of parts ".
731             "before epilogue\n");
732 2         18 return 1; }
733             }
734              
735             ### Parse epilogue...
736             ### (note that we use the *parent's* reader here, which does not
737             ### know about the boundaries in this multipart!)
738 49         250 $self->process_epilogue($in, $rdr, $ent);
739              
740             ### ...and there's no need to look at how we finished up!
741 49         451 1;
742             }
743              
744             #------------------------------
745             #
746             # process_singlepart IN, READER, ENTITY
747             #
748             # I
749             # Process the singlepart body. Returns true.
750             # Fatal exception on failure.
751             # Invoked by process_part().
752             #
753             sub process_singlepart {
754 137     137 0 382 my ($self, $in, $rdr, $ent) = @_;
755 137         535 my $head = $ent->head;
756              
757 137         396 $self->debug("process_singlepart...");
758              
759             ### Obtain a filehandle for reading the encoded information:
760             ### We have two different approaches, based on whether or not we
761             ### have to contend with boundaries.
762 137         221 my $ENCODED; ### handle
763 137   100     519 my $can_shortcut = (!$rdr->has_bounds and !$self->{MP5_UUDecode});
764 137 100       380 if ($can_shortcut) {
765 20         96 $self->debug("taking shortcut");
766              
767 20         33 $ENCODED = $in;
768 20         106 $rdr->eos('EOF'); ### be sure to bogus-up the reader state to EOF:
769             }
770             else {
771              
772 117         350 $self->debug("using temp file");
773 117         461 $ENCODED = $self->new_tmpfile();
774              
775             ### Read encoded body until boundary (or EOF)...
776 117         573 $self->process_to_bound($in, $rdr, $ENCODED);
777              
778             ### ...and look at how we finished up.
779             ### If we have bounds, we want DELIM or CLOSE.
780             ### Otherwise, we want EOF (and that's all we'd get, anyway!).
781 117 100       416 if ($rdr->has_bounds) {
782 116 100       439 ($rdr->eos_type =~ /^(DELIM|CLOSE)$/) or
783             $self->error("part did not end with expected boundary\n");
784             }
785              
786             ### Flush and rewind encoded buffer, so we can read it:
787 117 50       7585 $ENCODED->flush or die "$ME: can't flush: $!";
788 117 50       1016 $ENCODED->seek(0, 0) or die "$ME: can't seek: $!";
789             }
790              
791             ### Get a content-decoder to decode this part's encoding:
792 137         1945 my $encoding = $head->mime_encoding;
793 137         1139 my $decoder = new MIME::Decoder $encoding;
794 137 50       502 if (!$decoder) {
795 0         0 $self->whine("Unsupported encoding '$encoding': using 'binary'... \n".
796             "The entity will have an effective MIME type of \n".
797             "application/octet-stream."); ### as per RFC-2045
798 0         0 $ent->effective_type('application/octet-stream');
799 0         0 $decoder = new MIME::Decoder 'binary';
800 0         0 $encoding = 'binary';
801             }
802              
803             ### Data should be stored encoded / as-is?
804 137 100       530 if ( !$self->decode_bodies ) {
805 7         25 $decoder = new MIME::Decoder 'binary';
806 7         18 $encoding = 'binary';
807             }
808              
809             ### If desired, sidetrack to troll for UUENCODE:
810 137         504 $self->debug("extract uuencode? ", $self->extract_uuencode);
811 137         454 $self->debug("encoding? ", $encoding);
812 137         650 $self->debug("effective type? ", $ent->effective_type);
813              
814 137 50 66     427 if ($self->extract_uuencode and
      66        
815             ($encoding =~ /^(7bit|8bit|binary)\Z/) and
816             ($ent->effective_type =~
817             m{^(?:text/plain|application/mac-binhex40|application/mac-binhex)\Z})) {
818             ### Hunt for it:
819 3         7 my $uu_ent = eval { $self->hunt_for_uuencode($ENCODED, $ent) };
  3         9  
820 3 100       11 if ($uu_ent) { ### snark
821 2         9 %$ent = %$uu_ent;
822 2         13 return 1;
823             }
824             else { ### boojum
825 1         4 $self->whine("while hunting for uuencode: $@");
826 1 50       5 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
827             }
828             }
829              
830             ### Open a new bodyhandle for outputting the data:
831 135 50       491 my $body = $self->new_body_for($head) or die "$ME: no body"; # gotta die
832 135 100 50     547 $body->binmode(1) or die "$ME: can't set to binmode: $!"
      100        
833             unless textual_type($ent->effective_type) or !$self->decode_bodies;
834 135 100       469 $body->is_encoded(1) if !$self->decode_bodies;
835              
836             ### Decode and save the body (using the decoder):
837 135 50       578 my $DECODED = $body->open("w") or die "$ME: body not opened: $!";
838 135         6259 eval { $decoder->decode($ENCODED, $DECODED); };
  135         847  
839 135 50       397 $@ and $self->error($@);
840 135 50       580 $DECODED->close or die "$ME: can't close: $!";
841              
842             ### Success! Remember where we put stuff:
843 135         6100 $ent->bodyhandle($body);
844              
845             ### Done!
846 135         1367 1;
847             }
848              
849             #------------------------------
850             #
851             # hunt_for_uuencode ENCODED, ENTITY
852             #
853             # I
854             # Try to detect and dispatch embedded uuencode as a fake multipart message.
855             # Returns new entity or undef.
856             #
857             sub hunt_for_uuencode {
858 3     3 0 7 my ($self, $ENCODED, $ent) = @_;
859 3         4 my ($good, $how_encoded);
860 3         3 local $_;
861 3         8 $self->debug("sniffing around for UUENCODE");
862              
863             ### Heuristic:
864 3 50       10 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
865 3         111 while (defined($_ = $ENCODED->getline)) {
866 18 100       38 if ($good = /^begin [0-7]{3}/) {
867 2         3 $how_encoded = 'uu';
868 2         4 last;
869             }
870 16 50       37 if ($good = /^\(This file must be converted with/i) {
871 0         0 $how_encoded = 'binhex';
872 0         0 last;
873             }
874             }
875 3 100       18 $good or do { $self->debug("no one made the cut"); return 0 };
  1         7  
  1         3  
876              
877             # If a decoder doesn't exist for this type, forget it!
878 2 50       12 my $decoder = MIME::Decoder->new(($how_encoded eq 'uu')?'x-uuencode'
879             :'binhex');
880 2 50       6 unless (defined($decoder)) {
881 0         0 $self->debug("No decoder for $how_encoded attachments");
882 0         0 return 0;
883             }
884              
885             ### New entity:
886 2         10 my $top_ent = $ent->dup; ### no data yet
887 2         9 $top_ent->make_multipart;
888 2         2 my @parts;
889              
890             ### Made the first cut; on to the real stuff:
891 2 50       8 $ENCODED->seek(0,0) or die "$ME: can't seek: $!";
892 2         50 $self->whine("Found a $how_encoded attachment");
893 2         3 my $pre;
894 2         4 while (1) {
895 6         12 my $bin_data = '';
896              
897             ### Try next part:
898 6         34 my $out = IO::File->new(\$bin_data, '>:');
899 6 100       284 eval { $decoder->decode($ENCODED, $out) }; last if $@;
  6         26  
  6         16  
900 4         10 my $preamble = $decoder->last_preamble;
901 4         8 my $filename = $decoder->last_filename;
902 4         9 my $mode = $decoder->last_mode;
903              
904             ### Get probable type:
905 4         5 my $type = 'application/octet-stream';
906 4   50     19 my ($ext) = $filename =~ /\.(\w+)\Z/; $ext = lc($ext || '');
  4         11  
907 4 50       13 if ($ext =~ /^(gif|jpe?g|xbm|xpm|png)\Z/) { $type = "image/$1" }
  4         12  
908              
909             ### If we got our first preamble, create the text portion:
910 4 100 66     37 if (@$preamble and
      66        
911             (grep /\S/, @$preamble) and
912             !@parts) {
913 2         7 my $txt_ent = $self->interface('ENTITY_CLASS')->new;
914              
915 2         9 MIME::Entity->build(Type => "text/plain",
916             Data => "");
917 2         5 $txt_ent->bodyhandle($self->new_body_for($txt_ent->head));
918 2 50       5 my $io = $txt_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
919 2 50       7 $io->print(@$preamble) or die "$ME: can't print: $!";
920 2 50       38 $io->close or die "$ME: can't close: $!";
921 2         98 push @parts, $txt_ent;
922             }
923              
924             ### Create the attachment:
925             ### We use the x-unix-mode convention from "dtmail 1.2.1 SunOS 5.6".
926 4         6 if (1) {
927 4         30 my $bin_ent = MIME::Entity->build(Type=>$type,
928             Filename=>$filename,
929             Data=>"");
930 4         13 $bin_ent->head->mime_attr('Content-type.x-unix-mode' => "0$mode");
931 4         11 $bin_ent->bodyhandle($self->new_body_for($bin_ent->head));
932 4 50       7 $bin_ent->bodyhandle->binmode(1) or die "$ME: can't set to binmode: $!";
933 4 50       9 my $io = $bin_ent->bodyhandle->open("w") or die "$ME: can't create: $!";
934 4 50       12 $io->print($bin_data) or die "$ME: can't print: $!";
935 4 50       83 $io->close or die "$ME: can't close: $!";
936 4         242 push @parts, $bin_ent;
937             }
938             }
939              
940             ### Did we get anything?
941 2 50       6 @parts or return undef;
942             ### Set the parts and a nice preamble:
943 2         10 $top_ent->parts(\@parts);
944 2         9 $top_ent->preamble
945             (["The following is a multipart MIME message which was extracted\n",
946             "from a $how_encoded-encoded message.\n"]);
947 2         16 $top_ent;
948             }
949              
950             #------------------------------
951             #
952             # process_message IN, READER, ENTITY
953             #
954             # I
955             # Process the singlepart body, and return true.
956             # Fatal exception on failure.
957             # Invoked by process_part().
958             #
959             sub process_message {
960 13     13 0 38 my ($self, $in, $rdr, $ent) = @_;
961 13         72 my $head = $ent->head;
962              
963 13         58 $self->debug("process_message");
964              
965             ### Verify the encoding restrictions:
966 13         45 my $encoding = $head->mime_encoding;
967 13 50       141 if ($encoding !~ /^(7bit|8bit|binary)$/) {
968 0         0 $self->error("illegal encoding [$encoding] for MIME type ".
969             $head->mime_type."\n");
970 0         0 $encoding = 'binary';
971             }
972              
973             ### Parse the message:
974 13         77 my $msg = $self->process_part($in, $rdr);
975 13 50       54 return undef unless defined($msg);
976              
977             ### How to handle nested messages?
978 13 100       44 if ($self->extract_nested_messages eq 'REPLACE') {
979 1         5 %$ent = %$msg; ### shallow replace
980 1         2 %$msg = ();
981             }
982             else { ### "NEST" or generic 1:
983 12         59 $ent->bodyhandle(undef);
984 12         43 $ent->add_part($msg);
985             }
986 13         79 1;
987             }
988              
989             #------------------------------
990             #
991             # process_part IN, READER, [OPTSHASH...]
992             #
993             # I
994             # The real back-end engine.
995             # See the documentation up top for the overview of the algorithm.
996             # The OPTSHASH can contain:
997             #
998             # Retype => retype this part to the given content-type
999             #
1000             # Return the entity.
1001             # Fatal exception on failure. Returns undef if message to complex
1002             #
1003             sub process_part {
1004 203     203 0 807 my ($self, $in, $rdr, %p) = @_;
1005              
1006 203 50       655 if ($self->{MP5_MaxParts} > 0) {
1007 0         0 $self->{MP5_NumParts}++;
1008 0 0       0 if ($self->{MP5_NumParts} > $self->{MP5_MaxParts}) {
1009             # Return UNDEF if msg too complex
1010 0         0 return undef;
1011             }
1012             }
1013              
1014 203   66     1304 $rdr ||= MIME::Parser::Reader->new;
1015             #debug "process_part";
1016 203         595 $self->results->level(+1);
1017              
1018             ### Create a new entity:
1019 203         690 my $ent = $self->interface('ENTITY_CLASS')->new;
1020              
1021             ### Parse and add the header:
1022 203         850 my $head = $self->process_header($in, $rdr);
1023 203 100       744 if (not defined $head) {
1024 2         4 $self->debug("bogus empty part");
1025 2         13 $head = $self->interface('HEAD_CLASS')->new;
1026 2         65 $head->mime_type('text/plain');
1027 2         8 $ent->head($head);
1028 2         4 $ent->bodyhandle($self->new_body_for($head));
1029 2 50       3 $ent->bodyhandle->open("w")->close or die "$ME: can't close: $!";
1030 2 50       33 if (!$self->{MP5_AmbiguousContent}) {
1031 2 50       19 if ($ent->head->ambiguous_content) {
1032 0         0 $self->{MP5_AmbiguousContent} = 1;
1033             }
1034             }
1035 2         93 $self->results->level(-1);
1036 2         4 return $ent;
1037             }
1038 201         955 $ent->head($head);
1039              
1040             ### Tweak the content-type based on context from our parent...
1041             ### For example, multipart/digest messages default to type message/rfc822:
1042 201 100       701 $head->mime_type($p{Retype}) if $p{Retype};
1043              
1044             # We have the header, so that's enough to check for
1045             # ambiguous content...
1046 201 100       874 if (!$self->{MP5_AmbiguousContent}) {
1047 195 100       697 if ($ent->head->ambiguous_content) {
1048 9         35 $self->{MP5_AmbiguousContent} = 1;
1049             }
1050             }
1051             ### Get the MIME type and subtype:
1052 201         846 my ($type, $subtype) = (split('/', $head->mime_type, -1), '');
1053 201         1060 $self->debug("type = $type, subtype = $subtype");
1054              
1055             ### Handle, according to the MIME type:
1056 201 100 100     1849 if ($type eq 'multipart') {
    100 33        
1057 51 50       264 return undef unless defined($self->process_multipart($in, $rdr, $ent));
1058             }
1059             elsif (("$type/$subtype" eq "message/rfc822" ||
1060             "$type/$subtype" eq "message/external-body" ||
1061             ("$type/$subtype" eq "message/partial" && defined($head->mime_attr("content-type.number")) && $head->mime_attr("content-type.number") == 1)) &&
1062             $self->extract_nested_messages) {
1063 13         41 $self->debug("attempting to process a nested message");
1064 13 50       55 return undef unless defined($self->process_message($in, $rdr, $ent));
1065             }
1066             else {
1067 137         531 $self->process_singlepart($in, $rdr, $ent);
1068             }
1069              
1070             ### Done (we hope!):
1071 201         59168 $self->results->level(-1);
1072 201         948 return $ent;
1073             }
1074              
1075              
1076              
1077             =back
1078              
1079             =head2 Parsing an input source
1080              
1081             =over 4
1082              
1083             =cut
1084              
1085             #------------------------------
1086              
1087             =item parse_data DATA
1088              
1089             I
1090             Parse a MIME message that's already in core. This internally creates an "in
1091             memory" filehandle on a Perl scalar value using PerlIO
1092              
1093             You may supply the DATA in any of a number of ways...
1094              
1095             =over 4
1096              
1097             =item *
1098              
1099             B which holds the message. A reference to this scalar will be used
1100             internally.
1101              
1102             =item *
1103              
1104             B which holds the message. This reference will be used
1105             internally.
1106              
1107             =item *
1108              
1109             B
1110              
1111             B The array is internally concatenated into a
1112             temporary string, and a reference to the new string is used internally.
1113              
1114             It is much more efficient to pass in a scalar reference, so please consider
1115             refactoring your code to use that interface instead. If you absolutely MUST
1116             pass an array, you may be better off using IO::ScalarArray in the calling code
1117             to generate a filehandle, and passing that filehandle to I
1118              
1119             =back
1120              
1121             Returns the parsed MIME::Entity on success.
1122              
1123             =cut
1124              
1125             sub parse_data {
1126 11     11 1 1378 my ($self, $data) = @_;
1127              
1128 11 50       36 if (!defined($data)) {
1129 0         0 croak "parse_data: No data passed";
1130             }
1131              
1132             ### Get data as a scalar:
1133 11         18 my $io;
1134              
1135 11 100       44 if (! ref $data ) {
    100          
    50          
1136 9         75 $io = IO::File->new(\$data, '<:');
1137             } elsif( ref $data eq 'SCALAR' ) {
1138 1         6 $io = IO::File->new($data, '<:');
1139             } elsif( ref $data eq 'ARRAY' ) {
1140             # Passing arrays is deprecated now that we've nuked IO::ScalarArray
1141             # but for backwards compatibility we still support it by joining the
1142             # array lines to a scalar and doing scalar IO on it.
1143 1         4 my $tmp_data = join('', @$data);
1144 1         7 $io = IO::File->new(\$tmp_data, '<:');
1145             } else {
1146 0         0 croak "parse_data: wrong argument ref type: ", ref($data);
1147             }
1148              
1149 11 50       765 if (!$io) {
1150 0         0 croak "parse_data: unable to open in-memory file handle";
1151             }
1152              
1153             ### Parse!
1154 11         50 return $self->parse($io);
1155             }
1156              
1157             #------------------------------
1158              
1159             =item parse INSTREAM
1160              
1161             I
1162             Takes a MIME-stream and splits it into its component entities.
1163              
1164             The INSTREAM can be given as an IO::File, a globref filehandle (like
1165             C<\*STDIN>), or as I blessed object conforming to the IO::
1166             interface (which minimally implements getline() and read()).
1167              
1168             Returns the parsed MIME::Entity on success.
1169             Throws exception on failure. If the message contained too many
1170             parts (as set by I), returns undef.
1171              
1172             =cut
1173              
1174             sub parse {
1175 65     65 1 5290 my $self = shift;
1176 65         140 my $in = shift;
1177 65         168 my $entity;
1178 65         408 local $/ = "\n"; ### just to be safe
1179              
1180 65         257 local $\ = undef; # CPAN ticket #71041
1181 65         329 $self->init_parse;
1182 65         265 $entity = $self->process_part($in, undef); ### parse!
1183              
1184 65         707 $entity;
1185             }
1186              
1187             ### Backcompat:
1188             sub read {
1189 0     0 1 0 shift->parse(@_);
1190             }
1191             sub parse_FH {
1192 0     0 0 0 shift->parse(@_);
1193             }
1194              
1195             #------------------------------
1196              
1197             =item parse_open EXPR
1198              
1199             I
1200             Convenience front-end onto C.
1201             Simply give this method any expression that may be sent as the second
1202             argument to open() to open a filehandle for reading.
1203              
1204             Returns the parsed MIME::Entity on success.
1205             Throws exception on failure.
1206              
1207             =cut
1208              
1209             sub parse_open {
1210 41     41 1 9029 my ($self, $expr) = @_;
1211 41         85 my $ent;
1212              
1213 41 50       437 my $io = IO::File->new($expr) or die "$ME: couldn't open $expr: $!";
1214 41         5581 $ent = $self->parse($io);
1215 41 50       175 $io->close or die "$ME: can't close: $!";
1216 41         2846 $ent;
1217             }
1218              
1219             ### Backcompat:
1220             sub parse_in {
1221 0     0 0 0 usage "parse_in() is now parse_open()";
1222 0         0 shift->parse_open(@_);
1223             }
1224              
1225             #------------------------------
1226              
1227             =item parse_two HEADFILE, BODYFILE
1228              
1229             I
1230             Convenience front-end onto C, intended for programs
1231             running under mail-handlers like B, which splits the incoming
1232             mail message into a header file and a body file.
1233             Simply give this method the paths to the respective files.
1234              
1235             B it is assumed that, once the files are cat'ed together,
1236             there will be a blank line separating the head part and the body part.
1237              
1238             B new implementation slurps files into line array
1239             for portability, instead of using 'cat'. May be an issue if
1240             your messages are large.
1241              
1242             Returns the parsed MIME::Entity on success.
1243             Throws exception on failure.
1244              
1245             =cut
1246              
1247             sub parse_two {
1248 1     1 1 7 my ($self, $headfile, $bodyfile) = @_;
1249 1         2 my $data;
1250 1         2 foreach ($headfile, $bodyfile) {
1251 2 50       120 open IN, "<$_" or die "$ME: open $_: $!";
1252 2         9 $data .= do { local $/; };
  2         8  
  2         78  
1253 2 50       30 close IN or die "$ME: can't close: $!";
1254             }
1255 1         5 return $self->parse_data($data);
1256             }
1257              
1258             =back
1259              
1260             =cut
1261              
1262              
1263              
1264              
1265             #------------------------------------------------------------
1266              
1267             =head2 Specifying output destination
1268              
1269             B in 5.212 and before, this was done by methods
1270             of MIME::Parser. However, since many users have requested
1271             fine-tuned control over how this is done, the logic has been split
1272             off from the parser into its own class, MIME::Parser::Filer
1273             Every MIME::Parser maintains an instance of a MIME::Parser::Filer
1274             subclass to manage disk output (see L for details.)
1275              
1276             The benefit to this is that the MIME::Parser code won't be
1277             confounded with a lot of garbage related to disk output.
1278             The drawback is that the way you override the default behavior
1279             will change.
1280              
1281             For now, all the normal public-interface methods are still provided,
1282             but many are only stubs which create or delegate to the underlying
1283             MIME::Parser::Filer object.
1284              
1285             =over 4
1286              
1287             =cut
1288              
1289             #------------------------------
1290              
1291             =item filer [FILER]
1292              
1293             I
1294             Get/set the FILER object used to manage the output of files to disk.
1295             This will be some subclass of L.
1296              
1297             =cut
1298              
1299             sub filer {
1300 323     323 1 18541 my ($self, $filer) = @_;
1301 323 100       855 if (@_ > 1) {
1302 78         545 $self->{MP5_Filer} = $filer;
1303 78         614 $filer->results($self->results); ### but we still need in init_parse
1304             }
1305 323         1400 $self->{MP5_Filer};
1306             }
1307              
1308             #------------------------------
1309              
1310             =item output_dir DIRECTORY
1311              
1312             I
1313             Causes messages to be filed directly into the given DIRECTORY.
1314             It does this by setting the underlying L to
1315             a new instance of MIME::Parser::FileInto, and passing the arguments
1316             into that class' new() method.
1317              
1318             B Since this method replaces the underlying
1319             filer, you must invoke it I doing changing any attributes
1320             of the filer, like the output prefix; otherwise those changes
1321             will be lost.
1322              
1323             =cut
1324              
1325             sub output_dir {
1326 81     81 1 303 my ($self, @init) = @_;
1327 81 100       264 if (@_ > 1) {
1328 75         650 $self->filer(MIME::Parser::FileInto->new(@init));
1329             }
1330             else {
1331 6         15 &MIME::Tools::whine("0-arg form of output_dir is deprecated.");
1332 6         12 return $self->filer->output_dir;
1333             }
1334             }
1335              
1336             #------------------------------
1337              
1338             =item output_under BASEDIR, OPTS...
1339              
1340             I
1341             Causes messages to be filed directly into subdirectories of the given
1342             BASEDIR, one subdirectory per message. It does this by setting the
1343             underlying L to a new instance of MIME::Parser::FileUnder,
1344             and passing the arguments into that class' new() method.
1345              
1346             B Since this method replaces the underlying
1347             filer, you must invoke it I doing changing any attributes
1348             of the filer, like the output prefix; otherwise those changes
1349             will be lost.
1350              
1351             =cut
1352              
1353             sub output_under {
1354 3     3 1 17 my ($self, @init) = @_;
1355 3 50       10 if (@_ > 1) {
1356 3         40 $self->filer(MIME::Parser::FileUnder->new(@init));
1357             }
1358             else {
1359 0         0 &MIME::Tools::whine("0-arg form of output_under is deprecated.");
1360 0         0 return $self->filer->output_dir;
1361             }
1362             }
1363              
1364             #------------------------------
1365              
1366             =item output_path HEAD
1367              
1368             I
1369             Given a MIME head for a file to be extracted, come up with a good
1370             output pathname for the extracted file.
1371             Identical to the preferred form:
1372              
1373             $parser->filer->output_path(...args...);
1374              
1375             We just delegate this to the underlying L object.
1376              
1377             =cut
1378              
1379             sub output_path {
1380 90     90 1 166 my $self = shift;
1381             ### We use it, so don't warn!
1382             ### &MIME::Tools::whine("output_path deprecated in MIME::Parser");
1383 90         255 $self->filer->output_path(@_);
1384             }
1385              
1386             #------------------------------
1387              
1388             =item output_prefix [PREFIX]
1389              
1390             I
1391             Get/set the short string that all filenames for extracted body-parts
1392             will begin with (assuming that there is no better "recommended filename").
1393             Identical to the preferred form:
1394              
1395             $parser->filer->output_prefix(...args...);
1396              
1397             We just delegate this to the underlying L object.
1398              
1399             =cut
1400              
1401             sub output_prefix {
1402 0     0 1 0 my $self = shift;
1403 0         0 &MIME::Tools::whine("output_prefix deprecated in MIME::Parser");
1404 0         0 $self->filer->output_prefix(@_);
1405             }
1406              
1407             #------------------------------
1408              
1409             =item evil_filename NAME
1410              
1411             I
1412             Identical to the preferred form:
1413              
1414             $parser->filer->evil_filename(...args...);
1415              
1416             We just delegate this to the underlying L object.
1417              
1418             =cut
1419              
1420             sub evil_filename {
1421 2     2 1 17 my $self = shift;
1422 2         7 &MIME::Tools::whine("evil_filename deprecated in MIME::Parser");
1423 2         5 $self->filer->evil_filename(@_);
1424             }
1425              
1426             #------------------------------
1427              
1428             =item max_parts NUM
1429              
1430             I
1431             Limits the number of MIME parts we will parse.
1432              
1433             Normally, instances of this class parse a message to the bitter end.
1434             Messages with many MIME parts can cause excessive memory consumption.
1435             If you invoke this method, parsing will abort with a die() if a message
1436             contains more than NUM parts.
1437              
1438             If NUM is set to -1 (the default), then no maximum limit is enforced.
1439              
1440             With no argument, returns the current setting as an integer
1441              
1442             =cut
1443              
1444             sub max_parts {
1445 0     0 1 0 my($self, $num) = @_;
1446 0 0       0 if (@_ > 1) {
1447 0         0 $self->{MP5_MaxParts} = $num;
1448             }
1449 0         0 return $self->{MP5_MaxParts};
1450             }
1451              
1452             #------------------------------
1453              
1454             =item output_to_core YESNO
1455              
1456             I
1457             Normally, instances of this class output all their decoded body
1458             data to disk files (via MIME::Body::File). However, you can change
1459             this behaviour by invoking this method before parsing:
1460              
1461             If YESNO is false (the default), then all body data goes
1462             to disk files.
1463              
1464             If YESNO is true, then all body data goes to in-core data structures
1465             This is a little risky (what if someone emails you an MPEG or a tar
1466             file, hmmm?) but people seem to want this bit of noose-shaped rope,
1467             so I'm providing it.
1468             Note that setting this attribute true I mean that parser-internal
1469             temporary files are avoided! Use L for that.
1470              
1471             With no argument, returns the current setting as a boolean.
1472              
1473             =cut
1474              
1475             sub output_to_core {
1476 187     187 1 5081 my ($self, $yesno) = @_;
1477 187 100       575 if (@_ > 1) {
1478 44 100 100     253 $yesno = 0 if ($yesno and $yesno eq 'NONE');
1479 44         146 $self->{MP5_FilerToCore} = $yesno;
1480             }
1481 187         591 $self->{MP5_FilerToCore};
1482             }
1483              
1484              
1485             =item tmp_recycling
1486              
1487             I
1488              
1489             This method is a no-op to preserve the pre-5.421 API.
1490              
1491             The tmp_recycling() feature was removed in 5.421 because it had never actually
1492             worked. Please update your code to stop using it.
1493              
1494             =cut
1495              
1496             sub tmp_recycling
1497             {
1498 1     1 1 255 return;
1499             }
1500              
1501              
1502              
1503             #------------------------------
1504              
1505             =item tmp_to_core [YESNO]
1506              
1507             I
1508             Should L create real temp files, or
1509             use fake in-core ones? Normally we allow the creation of temporary
1510             disk files, since this allows us to handle huge attachments even when
1511             core is limited.
1512              
1513             If YESNO is true, we implement new_tmpfile() via in-core handles.
1514             If YESNO is false (the default), we use real tmpfiles.
1515             With no argument, just returns the current setting.
1516              
1517             =cut
1518              
1519             sub tmp_to_core {
1520 0     0 1 0 my ($self, $yesno) = @_;
1521 0 0       0 $self->{MP5_TmpToCore} = $yesno if (@_ > 1);
1522 0         0 $self->{MP5_TmpToCore};
1523             }
1524              
1525             #------------------------------
1526              
1527             =item use_inner_files [YESNO]
1528              
1529             I.
1530              
1531             I
1532              
1533             MIME::Parser no longer supports IO::InnerFile, but this method is retained for
1534             backwards compatibility. It does nothing.
1535              
1536             The original reasoning for IO::InnerFile was that inner files were faster than
1537             "in-core" temp files. At the time, the "in-core" tempfile support was
1538             implemented with IO::Scalar from the IO-Stringy distribution, which used the
1539             tie() interface to wrap a scalar with the appropriate IO::Handle operations.
1540             The penalty for this was fairly hefty, and IO::InnerFile actually was faster.
1541              
1542             Nowadays, MIME::Parser uses Perl's built in ability to open a filehandle on an
1543             in-memory scalar variable via PerlIO. Benchmarking shows that IO::InnerFile is
1544             slightly slower than using in-memory temporary files, and is slightly faster
1545             than on-disk temporary files. Both measurements are within a few percent of
1546             each other. Since there's no real benefit, and since the IO::InnerFile abuse
1547             was fairly hairy and evil ("writes" to it were faked by extending the size of
1548             the inner file with the assumption that the only data you'd ever ->print() to
1549             it would be the line from the "outer" file, for example) it's been removed.
1550              
1551             =cut
1552              
1553             sub use_inner_files {
1554 0     0 1 0 return 0;
1555             }
1556              
1557             =back
1558              
1559             =cut
1560              
1561              
1562             #------------------------------------------------------------
1563              
1564             =head2 Specifying classes to be instantiated
1565              
1566             =over 4
1567              
1568             =cut
1569              
1570             #------------------------------
1571              
1572             =item interface ROLE,[VALUE]
1573              
1574             I
1575             During parsing, the parser normally creates instances of certain classes,
1576             like MIME::Entity. However, you may want to create a parser subclass
1577             that uses your own experimental head, entity, etc. classes (for example,
1578             your "head" class may provide some additional MIME-field-oriented methods).
1579              
1580             If so, then this is the method that your subclass should invoke during
1581             init. Use it like this:
1582              
1583             package MyParser;
1584             @ISA = qw(MIME::Parser);
1585             ...
1586             sub init {
1587             my $self = shift;
1588             $self->SUPER::init(@_); ### do my parent's init
1589             $self->interface(ENTITY_CLASS => 'MIME::MyEntity');
1590             $self->interface(HEAD_CLASS => 'MIME::MyHead');
1591             $self; ### return
1592             }
1593              
1594             With no VALUE, returns the VALUE currently associated with that ROLE.
1595              
1596             =cut
1597              
1598             sub interface {
1599 500     500 1 3358 my ($self, $role, $value) = @_;
1600 500 100       1318 $self->{MP5_Interface}{$role} = $value if (defined($value));
1601 500         2769 $self->{MP5_Interface}{$role};
1602             }
1603              
1604             #------------------------------
1605              
1606             =item new_body_for HEAD
1607              
1608             I
1609             Based on the HEAD of a part we are parsing, return a new
1610             body object (any desirable subclass of MIME::Body) for
1611             receiving that part's data.
1612              
1613             If you set the C option to false before parsing
1614             (the default), then we call C and create a
1615             new MIME::Body::File on that filename.
1616              
1617             If you set the C option to true before parsing,
1618             then you get a MIME::Body::InCore instead.
1619              
1620             If you want the parser to do something else entirely, you can
1621             override this method in a subclass.
1622              
1623             =cut
1624              
1625             sub new_body_for {
1626 143     143 1 359 my ($self, $head) = @_;
1627              
1628 143 100       536 if ($self->output_to_core) {
1629 47         187 $self->debug("outputting body to core");
1630 47         766 return (new MIME::Body::InCore);
1631             }
1632             else {
1633 96         280 my $outpath = $self->output_path($head);
1634 96         507 $self->debug("outputting body to disk file: $outpath");
1635 96         298 $self->filer->purgeable($outpath); ### we plan to use it
1636 96         911 return (new MIME::Body::File $outpath);
1637             }
1638             }
1639              
1640             #------------------------------
1641              
1642             =pod
1643              
1644             =back
1645              
1646             =head2 Temporary File Creation
1647              
1648             =over
1649              
1650             =item tmp_dir DIRECTORY
1651              
1652             I
1653             Causes any temporary files created by this parser to be created in the
1654             given DIRECTORY.
1655              
1656             If called without arguments, returns current value.
1657              
1658             The default value is undef, which will cause new_tmpfile() to use the
1659             system default temporary directory.
1660              
1661             =cut
1662              
1663             sub tmp_dir
1664             {
1665 120     120 1 300 my ($self, $dirname) = @_;
1666 120 50       344 if ( $dirname ) {
1667 0         0 $self->{MP5_TmpDir} = $dirname;
1668             }
1669              
1670 120         368 return $self->{MP5_TmpDir};
1671             }
1672              
1673             =item new_tmpfile
1674              
1675             I
1676             Return an IO handle to be used to hold temporary data during a parse.
1677              
1678             The default uses MIME::Tools::tmpopen() to create a new temporary file,
1679             unless L dictates otherwise, but you can
1680             override this. You shouldn't need to.
1681              
1682             The location for temporary files can be changed on a per-parser basis
1683             with L.
1684              
1685             If you do override this, make certain that the object you return is
1686             set for binmode(), and is able to handle the following methods:
1687              
1688             read(BUF, NBYTES)
1689             getline()
1690             getlines()
1691             print(@ARGS)
1692             flush()
1693             seek(0, 0)
1694              
1695             Fatal exception if the stream could not be established.
1696              
1697             =cut
1698              
1699             sub new_tmpfile {
1700 120     120 1 4276 my ($self) = @_;
1701              
1702 120         197 my $io;
1703 120 100       423 if ($self->{MP5_TmpToCore}) {
1704 1         2 my $var;
1705 1 50       7 $io = IO::File->new(\$var, '+>:') or die "$ME: Can't open in-core tmpfile: $!";
1706             } else {
1707 119         252 my $args = {};
1708 119 100       367 if( $self->tmp_dir ) {
1709 1         5 $args->{DIR} = $self->tmp_dir;
1710             }
1711 119 50       506 $io = tmpopen( $args ) or die "$ME: can't open tmpfile: $!\n";
1712 119 50       105162 binmode($io) or die "$ME: can't set to binmode: $!";
1713             }
1714 120         399 return $io;
1715             }
1716              
1717             =back
1718              
1719             =cut
1720              
1721              
1722              
1723              
1724              
1725              
1726             #------------------------------------------------------------
1727              
1728             =head2 Parse results and error recovery
1729              
1730             =over 4
1731              
1732             =cut
1733              
1734             #------------------------------
1735              
1736             =item last_error
1737              
1738             I
1739             Return the error (if any) that we ignored in the last parse.
1740              
1741             =cut
1742              
1743             sub last_error {
1744 0     0 1 0 join '', shift->results->errors;
1745             }
1746              
1747             =item ambiguous_content
1748              
1749             I
1750             Returns true if the most recently parsed message has one or more
1751             entities with ambiguous content. See the documentation of
1752             C's C method for details.
1753              
1754             Note that while these two calls to ambuguous_content return the same
1755             thing:
1756              
1757             $entity = $parser->parse($whatever_stream);
1758             $parser->ambuguous_content();
1759             $entity->ambuguous_content();
1760              
1761             the former is faster because it simply returns the results that were
1762             detected during the parse, while the latter actually executes the code
1763             that checks for ambiguous content again.
1764              
1765             Messages with ambiguous content should be treated as a security risk.
1766             In particular, if MIME::Parser is used in an email security tool,
1767             ambiguous messages should not be delivered to end-users.
1768              
1769             =cut
1770             sub ambiguous_content {
1771 10     10 1 23173 my ($self) = @_;
1772 10         98 return $self->{MP5_AmbiguousContent};
1773             }
1774              
1775             #------------------------------
1776              
1777             =item last_head
1778              
1779             I
1780             Return the top-level MIME header of the last stream we attempted to parse.
1781             This is useful for replying to people who sent us bad MIME messages.
1782              
1783             ### Parse an input stream:
1784             eval { $entity = $parser->parse(\*STDIN) };
1785             if (!$entity) { ### parse failed!
1786             my $decapitated = $parser->last_head;
1787             ...
1788             }
1789              
1790             =cut
1791              
1792             sub last_head {
1793 0     0 1 0 shift->results->top_head;
1794             }
1795              
1796             #------------------------------
1797              
1798             =item results
1799              
1800             I
1801             Return an object containing lots of info from the last entity parsed.
1802             This will be an instance of class
1803             L.
1804              
1805             =cut
1806              
1807             sub results {
1808 775     775 1 3958 shift->{MP5_Results};
1809             }
1810              
1811              
1812             =back
1813              
1814             =cut
1815              
1816              
1817             #------------------------------
1818             1;
1819             __END__