| line | stmt | bran | cond | sub | pod | time | code | 
| 1 | 1 |  |  | 1 |  | 7 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 31 |  | 
| 2 | 1 |  |  | 1 |  | 5 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 44 |  | 
| 3 |  |  |  |  |  |  | package Email::Folder::Mbox; | 
| 4 |  |  |  |  |  |  | { | 
| 5 |  |  |  |  |  |  | $Email::Folder::Mbox::VERSION = '0.858'; | 
| 6 |  |  |  |  |  |  | } | 
| 7 |  |  |  |  |  |  | # ABSTRACT: reads raw RFC822 mails from an mbox file | 
| 8 | 1 |  |  | 1 |  | 6 | use Carp; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 100 |  | 
| 9 | 1 |  |  | 1 |  | 873 | use IO::File; | 
|  | 1 |  |  |  |  | 12229 |  | 
|  | 1 |  |  |  |  | 189 |  | 
| 10 | 1 |  |  | 1 |  | 668 | use Email::Folder::Reader; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 33 |  | 
| 11 | 1 |  |  | 1 |  | 818 | use parent 'Email::Folder::Reader'; | 
|  | 1 |  |  |  |  | 303 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | sub defaults { | 
| 15 | 9 |  |  | 9 | 0 | 85 | ( eol => "\n") | 
| 16 |  |  |  |  |  |  | } | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | sub _open_it { | 
| 19 | 9 |  |  | 9 |  | 15 | my $self = shift; | 
| 20 | 9 |  |  |  |  | 117 | my $file = $self->{_file}; | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | # sanity checking | 
| 23 | 9 | 50 |  |  |  | 187 | croak "$file does not exist" unless (-e $file); | 
| 24 | 9 | 50 |  |  |  | 123 | croak "$file is not a file"  unless (-f $file); | 
| 25 |  |  |  |  |  |  |  | 
| 26 | 9 |  |  |  |  | 50 | local $/ = $self->{eol}; | 
| 27 | 9 |  |  |  |  | 290 | my $fh = $self->_get_fh($file); | 
| 28 |  |  |  |  |  |  |  | 
| 29 | 9 | 100 |  |  |  | 31 | if ($self->{seek_to}) { | 
| 30 |  |  |  |  |  |  | # we were told to seek.  hope it all goes well | 
| 31 | 1 |  |  |  |  | 7 | seek $fh, $self->{seek_to}, 0; | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  | else { | 
| 34 | 8 |  |  |  |  | 201 | my $firstline = <$fh>; | 
| 35 | 8 | 100 |  |  |  | 22 | if ($firstline) { | 
| 36 | 7 | 50 |  |  |  | 57 | croak "$file is not an mbox file" unless $firstline =~ /^From /; | 
| 37 |  |  |  |  |  |  | } | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  |  | 
| 40 | 9 |  |  |  |  | 64 | $self->{_fh} = $fh; | 
| 41 |  |  |  |  |  |  | } | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | sub _get_fh { | 
| 44 | 9 |  |  | 9 |  | 15 | my $self = shift; | 
| 45 | 9 |  |  |  |  | 11 | my $file = shift; | 
| 46 | 9 | 50 |  |  |  | 73 | my $fh = IO::File->new($file) or croak "Cannot open $file"; | 
| 47 | 9 |  |  |  |  | 885 | binmode($fh); | 
| 48 | 9 |  |  |  |  | 18 | return $fh; | 
| 49 |  |  |  |  |  |  | } | 
| 50 |  |  |  |  |  |  |  | 
| 51 | 1 |  |  | 1 |  | 304 | use constant debug => 0; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 834 |  | 
| 52 |  |  |  |  |  |  | my $count; | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | sub next_message { | 
| 55 | 47 |  |  | 47 | 1 | 68 | my $self = shift; | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 47 |  | 66 |  |  | 184 | my $fh = $self->{_fh} || $self->_open_it; | 
| 58 | 47 |  |  |  |  | 147 | local $/ = $self->{eol}; | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 47 |  |  |  |  | 59 | my $mail = ''; | 
| 61 | 47 |  |  |  |  | 61 | my $prev = ''; | 
| 62 | 47 |  |  |  |  | 58 | my $inheaders = 1; | 
| 63 | 47 |  |  |  |  | 55 | ++$count; | 
| 64 | 47 |  |  |  |  | 43 | print "$count starting scanning at line $.\n" if debug; | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 47 |  |  |  |  | 167 | while (my $line = <$fh>) { | 
| 67 | 2310 | 100 | 100 |  |  | 5477 | if ($line eq $/ && $inheaders) { # end of headers | 
| 68 | 40 |  |  |  |  | 45 | print "$count end of headers at line $.\n" if debug; | 
| 69 | 40 |  |  |  |  | 45 | $inheaders = 0; # stop looking for the end of headers | 
| 70 | 40 |  |  |  |  | 58 | my $pos = tell $fh; # where to go back to if it goes wrong | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | # look for a content length header, and try to use that | 
| 73 | 40 | 100 |  |  |  | 399 | if ($mail =~ m/^Content-Length: (\d+)$/mi) { | 
| 74 | 18 |  |  |  |  | 53 | $mail .= $prev; | 
| 75 | 18 |  |  |  |  | 24 | $prev = ''; | 
| 76 | 18 |  |  |  |  | 34 | my $length = $1; | 
| 77 | 18 |  |  |  |  | 22 | print " Content-Length: $length\n" if debug; | 
| 78 | 18 |  |  |  |  | 22 | my $read = ''; | 
| 79 | 18 |  |  |  |  | 54 | while (my $bodyline = <$fh>) { | 
| 80 | 506 | 100 |  |  |  | 924 | last if length $read >= $length; | 
| 81 | 488 |  |  |  |  | 1200 | $read .= $bodyline; | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  | # grab the next line (should be /^From / or undef) | 
| 84 | 18 |  |  |  |  | 82 | my $next = <$fh>; | 
| 85 | 18 | 100 | 100 |  |  | 239 | return "$mail$/$read" | 
| 86 |  |  |  |  |  |  | if !defined $next || $next =~ /^From /; | 
| 87 |  |  |  |  |  |  | # seek back and scan line-by-line like the header | 
| 88 |  |  |  |  |  |  | # wasn't here | 
| 89 | 1 |  |  |  |  | 2 | print " Content-Length assertion failed '$next'\n" if debug; | 
| 90 | 1 |  |  |  |  | 10 | seek $fh, $pos, 0; | 
| 91 |  |  |  |  |  |  | } | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | # much the same, but with Lines: | 
| 94 | 23 | 100 |  |  |  | 171 | if ($mail =~ m/^Lines: (\d+)$/mi) { | 
| 95 | 2 |  |  |  |  | 7 | $mail .= $prev; | 
| 96 | 2 |  |  |  |  | 5 | $prev = ''; | 
| 97 | 2 |  |  |  |  | 4 | my $lines = $1; | 
| 98 | 2 |  |  |  |  | 4 | print " Lines: $lines\n" if debug; | 
| 99 | 2 |  |  |  |  | 4 | my $read = ''; | 
| 100 | 2 |  |  |  |  | 11 | for (1 .. $lines) { $read .= <$fh> } | 
|  | 37 |  |  |  |  | 67 |  | 
| 101 | 2 |  |  |  |  | 10 | <$fh>; # trailing newline | 
| 102 | 2 |  |  |  |  | 8 | my $next = <$fh>; | 
| 103 | 2 | 100 | 66 |  |  | 22 | return "$mail$/$read" | 
| 104 |  |  |  |  |  |  | if !defined $next || $next =~ /^From /; | 
| 105 |  |  |  |  |  |  | # seek back and scan line-by-line like the header | 
| 106 |  |  |  |  |  |  | # wasn't here | 
| 107 | 1 |  |  |  |  | 2 | print " Lines assertion failed '$next'\n" if debug; | 
| 108 | 1 |  |  |  |  | 8 | seek $fh, $pos, 0; | 
| 109 |  |  |  |  |  |  | } | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 2292 | 100 | 100 |  |  | 5450 | last if $prev eq $/ && ($line =~ $self->_from_line_re); | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 2272 |  |  |  |  | 2552 | $mail .= $prev; | 
| 115 | 2272 |  |  |  |  | 14848 | $prev = $line; | 
| 116 |  |  |  |  |  |  | } | 
| 117 | 29 |  |  |  |  | 32 | print "$count end of message line $.\n" if debug; | 
| 118 | 29 | 100 |  |  |  | 86 | return unless $mail; | 
| 119 | 22 |  |  |  |  | 192 | return $mail; | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | my @FROM_RE; | 
| 123 |  |  |  |  |  |  | BEGIN { | 
| 124 | 1 |  |  | 1 |  | 148 | @FROM_RE = ( | 
| 125 |  |  |  |  |  |  | # according to mutt: | 
| 126 |  |  |  |  |  |  | #   A valid message separator looks like: | 
| 127 |  |  |  |  |  |  | #   From [  ] | 
| 128 |  |  |  |  |  |  | qr/^From (?:\S+\s+)?(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)/, | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | # though, as jwz rants, only this is reliable and portable | 
| 131 |  |  |  |  |  |  | qr/^From /, | 
| 132 |  |  |  |  |  |  | ); | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | sub _from_line_re { | 
| 136 | 236 | 50 |  | 236 |  | 7555 | return $FROM_RE[ $_[0]->{jwz_From_} ? 1 : 0 ]; | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | sub tell { | 
| 140 | 1 |  |  | 1 | 1 | 3 | my $self = shift; | 
| 141 | 1 |  |  |  |  | 5 | return tell $self->{_fh}; | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | 1; | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | __END__ |