| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Email::MIME::RFC2047::Decoder; | 
| 2 |  |  |  |  |  |  | $Email::MIME::RFC2047::Decoder::VERSION = '0.96'; | 
| 3 | 6 |  |  | 6 |  | 98835 | use strict; | 
|  | 6 |  |  |  |  | 19 |  | 
|  | 6 |  |  |  |  | 325 |  | 
| 4 | 6 |  |  | 6 |  | 39 | use warnings; | 
|  | 6 |  |  |  |  | 16 |  | 
|  | 6 |  |  |  |  | 248 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | # ABSTRACT: Decoding of non-ASCII MIME headers | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 6 |  |  | 6 |  | 4091 | use Encode (); | 
|  | 6 |  |  |  |  | 71789 |  | 
|  | 6 |  |  |  |  | 196 |  | 
| 9 | 6 |  |  | 6 |  | 3645 | use MIME::Base64 (); | 
|  | 6 |  |  |  |  | 4050 |  | 
|  | 6 |  |  |  |  | 5996 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | # Don't include period "." to correctly handle obs-phrase. | 
| 12 |  |  |  |  |  |  | my $rfc_specials = '()<>\[\]:;\@\\,"'; | 
| 13 |  |  |  |  |  |  | my $rfc_specials_no_quote = '()<>\[\]:;\@\\,'; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | # Regex for encoded words. | 
| 16 |  |  |  |  |  |  | # This also checks the validity of base64 encoded data because MIME::Base64 | 
| 17 |  |  |  |  |  |  | # silently ignores invalid characters. | 
| 18 |  |  |  |  |  |  | # Captures ($encoding, $content_b, $content_q) | 
| 19 |  |  |  |  |  |  | my $encoded_word_text_re = qr/ | 
| 20 |  |  |  |  |  |  | (?: ^ | (?<= [\r\n\t ] ) ) | 
| 21 |  |  |  |  |  |  | = \? ( [A-Za-z0-9_-]++ ) \? | 
| 22 |  |  |  |  |  |  | (?: | 
| 23 |  |  |  |  |  |  | [Bb] \? | 
| 24 |  |  |  |  |  |  | ( | 
| 25 |  |  |  |  |  |  | (?: | 
| 26 |  |  |  |  |  |  | [A-Za-z0-9+\/]{2} | 
| 27 |  |  |  |  |  |  | (?: == | [A-Za-z0-9+\/] [A-Za-z0-9+\/=] ) | 
| 28 |  |  |  |  |  |  | )++ | 
| 29 |  |  |  |  |  |  | ) | | 
| 30 |  |  |  |  |  |  | [Qq] \? | 
| 31 |  |  |  |  |  |  | ( [\x21-\x3E\x40-\x7E]++ ) | 
| 32 |  |  |  |  |  |  | ) | 
| 33 |  |  |  |  |  |  | \? = | 
| 34 |  |  |  |  |  |  | (?= \z | [\r\n\t ] ) | 
| 35 |  |  |  |  |  |  | /x; | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | # Same as $encoded_word_text_re but excluding RFC 2822 special chars | 
| 38 |  |  |  |  |  |  | # Also matches after and before special chars (why?). | 
| 39 |  |  |  |  |  |  | my $encoded_word_phrase_re = qr/ | 
| 40 |  |  |  |  |  |  | (?: ^ | (?<= [\r\n\t $rfc_specials_no_quote] ) ) | 
| 41 |  |  |  |  |  |  | = \? ( [A-Za-z0-9_-]++ ) \? | 
| 42 |  |  |  |  |  |  | (?: | 
| 43 |  |  |  |  |  |  | [Bb] \? | 
| 44 |  |  |  |  |  |  | ( | 
| 45 |  |  |  |  |  |  | (?: | 
| 46 |  |  |  |  |  |  | [A-Za-z0-9+\/]{2} | 
| 47 |  |  |  |  |  |  | (?: == | [A-Za-z0-9+\/] [A-Za-z0-9+\/=] ) | 
| 48 |  |  |  |  |  |  | )++ | 
| 49 |  |  |  |  |  |  | ) | | 
| 50 |  |  |  |  |  |  | [Qq] \? | 
| 51 |  |  |  |  |  |  | ( [A-Za-z0-9!*+\/=_-]++ ) | 
| 52 |  |  |  |  |  |  | ) | 
| 53 |  |  |  |  |  |  | \? = | 
| 54 |  |  |  |  |  |  | (?= \z | [\r\n\t $rfc_specials_no_quote] ) | 
| 55 |  |  |  |  |  |  | /x; | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | # Same as $encoded_word_text_re but excluding parens and backslash. | 
| 58 |  |  |  |  |  |  | # Also matches after and before parens. | 
| 59 |  |  |  |  |  |  | #my $encoded_word_comment_re = qr/ | 
| 60 |  |  |  |  |  |  | #    (?<= [\r\n\t ()] ) | 
| 61 |  |  |  |  |  |  | #    = \? ( [A-Za-z0-9_-]++ ) \? | 
| 62 |  |  |  |  |  |  | #    (?: | 
| 63 |  |  |  |  |  |  | #        [Bb] \? | 
| 64 |  |  |  |  |  |  | #        ( | 
| 65 |  |  |  |  |  |  | #            (?: | 
| 66 |  |  |  |  |  |  | #                [A-Za-z0-9+\/]{2} | 
| 67 |  |  |  |  |  |  | #                (?: == | [A-Za-z0-9+\/] [A-Za-z0-9+\/=] ) | 
| 68 |  |  |  |  |  |  | #            )++ | 
| 69 |  |  |  |  |  |  | #        ) | | 
| 70 |  |  |  |  |  |  | #        [Qq] \? | 
| 71 |  |  |  |  |  |  | #        ( [\x21-\x27\x2A-\x3E\x40-\x5B\x5D-\x7E]++ ) | 
| 72 |  |  |  |  |  |  | #    ) | 
| 73 |  |  |  |  |  |  | #    \? = | 
| 74 |  |  |  |  |  |  | #    (?= [\r\n\t ()] ) | 
| 75 |  |  |  |  |  |  | #/x; | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | my $quoted_string_re = qr/ | 
| 78 |  |  |  |  |  |  | " | 
| 79 |  |  |  |  |  |  | ( | 
| 80 |  |  |  |  |  |  | (?: | 
| 81 |  |  |  |  |  |  | [^"\\]++ | | 
| 82 |  |  |  |  |  |  | \\ . | 
| 83 |  |  |  |  |  |  | )*+ | 
| 84 |  |  |  |  |  |  | ) | 
| 85 |  |  |  |  |  |  | " | 
| 86 |  |  |  |  |  |  | /sx; | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | my $comment_re = qr/ | 
| 89 |  |  |  |  |  |  | ( | 
| 90 |  |  |  |  |  |  | \( | 
| 91 |  |  |  |  |  |  | (?: | 
| 92 |  |  |  |  |  |  | [^()\\]++ | | 
| 93 |  |  |  |  |  |  | \\ . | | 
| 94 |  |  |  |  |  |  | (?-1) | 
| 95 |  |  |  |  |  |  | )*+ | 
| 96 |  |  |  |  |  |  | \) | 
| 97 |  |  |  |  |  |  | ) | 
| 98 |  |  |  |  |  |  | /sx; | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | sub new { | 
| 101 | 39 |  |  | 39 | 1 | 1049 | my $package = shift; | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 39 |  |  |  |  | 123 | my $self = {}; | 
| 104 |  |  |  |  |  |  |  | 
| 105 | 39 |  |  |  |  | 262 | return bless($self, $package); | 
| 106 |  |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | sub decode_text { | 
| 109 | 13 |  |  | 13 | 1 | 11101 | my $self = shift; | 
| 110 |  |  |  |  |  |  |  | 
| 111 | 13 |  |  |  |  | 51 | return $self->_decode('text', @_); | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | sub decode_phrase { | 
| 115 | 101 |  |  | 101 | 1 | 57890 | my $self = shift; | 
| 116 |  |  |  |  |  |  |  | 
| 117 | 101 |  |  |  |  | 417 | return $self->_decode('phrase', @_); | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | sub _decode { | 
| 121 | 114 |  |  | 114 |  | 377 | my ($self, $mode, $encoded) = @_; | 
| 122 | 114 | 100 |  |  |  | 400 | my $encoded_ref = ref($encoded) ? $encoded : \$encoded; | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 114 |  |  |  |  | 276 | my $result = ''; | 
| 125 | 114 |  |  |  |  | 239 | my $enc_flag; | 
| 126 |  |  |  |  |  |  | # use shortest match on any characters we don't want to decode | 
| 127 | 114 | 100 |  |  |  | 2879 | my $regex = $mode eq 'phrase' ? | 
| 128 |  |  |  |  |  |  | qr/([^$rfc_specials]*?)($encoded_word_phrase_re|$quoted_string_re|$comment_re)/ : | 
| 129 |  |  |  |  |  |  | qr/(.*?)($encoded_word_text_re)/s; | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 114 |  |  |  |  | 6074 | while ($$encoded_ref =~ /\G$regex/cg) { | 
| 132 | 104 |  |  |  |  | 964 | my ($text, $match, | 
| 133 |  |  |  |  |  |  | $encoding, $b_content, $q_content, | 
| 134 |  |  |  |  |  |  | $qs_content) = | 
| 135 |  |  |  |  |  |  | ($1, $2, $3, $4, $5, $6, $7); | 
| 136 |  |  |  |  |  |  |  | 
| 137 | 104 | 100 |  |  |  | 490 | if (defined($encoding)) { | 
|  |  | 100 |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | # encoded words shouldn't be longer than 75 chars but | 
| 139 |  |  |  |  |  |  | # let's allow up to 255 chars | 
| 140 | 67 | 50 |  |  |  | 264 | if (length($match) > 255) { | 
| 141 | 0 |  |  |  |  | 0 | $result .= $text; | 
| 142 | 0 |  |  |  |  | 0 | $result .= $match; | 
| 143 | 0 |  |  |  |  | 0 | $enc_flag = undef; | 
| 144 | 0 |  |  |  |  | 0 | next; | 
| 145 |  |  |  |  |  |  | } | 
| 146 |  |  |  |  |  |  |  | 
| 147 | 67 |  |  |  |  | 155 | my $content; | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 67 | 100 |  |  |  | 285 | if (defined($b_content)) { | 
| 150 |  |  |  |  |  |  | # MIME B | 
| 151 | 3 |  |  |  |  | 32 | $content = MIME::Base64::decode_base64($b_content); | 
| 152 |  |  |  |  |  |  | } | 
| 153 |  |  |  |  |  |  | else { | 
| 154 |  |  |  |  |  |  | # MIME Q | 
| 155 | 64 |  |  |  |  | 172 | $content = $q_content; | 
| 156 | 64 |  |  |  |  | 219 | $content =~ tr/_/ /; | 
| 157 | 64 |  |  |  |  | 453 | $content =~ s/=([0-9A-Fa-f]{2})/chr(hex($1))/eg; | 
|  | 231 |  |  |  |  | 1255 |  | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  |  | 
| 160 | 67 |  |  |  |  | 158 | my $chunk; | 
| 161 | 67 |  |  |  |  | 158 | eval { | 
| 162 | 67 |  |  |  |  | 360 | $chunk = Encode::decode( | 
| 163 |  |  |  |  |  |  | $encoding, | 
| 164 |  |  |  |  |  |  | $content, | 
| 165 |  |  |  |  |  |  | Encode::FB_CROAK | 
| 166 |  |  |  |  |  |  | ); | 
| 167 |  |  |  |  |  |  | }; | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 67 | 100 |  |  |  | 12753 | if ($@) { | 
| 170 | 2 |  |  |  |  | 247 | warn($@); | 
| 171 |  |  |  |  |  |  | # display raw encoded word in case of errors | 
| 172 | 2 |  |  |  |  | 15 | $result .= $text; | 
| 173 | 2 |  |  |  |  | 8 | $result .= $match; | 
| 174 | 2 |  |  |  |  | 7 | $enc_flag = undef; | 
| 175 | 2 |  |  |  |  | 32 | next; | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | # ignore whitespace between encoded words | 
| 179 | 65 | 100 | 100 |  |  | 370 | $result .= $text if !$enc_flag || $text =~ /\S/; | 
| 180 |  |  |  |  |  |  |  | 
| 181 | 65 |  |  |  |  | 200 | $result .= $chunk; | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 65 |  |  |  |  | 797 | $enc_flag = 1; | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  | elsif (defined($qs_content)) { | 
| 186 |  |  |  |  |  |  | # quoted string | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 28 |  |  |  |  | 88 | $result .= $text; | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | # unquote | 
| 191 | 28 |  |  |  |  | 114 | $qs_content =~ s/\\(.)/$1/gs; | 
| 192 | 28 |  |  |  |  | 76 | $result .= $qs_content; | 
| 193 |  |  |  |  |  |  |  | 
| 194 | 28 |  |  |  |  | 326 | $enc_flag = undef; | 
| 195 |  |  |  |  |  |  | } | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  |  | 
| 198 | 114 | 100 |  |  |  | 1165 | $regex = $mode eq 'phrase' ? | 
| 199 |  |  |  |  |  |  | qr/[^$rfc_specials]+/ : | 
| 200 |  |  |  |  |  |  | qr/.+/s; | 
| 201 | 114 | 100 |  |  |  | 1522 | $result .= $& if $$encoded_ref =~ /\G$regex/cg; | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | # normalize whitespace | 
| 204 | 114 |  |  |  |  | 600 | $result =~ s/^[\r\n\t ]+//; | 
| 205 | 114 |  |  |  |  | 689 | $result =~ s/[\r\n\t ]+\z//; | 
| 206 | 114 |  |  |  |  | 677 | $result =~ s/[\r\n\t ]+/ /g; | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | # remove potentially dangerous ASCII control chars | 
| 209 | 114 |  |  |  |  | 355 | $result =~ s/[\x00-\x1f\x7f]//g; | 
| 210 |  |  |  |  |  |  |  | 
| 211 | 114 |  |  |  |  | 690 | return $result; | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | 1; | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | __END__ |