| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Encode::IMAPUTF7 1.07; |
|
2
|
2
|
|
|
2
|
|
274286
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
82
|
|
|
3
|
2
|
|
|
2
|
|
18
|
use warnings; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
124
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
524
|
use parent qw(Encode::Encoding); |
|
|
2
|
|
|
|
|
422
|
|
|
|
2
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# ABSTRACT: modification of UTF-7 encoding for IMAP |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
__PACKAGE__->Define('IMAP-UTF-7', 'imap-utf-7'); |
|
10
|
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
25877
|
use Encode (); |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
31
|
|
|
12
|
2
|
|
|
2
|
|
1123
|
use MIME::Base64; |
|
|
2
|
|
|
|
|
3115
|
|
|
|
2
|
|
|
|
|
1835
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
#pod =head1 SYNOPSIS |
|
15
|
|
|
|
|
|
|
#pod |
|
16
|
|
|
|
|
|
|
#pod use Encode qw/encode decode/; |
|
17
|
|
|
|
|
|
|
#pod use Encode::IMAPUTF7; |
|
18
|
|
|
|
|
|
|
#pod |
|
19
|
|
|
|
|
|
|
#pod print encode('IMAP-UTF-7', 'Répertoire'); |
|
20
|
|
|
|
|
|
|
#pod print decode('IMAP-UTF-7', 'R&AOk-pertoire'); |
|
21
|
|
|
|
|
|
|
#pod |
|
22
|
|
|
|
|
|
|
#pod =head1 ABSTRACT |
|
23
|
|
|
|
|
|
|
#pod |
|
24
|
|
|
|
|
|
|
#pod IMAP mailbox names are encoded in a modified UTF-7 when names contains |
|
25
|
|
|
|
|
|
|
#pod international characters outside of the printable ASCII range. The modified |
|
26
|
|
|
|
|
|
|
#pod UTF-7 encoding is defined in RFC2060 (section 5.1.3). |
|
27
|
|
|
|
|
|
|
#pod |
|
28
|
|
|
|
|
|
|
#pod =head2 RFC2060 - section 5.1.3 - Mailbox International Naming Convention |
|
29
|
|
|
|
|
|
|
#pod |
|
30
|
|
|
|
|
|
|
#pod By convention, international mailbox names are specified using a modified |
|
31
|
|
|
|
|
|
|
#pod version of the UTF-7 encoding described in [UTF-7]. The purpose of these |
|
32
|
|
|
|
|
|
|
#pod modifications is to correct the following problems with UTF-7: |
|
33
|
|
|
|
|
|
|
#pod |
|
34
|
|
|
|
|
|
|
#pod =for :list |
|
35
|
|
|
|
|
|
|
#pod 1. UTF-7 uses the "+" character for shifting; this conflicts with the common |
|
36
|
|
|
|
|
|
|
#pod use of "+" in mailbox names, in particular USENET newsgroup names. |
|
37
|
|
|
|
|
|
|
#pod 2. UTF-7's encoding is BASE64 which uses the "/" character; this conflicts |
|
38
|
|
|
|
|
|
|
#pod with the use of "/" as a popular hierarchy delimiter. |
|
39
|
|
|
|
|
|
|
#pod 3. UTF-7 prohibits the unencoded usage of "\"; this conflicts with the use of |
|
40
|
|
|
|
|
|
|
#pod "\" as a popular hierarchy delimiter. |
|
41
|
|
|
|
|
|
|
#pod 4. UTF-7 prohibits the unencoded usage of "~"; this conflicts with the use of |
|
42
|
|
|
|
|
|
|
#pod "~" in some servers as a home directory indicator. |
|
43
|
|
|
|
|
|
|
#pod 5. UTF-7 permits multiple alternate forms to represent the same string; in |
|
44
|
|
|
|
|
|
|
#pod particular, printable US-ASCII chararacters can be represented in encoded |
|
45
|
|
|
|
|
|
|
#pod form. |
|
46
|
|
|
|
|
|
|
#pod |
|
47
|
|
|
|
|
|
|
#pod In modified UTF-7, printable US-ASCII characters except for "&" represent |
|
48
|
|
|
|
|
|
|
#pod themselves; that is, characters with octet values 0x20-0x25 and 0x27-0x7e. The |
|
49
|
|
|
|
|
|
|
#pod character "&" (0x26) is represented by the two-octet sequence "&-". |
|
50
|
|
|
|
|
|
|
#pod |
|
51
|
|
|
|
|
|
|
#pod All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all Unicode 16-bit |
|
52
|
|
|
|
|
|
|
#pod octets) are represented in modified BASE64, with a further modification from |
|
53
|
|
|
|
|
|
|
#pod [UTF-7] that "," is used instead of "/". Modified BASE64 MUST NOT be used to |
|
54
|
|
|
|
|
|
|
#pod represent any printing US-ASCII character which can represent itself. |
|
55
|
|
|
|
|
|
|
#pod |
|
56
|
|
|
|
|
|
|
#pod "&" is used to shift to modified BASE64 and "-" to shift back to US- ASCII. |
|
57
|
|
|
|
|
|
|
#pod All names start in US-ASCII, and MUST end in US-ASCII (that is, a name that |
|
58
|
|
|
|
|
|
|
#pod ends with a Unicode 16-bit octet MUST end with a "- "). |
|
59
|
|
|
|
|
|
|
#pod |
|
60
|
|
|
|
|
|
|
#pod For example, here is a mailbox name which mixes English, Japanese, |
|
61
|
|
|
|
|
|
|
#pod and Chinese text: C<~peter/mail/&ZeVnLIqe-/&U,BTFw-> |
|
62
|
|
|
|
|
|
|
#pod |
|
63
|
|
|
|
|
|
|
#pod =cut |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Algorithms taken from Unicode::String by Gisle Aas |
|
66
|
|
|
|
|
|
|
# Code directly borrowed from Encode::Unicode::UTF7 by Dan Kogai |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Directly from the definition in RFC2060: |
|
69
|
|
|
|
|
|
|
# Ampersand (\x26) is represented as a special case |
|
70
|
|
|
|
|
|
|
my $re_asis = qr/(?:[\x20-\x25\x27-\x7e])/; # printable US-ASCII except "&" represents itself |
|
71
|
|
|
|
|
|
|
my $re_encoded = qr/(?:[^\x20-\x7e])/; # Everything else are represented by modified base64 |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
my $e_utf16 = Encode::find_encoding("UTF-16BE"); |
|
74
|
|
|
|
|
|
|
|
|
75
|
0
|
|
|
0
|
1
|
0
|
sub needs_lines { 1 }; |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub encode($$;$) { |
|
78
|
6
|
|
|
6
|
1
|
607558
|
my ( $obj, $str, $chk ) = @_; |
|
79
|
6
|
|
|
|
|
29
|
my $len = length($str); |
|
80
|
6
|
|
|
|
|
16
|
pos($str) = 0; |
|
81
|
6
|
|
|
|
|
15
|
my $bytes = ''; |
|
82
|
6
|
|
|
|
|
19
|
while ( pos($str) < $len ) { |
|
83
|
739
|
100
|
|
|
|
2683
|
if ( $str =~ /\G($re_asis+)/ogc ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
84
|
367
|
|
|
|
|
1076
|
$bytes .= $1; |
|
85
|
|
|
|
|
|
|
} elsif ( $str =~ /\G&/ogc ) { |
|
86
|
5
|
|
|
|
|
9
|
$bytes .= "&-"; |
|
87
|
|
|
|
|
|
|
} elsif ( $str =~ /\G($re_encoded+)/ogsc ) { |
|
88
|
367
|
|
|
|
|
702
|
my $s = $1; |
|
89
|
367
|
|
|
|
|
1825
|
my $base64 = encode_base64( $e_utf16->encode($s), '' ); |
|
90
|
367
|
|
|
|
|
1135
|
$base64 =~ s/=+$//; |
|
91
|
367
|
|
|
|
|
803
|
$base64 =~ s/\//,/g; |
|
92
|
367
|
|
|
|
|
1068
|
$bytes .= "&$base64-"; |
|
93
|
|
|
|
|
|
|
} else { |
|
94
|
0
|
|
|
|
|
0
|
die "This should not happen! (pos=" . pos($str) . ")"; |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
} |
|
97
|
6
|
50
|
|
|
|
12
|
$_[1] = '' if $chk; |
|
98
|
6
|
|
|
|
|
78
|
return $bytes; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub decode($$;$) { |
|
102
|
1
|
|
|
1
|
1
|
67
|
my ( $obj, $bytes, $chk ) = @_; |
|
103
|
1
|
|
|
|
|
12
|
my $len = length($bytes); |
|
104
|
1
|
|
|
|
|
2
|
my $str = ""; |
|
105
|
1
|
|
|
|
|
5
|
pos($bytes) = 0; |
|
106
|
1
|
|
|
|
|
5
|
while ( pos($bytes) < $len ) { |
|
107
|
726
|
100
|
|
|
|
2918
|
if ( $bytes =~ /\G([^&]+)/ogc ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
108
|
363
|
|
|
|
|
1051
|
$str .= $1; |
|
109
|
|
|
|
|
|
|
} elsif ( $bytes =~ /\G\&-/ogc ) { |
|
110
|
0
|
|
|
|
|
0
|
$str .= "&"; |
|
111
|
|
|
|
|
|
|
} elsif ( $bytes =~ /\G\&([A-Za-z0-9+,]+)-?/ogsc ) { |
|
112
|
363
|
|
|
|
|
665
|
my $base64 = $1; |
|
113
|
363
|
|
|
|
|
978
|
$base64 =~ s/,/\//g; |
|
114
|
363
|
|
|
|
|
823
|
my $pad = length($base64) % 4; |
|
115
|
363
|
100
|
|
|
|
848
|
$base64 .= "=" x ( 4 - $pad ) if $pad; |
|
116
|
363
|
|
|
|
|
2771
|
$str .= $e_utf16->decode( decode_base64($base64) ); |
|
117
|
|
|
|
|
|
|
} elsif ( $bytes =~ /\G\&/ogc ) { |
|
118
|
0
|
0
|
|
|
|
0
|
$^W and warn "Bad IMAP-UTF7 data escape"; |
|
119
|
0
|
|
|
|
|
0
|
$str .= "&"; |
|
120
|
|
|
|
|
|
|
} else { |
|
121
|
0
|
|
|
|
|
0
|
die "This should not happen " . pos($bytes); |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
} |
|
124
|
1
|
50
|
|
|
|
6
|
$_[1] = '' if $chk; |
|
125
|
1
|
|
|
|
|
74
|
return $str; |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
1; |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
__END__ |