File Coverage

blib/lib/Encode/IMAPUTF7.pm
Criterion Covered Total %
statement 44 50 88.0
branch 13 22 59.0
condition n/a
subroutine 7 8 87.5
pod 3 3 100.0
total 67 83 80.7


line stmt bran cond sub pod time code
1             package Encode::IMAPUTF7 1.07;
2 2     2   274286 use strict;
  2         4  
  2         82  
3 2     2   18 use warnings;
  2         5  
  2         124  
4              
5 2     2   524 use parent qw(Encode::Encoding);
  2         422  
  2         12  
6              
7             # ABSTRACT: modification of UTF-7 encoding for IMAP
8              
9             __PACKAGE__->Define('IMAP-UTF-7', 'imap-utf-7');
10              
11 2     2   25877 use Encode ();
  2         6  
  2         31  
12 2     2   1123 use MIME::Base64;
  2         3115  
  2         1835  
13              
14             #pod =head1 SYNOPSIS
15             #pod
16             #pod use Encode qw/encode decode/;
17             #pod use Encode::IMAPUTF7;
18             #pod
19             #pod print encode('IMAP-UTF-7', 'RĂ©pertoire');
20             #pod print decode('IMAP-UTF-7', 'R&AOk-pertoire');
21             #pod
22             #pod =head1 ABSTRACT
23             #pod
24             #pod IMAP mailbox names are encoded in a modified UTF-7 when names contains
25             #pod international characters outside of the printable ASCII range. The modified
26             #pod UTF-7 encoding is defined in RFC2060 (section 5.1.3).
27             #pod
28             #pod =head2 RFC2060 - section 5.1.3 - Mailbox International Naming Convention
29             #pod
30             #pod By convention, international mailbox names are specified using a modified
31             #pod version of the UTF-7 encoding described in [UTF-7]. The purpose of these
32             #pod modifications is to correct the following problems with UTF-7:
33             #pod
34             #pod =for :list
35             #pod 1. UTF-7 uses the "+" character for shifting; this conflicts with the common
36             #pod use of "+" in mailbox names, in particular USENET newsgroup names.
37             #pod 2. UTF-7's encoding is BASE64 which uses the "/" character; this conflicts
38             #pod with the use of "/" as a popular hierarchy delimiter.
39             #pod 3. UTF-7 prohibits the unencoded usage of "\"; this conflicts with the use of
40             #pod "\" as a popular hierarchy delimiter.
41             #pod 4. UTF-7 prohibits the unencoded usage of "~"; this conflicts with the use of
42             #pod "~" in some servers as a home directory indicator.
43             #pod 5. UTF-7 permits multiple alternate forms to represent the same string; in
44             #pod particular, printable US-ASCII chararacters can be represented in encoded
45             #pod form.
46             #pod
47             #pod In modified UTF-7, printable US-ASCII characters except for "&" represent
48             #pod themselves; that is, characters with octet values 0x20-0x25 and 0x27-0x7e. The
49             #pod character "&" (0x26) is represented by the two-octet sequence "&-".
50             #pod
51             #pod All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all Unicode 16-bit
52             #pod octets) are represented in modified BASE64, with a further modification from
53             #pod [UTF-7] that "," is used instead of "/". Modified BASE64 MUST NOT be used to
54             #pod represent any printing US-ASCII character which can represent itself.
55             #pod
56             #pod "&" is used to shift to modified BASE64 and "-" to shift back to US- ASCII.
57             #pod All names start in US-ASCII, and MUST end in US-ASCII (that is, a name that
58             #pod ends with a Unicode 16-bit octet MUST end with a "- ").
59             #pod
60             #pod For example, here is a mailbox name which mixes English, Japanese,
61             #pod and Chinese text: C<~peter/mail/&ZeVnLIqe-/&U,BTFw->
62             #pod
63             #pod =cut
64              
65             # Algorithms taken from Unicode::String by Gisle Aas
66             # Code directly borrowed from Encode::Unicode::UTF7 by Dan Kogai
67              
68             # Directly from the definition in RFC2060:
69             # Ampersand (\x26) is represented as a special case
70             my $re_asis = qr/(?:[\x20-\x25\x27-\x7e])/; # printable US-ASCII except "&" represents itself
71             my $re_encoded = qr/(?:[^\x20-\x7e])/; # Everything else are represented by modified base64
72              
73             my $e_utf16 = Encode::find_encoding("UTF-16BE");
74              
75 0     0 1 0 sub needs_lines { 1 };
76              
77             sub encode($$;$) {
78 6     6 1 607558 my ( $obj, $str, $chk ) = @_;
79 6         29 my $len = length($str);
80 6         16 pos($str) = 0;
81 6         15 my $bytes = '';
82 6         19 while ( pos($str) < $len ) {
83 739 100       2683 if ( $str =~ /\G($re_asis+)/ogc ) {
    100          
    50          
84 367         1076 $bytes .= $1;
85             } elsif ( $str =~ /\G&/ogc ) {
86 5         9 $bytes .= "&-";
87             } elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
88 367         702 my $s = $1;
89 367         1825 my $base64 = encode_base64( $e_utf16->encode($s), '' );
90 367         1135 $base64 =~ s/=+$//;
91 367         803 $base64 =~ s/\//,/g;
92 367         1068 $bytes .= "&$base64-";
93             } else {
94 0         0 die "This should not happen! (pos=" . pos($str) . ")";
95             }
96             }
97 6 50       12 $_[1] = '' if $chk;
98 6         78 return $bytes;
99             }
100              
101             sub decode($$;$) {
102 1     1 1 67 my ( $obj, $bytes, $chk ) = @_;
103 1         12 my $len = length($bytes);
104 1         2 my $str = "";
105 1         5 pos($bytes) = 0;
106 1         5 while ( pos($bytes) < $len ) {
107 726 100       2918 if ( $bytes =~ /\G([^&]+)/ogc ) {
    50          
    50          
    0          
108 363         1051 $str .= $1;
109             } elsif ( $bytes =~ /\G\&-/ogc ) {
110 0         0 $str .= "&";
111             } elsif ( $bytes =~ /\G\&([A-Za-z0-9+,]+)-?/ogsc ) {
112 363         665 my $base64 = $1;
113 363         978 $base64 =~ s/,/\//g;
114 363         823 my $pad = length($base64) % 4;
115 363 100       848 $base64 .= "=" x ( 4 - $pad ) if $pad;
116 363         2771 $str .= $e_utf16->decode( decode_base64($base64) );
117             } elsif ( $bytes =~ /\G\&/ogc ) {
118 0 0       0 $^W and warn "Bad IMAP-UTF7 data escape";
119 0         0 $str .= "&";
120             } else {
121 0         0 die "This should not happen " . pos($bytes);
122             }
123             }
124 1 50       6 $_[1] = '' if $chk;
125 1         74 return $str;
126             }
127              
128             1;
129              
130             __END__