File Coverage

blib/lib/Encode/GSM0338.pm
Criterion Covered Total %
statement 45 47 95.7
branch 33 38 86.8
condition n/a
subroutine 9 11 81.8
pod 4 4 100.0
total 91 100 91.0


line stmt bran cond sub pod time code
1             #
2             # $Id: GSM0338.pm,v 2.7 2017/06/10 17:23:50 dankogai Exp $
3             #
4             package Encode::GSM0338;
5              
6 4     4   412 use strict;
  4         9  
  4         111  
7 4     4   18 use warnings;
  4         7  
  4         98  
8 4     4   20 use Carp;
  4         8  
  4         296  
9              
10 4     4   24 use vars qw($VERSION);
  4         6  
  4         379  
11             $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
12              
13 4     4   28 use Encode qw(:fallbacks);
  4         10  
  4         598  
14              
15 4     4   27 use parent qw(Encode::Encoding);
  4         9  
  4         27  
16             __PACKAGE__->Define('gsm0338');
17              
18 0     0 1 0 sub needs_lines { 1 }
19 0     0 1 0 sub perlio_ok { 0 }
20              
21 4     4   384 use utf8;
  4         15  
  4         24  
22             our %UNI2GSM = (
23             "\x{0040}" => "\x00", # COMMERCIAL AT
24             "\x{000A}" => "\x0A", # LINE FEED
25             "\x{000C}" => "\x1B\x0A", # FORM FEED
26             "\x{000D}" => "\x0D", # CARRIAGE RETURN
27             "\x{0020}" => "\x20", # SPACE
28             "\x{0021}" => "\x21", # EXCLAMATION MARK
29             "\x{0022}" => "\x22", # QUOTATION MARK
30             "\x{0023}" => "\x23", # NUMBER SIGN
31             "\x{0024}" => "\x02", # DOLLAR SIGN
32             "\x{0025}" => "\x25", # PERCENT SIGN
33             "\x{0026}" => "\x26", # AMPERSAND
34             "\x{0027}" => "\x27", # APOSTROPHE
35             "\x{0028}" => "\x28", # LEFT PARENTHESIS
36             "\x{0029}" => "\x29", # RIGHT PARENTHESIS
37             "\x{002A}" => "\x2A", # ASTERISK
38             "\x{002B}" => "\x2B", # PLUS SIGN
39             "\x{002C}" => "\x2C", # COMMA
40             "\x{002D}" => "\x2D", # HYPHEN-MINUS
41             "\x{002E}" => "\x2E", # FULL STOP
42             "\x{002F}" => "\x2F", # SOLIDUS
43             "\x{0030}" => "\x30", # DIGIT ZERO
44             "\x{0031}" => "\x31", # DIGIT ONE
45             "\x{0032}" => "\x32", # DIGIT TWO
46             "\x{0033}" => "\x33", # DIGIT THREE
47             "\x{0034}" => "\x34", # DIGIT FOUR
48             "\x{0035}" => "\x35", # DIGIT FIVE
49             "\x{0036}" => "\x36", # DIGIT SIX
50             "\x{0037}" => "\x37", # DIGIT SEVEN
51             "\x{0038}" => "\x38", # DIGIT EIGHT
52             "\x{0039}" => "\x39", # DIGIT NINE
53             "\x{003A}" => "\x3A", # COLON
54             "\x{003B}" => "\x3B", # SEMICOLON
55             "\x{003C}" => "\x3C", # LESS-THAN SIGN
56             "\x{003D}" => "\x3D", # EQUALS SIGN
57             "\x{003E}" => "\x3E", # GREATER-THAN SIGN
58             "\x{003F}" => "\x3F", # QUESTION MARK
59             "\x{0041}" => "\x41", # LATIN CAPITAL LETTER A
60             "\x{0042}" => "\x42", # LATIN CAPITAL LETTER B
61             "\x{0043}" => "\x43", # LATIN CAPITAL LETTER C
62             "\x{0044}" => "\x44", # LATIN CAPITAL LETTER D
63             "\x{0045}" => "\x45", # LATIN CAPITAL LETTER E
64             "\x{0046}" => "\x46", # LATIN CAPITAL LETTER F
65             "\x{0047}" => "\x47", # LATIN CAPITAL LETTER G
66             "\x{0048}" => "\x48", # LATIN CAPITAL LETTER H
67             "\x{0049}" => "\x49", # LATIN CAPITAL LETTER I
68             "\x{004A}" => "\x4A", # LATIN CAPITAL LETTER J
69             "\x{004B}" => "\x4B", # LATIN CAPITAL LETTER K
70             "\x{004C}" => "\x4C", # LATIN CAPITAL LETTER L
71             "\x{004D}" => "\x4D", # LATIN CAPITAL LETTER M
72             "\x{004E}" => "\x4E", # LATIN CAPITAL LETTER N
73             "\x{004F}" => "\x4F", # LATIN CAPITAL LETTER O
74             "\x{0050}" => "\x50", # LATIN CAPITAL LETTER P
75             "\x{0051}" => "\x51", # LATIN CAPITAL LETTER Q
76             "\x{0052}" => "\x52", # LATIN CAPITAL LETTER R
77             "\x{0053}" => "\x53", # LATIN CAPITAL LETTER S
78             "\x{0054}" => "\x54", # LATIN CAPITAL LETTER T
79             "\x{0055}" => "\x55", # LATIN CAPITAL LETTER U
80             "\x{0056}" => "\x56", # LATIN CAPITAL LETTER V
81             "\x{0057}" => "\x57", # LATIN CAPITAL LETTER W
82             "\x{0058}" => "\x58", # LATIN CAPITAL LETTER X
83             "\x{0059}" => "\x59", # LATIN CAPITAL LETTER Y
84             "\x{005A}" => "\x5A", # LATIN CAPITAL LETTER Z
85             "\x{005F}" => "\x11", # LOW LINE
86             "\x{0061}" => "\x61", # LATIN SMALL LETTER A
87             "\x{0062}" => "\x62", # LATIN SMALL LETTER B
88             "\x{0063}" => "\x63", # LATIN SMALL LETTER C
89             "\x{0064}" => "\x64", # LATIN SMALL LETTER D
90             "\x{0065}" => "\x65", # LATIN SMALL LETTER E
91             "\x{0066}" => "\x66", # LATIN SMALL LETTER F
92             "\x{0067}" => "\x67", # LATIN SMALL LETTER G
93             "\x{0068}" => "\x68", # LATIN SMALL LETTER H
94             "\x{0069}" => "\x69", # LATIN SMALL LETTER I
95             "\x{006A}" => "\x6A", # LATIN SMALL LETTER J
96             "\x{006B}" => "\x6B", # LATIN SMALL LETTER K
97             "\x{006C}" => "\x6C", # LATIN SMALL LETTER L
98             "\x{006D}" => "\x6D", # LATIN SMALL LETTER M
99             "\x{006E}" => "\x6E", # LATIN SMALL LETTER N
100             "\x{006F}" => "\x6F", # LATIN SMALL LETTER O
101             "\x{0070}" => "\x70", # LATIN SMALL LETTER P
102             "\x{0071}" => "\x71", # LATIN SMALL LETTER Q
103             "\x{0072}" => "\x72", # LATIN SMALL LETTER R
104             "\x{0073}" => "\x73", # LATIN SMALL LETTER S
105             "\x{0074}" => "\x74", # LATIN SMALL LETTER T
106             "\x{0075}" => "\x75", # LATIN SMALL LETTER U
107             "\x{0076}" => "\x76", # LATIN SMALL LETTER V
108             "\x{0077}" => "\x77", # LATIN SMALL LETTER W
109             "\x{0078}" => "\x78", # LATIN SMALL LETTER X
110             "\x{0079}" => "\x79", # LATIN SMALL LETTER Y
111             "\x{007A}" => "\x7A", # LATIN SMALL LETTER Z
112             "\x{000C}" => "\x1B\x0A", # FORM FEED
113             "\x{005B}" => "\x1B\x3C", # LEFT SQUARE BRACKET
114             "\x{005C}" => "\x1B\x2F", # REVERSE SOLIDUS
115             "\x{005D}" => "\x1B\x3E", # RIGHT SQUARE BRACKET
116             "\x{005E}" => "\x1B\x14", # CIRCUMFLEX ACCENT
117             "\x{007B}" => "\x1B\x28", # LEFT CURLY BRACKET
118             "\x{007C}" => "\x1B\x40", # VERTICAL LINE
119             "\x{007D}" => "\x1B\x29", # RIGHT CURLY BRACKET
120             "\x{007E}" => "\x1B\x3D", # TILDE
121             "\x{00A0}" => "\x1B", # NO-BREAK SPACE
122             "\x{00A1}" => "\x40", # INVERTED EXCLAMATION MARK
123             "\x{00A3}" => "\x01", # POUND SIGN
124             "\x{00A4}" => "\x24", # CURRENCY SIGN
125             "\x{00A5}" => "\x03", # YEN SIGN
126             "\x{00A7}" => "\x5F", # SECTION SIGN
127             "\x{00BF}" => "\x60", # INVERTED QUESTION MARK
128             "\x{00C4}" => "\x5B", # LATIN CAPITAL LETTER A WITH DIAERESIS
129             "\x{00C5}" => "\x0E", # LATIN CAPITAL LETTER A WITH RING ABOVE
130             "\x{00C6}" => "\x1C", # LATIN CAPITAL LETTER AE
131             "\x{00C9}" => "\x1F", # LATIN CAPITAL LETTER E WITH ACUTE
132             "\x{00D1}" => "\x5D", # LATIN CAPITAL LETTER N WITH TILDE
133             "\x{00D6}" => "\x5C", # LATIN CAPITAL LETTER O WITH DIAERESIS
134             "\x{00D8}" => "\x0B", # LATIN CAPITAL LETTER O WITH STROKE
135             "\x{00DC}" => "\x5E", # LATIN CAPITAL LETTER U WITH DIAERESIS
136             "\x{00DF}" => "\x1E", # LATIN SMALL LETTER SHARP S
137             "\x{00E0}" => "\x7F", # LATIN SMALL LETTER A WITH GRAVE
138             "\x{00E4}" => "\x7B", # LATIN SMALL LETTER A WITH DIAERESIS
139             "\x{00E5}" => "\x0F", # LATIN SMALL LETTER A WITH RING ABOVE
140             "\x{00E6}" => "\x1D", # LATIN SMALL LETTER AE
141             #"\x{00E7}" => "\x09", # LATIN SMALL LETTER C WITH CEDILLA
142             "\x{00C7}" => "\x09", # LATIN CAPITAL LETTER C WITH CEDILLA
143             "\x{00E8}" => "\x04", # LATIN SMALL LETTER E WITH GRAVE
144             "\x{00E9}" => "\x05", # LATIN SMALL LETTER E WITH ACUTE
145             "\x{00EC}" => "\x07", # LATIN SMALL LETTER I WITH GRAVE
146             "\x{00F1}" => "\x7D", # LATIN SMALL LETTER N WITH TILDE
147             "\x{00F2}" => "\x08", # LATIN SMALL LETTER O WITH GRAVE
148             "\x{00F6}" => "\x7C", # LATIN SMALL LETTER O WITH DIAERESIS
149             "\x{00F8}" => "\x0C", # LATIN SMALL LETTER O WITH STROKE
150             "\x{00F9}" => "\x06", # LATIN SMALL LETTER U WITH GRAVE
151             "\x{00FC}" => "\x7E", # LATIN SMALL LETTER U WITH DIAERESIS
152             "\x{0393}" => "\x13", # GREEK CAPITAL LETTER GAMMA
153             "\x{0394}" => "\x10", # GREEK CAPITAL LETTER DELTA
154             "\x{0398}" => "\x19", # GREEK CAPITAL LETTER THETA
155             "\x{039B}" => "\x14", # GREEK CAPITAL LETTER LAMDA
156             "\x{039E}" => "\x1A", # GREEK CAPITAL LETTER XI
157             "\x{03A0}" => "\x16", # GREEK CAPITAL LETTER PI
158             "\x{03A3}" => "\x18", # GREEK CAPITAL LETTER SIGMA
159             "\x{03A6}" => "\x12", # GREEK CAPITAL LETTER PHI
160             "\x{03A8}" => "\x17", # GREEK CAPITAL LETTER PSI
161             "\x{03A9}" => "\x15", # GREEK CAPITAL LETTER OMEGA
162             "\x{20AC}" => "\x1B\x65", # EURO SIGN
163             );
164             our %GSM2UNI = reverse %UNI2GSM;
165             our $ESC = "\x1b";
166             our $ATMARK = "\x40";
167             our $FBCHAR = "\x3F";
168             our $NBSP = "\x{00A0}";
169              
170             #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
171              
172             sub decode ($$;$) {
173 761     761 1 1317 my ( $obj, $bytes, $chk ) = @_;
174 761 100       1266 return undef unless defined $bytes;
175 760         1240 my $str = substr($bytes, 0, 0); # to propagate taintedness;
176 760         1475 while ( length $bytes ) {
177 892         1535 my $c = substr( $bytes, 0, 1, '' );
178 892         1131 my $u;
179 892 100       1764 if ( $c eq "\x00" ) {
    100          
180 131         212 my $c2 = substr( $bytes, 0, 1, '' );
181             $u =
182             !length $c2 ? $ATMARK
183             : $c2 eq "\x00" ? "\x{0000}"
184 131 50       953 : exists $GSM2UNI{$c2} ? $ATMARK . $GSM2UNI{$c2}
    100          
    100          
    100          
185             : $chk
186             ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode",
187             ord($c), ord($c2) )
188             : $ATMARK . $FBCHAR;
189              
190             }
191             elsif ( $c eq $ESC ) {
192 132         293 my $c2 = substr( $bytes, 0, 1, '' );
193             $u =
194             exists $GSM2UNI{ $c . $c2 } ? $GSM2UNI{ $c . $c2 }
195 132 50       788 : exists $GSM2UNI{$c2} ? $NBSP . $GSM2UNI{$c2}
    100          
    100          
196             : $chk
197             ? croak sprintf( "\\x%02X\\x%02X does not map to Unicode",
198             ord($c), ord($c2) )
199             : $NBSP . $FBCHAR;
200             }
201             else {
202             $u =
203             exists $GSM2UNI{$c}
204 629 50       26665 ? $GSM2UNI{$c}
    50          
    100          
205             : $chk ? ref $chk eq 'CODE'
206             ? $chk->( ord $c )
207             : croak sprintf( "\\x%02X does not map to Unicode", ord($c) )
208             : $FBCHAR;
209             }
210 764         1646 $str .= $u;
211             }
212 632 100       1184 $_[1] = $bytes if $chk;
213 632         1478 return $str;
214             }
215              
216             #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s"
217              
218             sub encode($$;$) {
219 270     270 1 572 my ( $obj, $str, $chk ) = @_;
220 270 100       695 return undef unless defined $str;
221 269         727 my $bytes = substr($str, 0, 0); # to propagate taintedness
222 269         524 while ( length $str ) {
223 403         1099 my $u = substr( $str, 0, 1, '' );
224 403         516 my $c;
225             $bytes .=
226             exists $UNI2GSM{$u}
227 403 50       1883 ? $UNI2GSM{$u}
    100          
    100          
228             : $chk ? ref $chk eq 'CODE'
229             ? $chk->( ord($u) )
230             : croak sprintf( "\\x{%04x} does not map to %s",
231             ord($u), $obj->name )
232             : $FBCHAR;
233             }
234 141 100       301 $_[1] = $str if $chk;
235 141         371 return $bytes;
236             }
237              
238             1;
239             __END__