line |
stmt |
bran |
cond |
sub |
time |
code |
1
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
# $Id: GSM0338.pm,v 2.4 2013/08/14 02:29:54 dankogai Exp $ |
3
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
package Encode::GSM0338; |
5
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
706
|
use strict; |
|
1
|
|
|
|
5
|
|
|
1
|
|
|
|
68
|
|
7
|
1
|
|
|
1
|
8
|
use warnings; |
|
1
|
|
|
|
3
|
|
|
1
|
|
|
|
40
|
|
8
|
1
|
|
|
1
|
8
|
use Carp; |
|
1
|
|
|
|
3
|
|
|
1
|
|
|
|
128
|
|
9
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
8
|
use vars qw($VERSION); |
|
1
|
|
|
|
3
|
|
|
1
|
|
|
|
168
|
|
11
|
|
|
|
|
|
$VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; |
12
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
9
|
use Encode qw(:fallbacks); |
|
1
|
|
|
|
3
|
|
|
1
|
|
|
|
363
|
|
14
|
|
|
|
|
|
|
15
|
1
|
|
|
1
|
8
|
use base qw(Encode::Encoding); |
|
1
|
|
|
|
4
|
|
|
1
|
|
|
|
137
|
|
16
|
|
|
|
|
|
__PACKAGE__->Define('gsm0338'); |
17
|
|
|
|
|
|
|
18
|
0
|
|
|
0
|
0
|
sub needs_lines { 1 } |
19
|
0
|
|
|
0
|
0
|
sub perlio_ok { 0 } |
20
|
|
|
|
|
|
|
21
|
1
|
|
|
1
|
224
|
use utf8; |
|
1
|
|
|
|
3
|
|
|
1
|
|
|
|
12
|
|
22
|
|
|
|
|
|
our %UNI2GSM = ( |
23
|
|
|
|
|
|
"\x{0040}" => "\x00", # COMMERCIAL AT |
24
|
|
|
|
|
|
"\x{000A}" => "\x0A", # LINE FEED |
25
|
|
|
|
|
|
"\x{000C}" => "\x1B\x0A", # FORM FEED |
26
|
|
|
|
|
|
"\x{000D}" => "\x0D", # CARRIAGE RETURN |
27
|
|
|
|
|
|
"\x{0020}" => "\x20", # SPACE |
28
|
|
|
|
|
|
"\x{0021}" => "\x21", # EXCLAMATION MARK |
29
|
|
|
|
|
|
"\x{0022}" => "\x22", # QUOTATION MARK |
30
|
|
|
|
|
|
"\x{0023}" => "\x23", # NUMBER SIGN |
31
|
|
|
|
|
|
"\x{0024}" => "\x02", # DOLLAR SIGN |
32
|
|
|
|
|
|
"\x{0025}" => "\x25", # PERCENT SIGN |
33
|
|
|
|
|
|
"\x{0026}" => "\x26", # AMPERSAND |
34
|
|
|
|
|
|
"\x{0027}" => "\x27", # APOSTROPHE |
35
|
|
|
|
|
|
"\x{0028}" => "\x28", # LEFT PARENTHESIS |
36
|
|
|
|
|
|
"\x{0029}" => "\x29", # RIGHT PARENTHESIS |
37
|
|
|
|
|
|
"\x{002A}" => "\x2A", # ASTERISK |
38
|
|
|
|
|
|
"\x{002B}" => "\x2B", # PLUS SIGN |
39
|
|
|
|
|
|
"\x{002C}" => "\x2C", # COMMA |
40
|
|
|
|
|
|
"\x{002D}" => "\x2D", # HYPHEN-MINUS |
41
|
|
|
|
|
|
"\x{002E}" => "\x2E", # FULL STOP |
42
|
|
|
|
|
|
"\x{002F}" => "\x2F", # SOLIDUS |
43
|
|
|
|
|
|
"\x{0030}" => "\x30", # DIGIT ZERO |
44
|
|
|
|
|
|
"\x{0031}" => "\x31", # DIGIT ONE |
45
|
|
|
|
|
|
"\x{0032}" => "\x32", # DIGIT TWO |
46
|
|
|
|
|
|
"\x{0033}" => "\x33", # DIGIT THREE |
47
|
|
|
|
|
|
"\x{0034}" => "\x34", # DIGIT FOUR |
48
|
|
|
|
|
|
"\x{0035}" => "\x35", # DIGIT FIVE |
49
|
|
|
|
|
|
"\x{0036}" => "\x36", # DIGIT SIX |
50
|
|
|
|
|
|
"\x{0037}" => "\x37", # DIGIT SEVEN |
51
|
|
|
|
|
|
"\x{0038}" => "\x38", # DIGIT EIGHT |
52
|
|
|
|
|
|
"\x{0039}" => "\x39", # DIGIT NINE |
53
|
|
|
|
|
|
"\x{003A}" => "\x3A", # COLON |
54
|
|
|
|
|
|
"\x{003B}" => "\x3B", # SEMICOLON |
55
|
|
|
|
|
|
"\x{003C}" => "\x3C", # LESS-THAN SIGN |
56
|
|
|
|
|
|
"\x{003D}" => "\x3D", # EQUALS SIGN |
57
|
|
|
|
|
|
"\x{003E}" => "\x3E", # GREATER-THAN SIGN |
58
|
|
|
|
|
|
"\x{003F}" => "\x3F", # QUESTION MARK |
59
|
|
|
|
|
|
"\x{0041}" => "\x41", # LATIN CAPITAL LETTER A |
60
|
|
|
|
|
|
"\x{0042}" => "\x42", # LATIN CAPITAL LETTER B |
61
|
|
|
|
|
|
"\x{0043}" => "\x43", # LATIN CAPITAL LETTER C |
62
|
|
|
|
|
|
"\x{0044}" => "\x44", # LATIN CAPITAL LETTER D |
63
|
|
|
|
|
|
"\x{0045}" => "\x45", # LATIN CAPITAL LETTER E |
64
|
|
|
|
|
|
"\x{0046}" => "\x46", # LATIN CAPITAL LETTER F |
65
|
|
|
|
|
|
"\x{0047}" => "\x47", # LATIN CAPITAL LETTER G |
66
|
|
|
|
|
|
"\x{0048}" => "\x48", # LATIN CAPITAL LETTER H |
67
|
|
|
|
|
|
"\x{0049}" => "\x49", # LATIN CAPITAL LETTER I |
68
|
|
|
|
|
|
"\x{004A}" => "\x4A", # LATIN CAPITAL LETTER J |
69
|
|
|
|
|
|
"\x{004B}" => "\x4B", # LATIN CAPITAL LETTER K |
70
|
|
|
|
|
|
"\x{004C}" => "\x4C", # LATIN CAPITAL LETTER L |
71
|
|
|
|
|
|
"\x{004D}" => "\x4D", # LATIN CAPITAL LETTER M |
72
|
|
|
|
|
|
"\x{004E}" => "\x4E", # LATIN CAPITAL LETTER N |
73
|
|
|
|
|
|
"\x{004F}" => "\x4F", # LATIN CAPITAL LETTER O |
74
|
|
|
|
|
|
"\x{0050}" => "\x50", # LATIN CAPITAL LETTER P |
75
|
|
|
|
|
|
"\x{0051}" => "\x51", # LATIN CAPITAL LETTER Q |
76
|
|
|
|
|
|
"\x{0052}" => "\x52", # LATIN CAPITAL LETTER R |
77
|
|
|
|
|
|
"\x{0053}" => "\x53", # LATIN CAPITAL LETTER S |
78
|
|
|
|
|
|
"\x{0054}" => "\x54", # LATIN CAPITAL LETTER T |
79
|
|
|
|
|
|
"\x{0055}" => "\x55", # LATIN CAPITAL LETTER U |
80
|
|
|
|
|
|
"\x{0056}" => "\x56", # LATIN CAPITAL LETTER V |
81
|
|
|
|
|
|
"\x{0057}" => "\x57", # LATIN CAPITAL LETTER W |
82
|
|
|
|
|
|
"\x{0058}" => "\x58", # LATIN CAPITAL LETTER X |
83
|
|
|
|
|
|
"\x{0059}" => "\x59", # LATIN CAPITAL LETTER Y |
84
|
|
|
|
|
|
"\x{005A}" => "\x5A", # LATIN CAPITAL LETTER Z |
85
|
|
|
|
|
|
"\x{005F}" => "\x11", # LOW LINE |
86
|
|
|
|
|
|
"\x{0061}" => "\x61", # LATIN SMALL LETTER A |
87
|
|
|
|
|
|
"\x{0062}" => "\x62", # LATIN SMALL LETTER B |
88
|
|
|
|
|
|
"\x{0063}" => "\x63", # LATIN SMALL LETTER C |
89
|
|
|
|
|
|
"\x{0064}" => "\x64", # LATIN SMALL LETTER D |
90
|
|
|
|
|
|
"\x{0065}" => "\x65", # LATIN SMALL LETTER E |
91
|
|
|
|
|
|
"\x{0066}" => "\x66", # LATIN SMALL LETTER F |
92
|
|
|
|
|
|
"\x{0067}" => "\x67", # LATIN SMALL LETTER G |
93
|
|
|
|
|
|
"\x{0068}" => "\x68", # LATIN SMALL LETTER H |
94
|
|
|
|
|
|
"\x{0069}" => "\x69", # LATIN SMALL LETTER I |
95
|
|
|
|
|
|
"\x{006A}" => "\x6A", # LATIN SMALL LETTER J |
96
|
|
|
|
|
|
"\x{006B}" => "\x6B", # LATIN SMALL LETTER K |
97
|
|
|
|
|
|
"\x{006C}" => "\x6C", # LATIN SMALL LETTER L |
98
|
|
|
|
|
|
"\x{006D}" => "\x6D", # LATIN SMALL LETTER M |
99
|
|
|
|
|
|
"\x{006E}" => "\x6E", # LATIN SMALL LETTER N |
100
|
|
|
|
|
|
"\x{006F}" => "\x6F", # LATIN SMALL LETTER O |
101
|
|
|
|
|
|
"\x{0070}" => "\x70", # LATIN SMALL LETTER P |
102
|
|
|
|
|
|
"\x{0071}" => "\x71", # LATIN SMALL LETTER Q |
103
|
|
|
|
|
|
"\x{0072}" => "\x72", # LATIN SMALL LETTER R |
104
|
|
|
|
|
|
"\x{0073}" => "\x73", # LATIN SMALL LETTER S |
105
|
|
|
|
|
|
"\x{0074}" => "\x74", # LATIN SMALL LETTER T |
106
|
|
|
|
|
|
"\x{0075}" => "\x75", # LATIN SMALL LETTER U |
107
|
|
|
|
|
|
"\x{0076}" => "\x76", # LATIN SMALL LETTER V |
108
|
|
|
|
|
|
"\x{0077}" => "\x77", # LATIN SMALL LETTER W |
109
|
|
|
|
|
|
"\x{0078}" => "\x78", # LATIN SMALL LETTER X |
110
|
|
|
|
|
|
"\x{0079}" => "\x79", # LATIN SMALL LETTER Y |
111
|
|
|
|
|
|
"\x{007A}" => "\x7A", # LATIN SMALL LETTER Z |
112
|
|
|
|
|
|
"\x{000C}" => "\x1B\x0A", # FORM FEED |
113
|
|
|
|
|
|
"\x{005B}" => "\x1B\x3C", # LEFT SQUARE BRACKET |
114
|
|
|
|
|
|
"\x{005C}" => "\x1B\x2F", # REVERSE SOLIDUS |
115
|
|
|
|
|
|
"\x{005D}" => "\x1B\x3E", # RIGHT SQUARE BRACKET |
116
|
|
|
|
|
|
"\x{005E}" => "\x1B\x14", # CIRCUMFLEX ACCENT |
117
|
|
|
|
|
|
"\x{007B}" => "\x1B\x28", # LEFT CURLY BRACKET |
118
|
|
|
|
|
|
"\x{007C}" => "\x1B\x40", # VERTICAL LINE |
119
|
|
|
|
|
|
"\x{007D}" => "\x1B\x29", # RIGHT CURLY BRACKET |
120
|
|
|
|
|
|
"\x{007E}" => "\x1B\x3D", # TILDE |
121
|
|
|
|
|
|
"\x{00A0}" => "\x1B", # NO-BREAK SPACE |
122
|
|
|
|
|
|
"\x{00A1}" => "\x40", # INVERTED EXCLAMATION MARK |
123
|
|
|
|
|
|
"\x{00A3}" => "\x01", # POUND SIGN |
124
|
|
|
|
|
|
"\x{00A4}" => "\x24", # CURRENCY SIGN |
125
|
|
|
|
|
|
"\x{00A5}" => "\x03", # YEN SIGN |
126
|
|
|
|
|
|
"\x{00A7}" => "\x5F", # SECTION SIGN |
127
|
|
|
|
|
|
"\x{00BF}" => "\x60", # INVERTED QUESTION MARK |
128
|
|
|
|
|
|
"\x{00C4}" => "\x5B", # LATIN CAPITAL LETTER A WITH DIAERESIS |
129
|
|
|
|
|
|
"\x{00C5}" => "\x0E", # LATIN CAPITAL LETTER A WITH RING ABOVE |
130
|
|
|
|
|
|
"\x{00C6}" => "\x1C", # LATIN CAPITAL LETTER AE |
131
|
|
|
|
|
|
"\x{00C9}" => "\x1F", # LATIN CAPITAL LETTER E WITH ACUTE |
132
|
|
|
|
|
|
"\x{00D1}" => "\x5D", # LATIN CAPITAL LETTER N WITH TILDE |
133
|
|
|
|
|
|
"\x{00D6}" => "\x5C", # LATIN CAPITAL LETTER O WITH DIAERESIS |
134
|
|
|
|
|
|
"\x{00D8}" => "\x0B", # LATIN CAPITAL LETTER O WITH STROKE |
135
|
|
|
|
|
|
"\x{00DC}" => "\x5E", # LATIN CAPITAL LETTER U WITH DIAERESIS |
136
|
|
|
|
|
|
"\x{00DF}" => "\x1E", # LATIN SMALL LETTER SHARP S |
137
|
|
|
|
|
|
"\x{00E0}" => "\x7F", # LATIN SMALL LETTER A WITH GRAVE |
138
|
|
|
|
|
|
"\x{00E4}" => "\x7B", # LATIN SMALL LETTER A WITH DIAERESIS |
139
|
|
|
|
|
|
"\x{00E5}" => "\x0F", # LATIN SMALL LETTER A WITH RING ABOVE |
140
|
|
|
|
|
|
"\x{00E6}" => "\x1D", # LATIN SMALL LETTER AE |
141
|
|
|
|
|
|
#"\x{00E7}" => "\x09", # LATIN SMALL LETTER C WITH CEDILLA |
142
|
|
|
|
|
|
"\x{00C7}" => "\x09", # LATIN CAPITAL LETTER C WITH CEDILLA |
143
|
|
|
|
|
|
"\x{00E8}" => "\x04", # LATIN SMALL LETTER E WITH GRAVE |
144
|
|
|
|
|
|
"\x{00E9}" => "\x05", # LATIN SMALL LETTER E WITH ACUTE |
145
|
|
|
|
|
|
"\x{00EC}" => "\x07", # LATIN SMALL LETTER I WITH GRAVE |
146
|
|
|
|
|
|
"\x{00F1}" => "\x7D", # LATIN SMALL LETTER N WITH TILDE |
147
|
|
|
|
|
|
"\x{00F2}" => "\x08", # LATIN SMALL LETTER O WITH GRAVE |
148
|
|
|
|
|
|
"\x{00F6}" => "\x7C", # LATIN SMALL LETTER O WITH DIAERESIS |
149
|
|
|
|
|
|
"\x{00F8}" => "\x0C", # LATIN SMALL LETTER O WITH STROKE |
150
|
|
|
|
|
|
"\x{00F9}" => "\x06", # LATIN SMALL LETTER U WITH GRAVE |
151
|
|
|
|
|
|
"\x{00FC}" => "\x7E", # LATIN SMALL LETTER U WITH DIAERESIS |
152
|
|
|
|
|
|
"\x{0393}" => "\x13", # GREEK CAPITAL LETTER GAMMA |
153
|
|
|
|
|
|
"\x{0394}" => "\x10", # GREEK CAPITAL LETTER DELTA |
154
|
|
|
|
|
|
"\x{0398}" => "\x19", # GREEK CAPITAL LETTER THETA |
155
|
|
|
|
|
|
"\x{039B}" => "\x14", # GREEK CAPITAL LETTER LAMDA |
156
|
|
|
|
|
|
"\x{039E}" => "\x1A", # GREEK CAPITAL LETTER XI |
157
|
|
|
|
|
|
"\x{03A0}" => "\x16", # GREEK CAPITAL LETTER PI |
158
|
|
|
|
|
|
"\x{03A3}" => "\x18", # GREEK CAPITAL LETTER SIGMA |
159
|
|
|
|
|
|
"\x{03A6}" => "\x12", # GREEK CAPITAL LETTER PHI |
160
|
|
|
|
|
|
"\x{03A8}" => "\x17", # GREEK CAPITAL LETTER PSI |
161
|
|
|
|
|
|
"\x{03A9}" => "\x15", # GREEK CAPITAL LETTER OMEGA |
162
|
|
|
|
|
|
"\x{20AC}" => "\x1B\x65", # EURO SIGN |
163
|
|
|
|
|
|
); |
164
|
|
|
|
|
|
our %GSM2UNI = reverse %UNI2GSM; |
165
|
|
|
|
|
|
our $ESC = "\x1b"; |
166
|
|
|
|
|
|
our $ATMARK = "\x40"; |
167
|
|
|
|
|
|
our $FBCHAR = "\x3F"; |
168
|
|
|
|
|
|
our $NBSP = "\x{00A0}"; |
169
|
|
|
|
|
|
|
170
|
|
|
|
|
|
#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" |
171
|
|
|
|
|
|
|
172
|
|
|
|
|
|
sub decode ($$;$) { |
173
|
758
|
|
|
758
|
552510
|
my ( $obj, $bytes, $chk ) = @_; |
174
|
758
|
|
|
|
2610
|
my $str = substr($bytes, 0, 0); # to propagate taintedness; |
175
|
758
|
|
|
|
3080
|
while ( length $bytes ) { |
176
|
884
|
|
|
|
2762
|
my $c = substr( $bytes, 0, 1, '' ); |
177
|
884
|
|
|
|
1894
|
my $u; |
178
|
884
|
100
|
|
|
3804
|
if ( $c eq "\x00" ) { |
|
|
100
|
|
|
|
|
179
|
131
|
|
|
|
442
|
my $c2 = substr( $bytes, 0, 1, '' ); |
180
|
131
|
50
|
|
|
1805
|
$u = |
|
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
181
|
|
|
|
|
|
!length $c2 ? $ATMARK |
182
|
|
|
|
|
|
: $c2 eq "\x00" ? "\x{0000}" |
183
|
|
|
|
|
|
: exists $GSM2UNI{$c2} ? $ATMARK . $GSM2UNI{$c2} |
184
|
|
|
|
|
|
: $chk |
185
|
|
|
|
|
|
? croak sprintf( "\\x%02X\\x%02X does not map to Unicode", |
186
|
|
|
|
|
|
ord($c), ord($c2) ) |
187
|
|
|
|
|
|
: $ATMARK . $FBCHAR; |
188
|
|
|
|
|
|
|
189
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
elsif ( $c eq $ESC ) { |
191
|
132
|
|
|
|
487
|
my $c2 = substr( $bytes, 0, 1, '' ); |
192
|
132
|
50
|
|
|
1522
|
$u = |
|
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
193
|
|
|
|
|
|
exists $GSM2UNI{ $c . $c2 } ? $GSM2UNI{ $c . $c2 } |
194
|
|
|
|
|
|
: exists $GSM2UNI{$c2} ? $NBSP . $GSM2UNI{$c2} |
195
|
|
|
|
|
|
: $chk |
196
|
|
|
|
|
|
? croak sprintf( "\\x%02X\\x%02X does not map to Unicode", |
197
|
|
|
|
|
|
ord($c), ord($c2) ) |
198
|
|
|
|
|
|
: $NBSP . $FBCHAR; |
199
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
else { |
201
|
621
|
50
|
|
|
45858
|
$u = |
|
|
50
|
|
|
|
|
|
|
100
|
|
|
|
|
202
|
|
|
|
|
|
exists $GSM2UNI{$c} |
203
|
|
|
|
|
|
? $GSM2UNI{$c} |
204
|
|
|
|
|
|
: $chk ? ref $chk eq 'CODE' |
205
|
|
|
|
|
|
? $chk->( ord $c ) |
206
|
|
|
|
|
|
: croak sprintf( "\\x%02X does not map to Unicode", ord($c) ) |
207
|
|
|
|
|
|
: $FBCHAR; |
208
|
|
|
|
|
|
} |
209
|
756
|
|
|
|
3804
|
$str .= $u; |
210
|
|
|
|
|
|
} |
211
|
630
|
100
|
|
|
2057
|
$_[1] = $bytes if $chk; |
212
|
630
|
|
|
|
2894
|
return $str; |
213
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
215
|
|
|
|
|
|
#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" |
216
|
|
|
|
|
|
|
217
|
|
|
|
|
|
sub encode($$;$) { |
218
|
267
|
|
|
267
|
177219
|
my ( $obj, $str, $chk ) = @_; |
219
|
267
|
|
|
|
1365
|
my $bytes = substr($str, 0, 0); # to propagate taintedness |
220
|
267
|
|
|
|
1251
|
while ( length $str ) { |
221
|
395
|
|
|
|
1968
|
my $u = substr( $str, 0, 1, '' ); |
222
|
395
|
|
|
|
749
|
my $c; |
223
|
395
|
50
|
|
|
3706
|
$bytes .= |
|
|
50
|
|
|
|
|
|
|
100
|
|
|
|
|
224
|
|
|
|
|
|
exists $UNI2GSM{$u} |
225
|
|
|
|
|
|
? $UNI2GSM{$u} |
226
|
|
|
|
|
|
: $chk ? ref $chk eq 'CODE' |
227
|
|
|
|
|
|
? $chk->( ord($u) ) |
228
|
|
|
|
|
|
: croak sprintf( "\\x{%04x} does not map to %s", |
229
|
|
|
|
|
|
ord($u), $obj->name ) |
230
|
|
|
|
|
|
: $FBCHAR; |
231
|
|
|
|
|
|
} |
232
|
139
|
100
|
|
|
658
|
$_[1] = $str if $chk; |
233
|
139
|
|
|
|
745
|
return $bytes; |
234
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
236
|
|
|
|
|
|
1; |
237
|
|
|
|
|
|
__END__ |