line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package ZMachine::ZSCII 0.005; |
2
|
1
|
|
|
1
|
|
134790
|
use 5.14.0; |
|
1
|
|
|
|
|
6
|
|
3
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
22
|
|
4
|
|
|
|
|
|
|
# ABSTRACT: an encoder/decoder for Z-Machine text |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
4
|
use Carp (); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
14
|
|
7
|
1
|
|
|
1
|
|
3
|
use charnames ':full'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
15
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
#pod =head1 OVERVIEW |
10
|
|
|
|
|
|
|
#pod |
11
|
|
|
|
|
|
|
#pod ZMachine::ZSCII is a class for objects that are encoders/decoders of Z-Machine |
12
|
|
|
|
|
|
|
#pod text. Right now, ZMachine::ZSCII only implements Version 5 (and thus 7 and 8), |
13
|
|
|
|
|
|
|
#pod and even that partially. There is no abbreviation support yet. |
14
|
|
|
|
|
|
|
#pod |
15
|
|
|
|
|
|
|
#pod =head2 How Z-Machine Text Works |
16
|
|
|
|
|
|
|
#pod |
17
|
|
|
|
|
|
|
#pod The Z-Machine's text strings are composed of ZSCII characters. There are 1024 |
18
|
|
|
|
|
|
|
#pod ZSCII codepoints, although only the bottom eight bits' worth are ever used. |
19
|
|
|
|
|
|
|
#pod Codepoints 0x20 through 0x7E are identical with the same codepoints in ASCII or |
20
|
|
|
|
|
|
|
#pod Unicode. |
21
|
|
|
|
|
|
|
#pod |
22
|
|
|
|
|
|
|
#pod ZSCII codepoints are then encoded as strings of five-bit Z-characters. The |
23
|
|
|
|
|
|
|
#pod most common ZSCII characters, the lowercase English alphabet, can be encoded |
24
|
|
|
|
|
|
|
#pod with one Z-character. Uppercase letters, numbers, and common punctuation |
25
|
|
|
|
|
|
|
#pod ZSCII characters require two Z-characters each. Any other ZSCII character can |
26
|
|
|
|
|
|
|
#pod be encoded with four Z-characters. |
27
|
|
|
|
|
|
|
#pod |
28
|
|
|
|
|
|
|
#pod For storage on disk or in memory, the five-bit Z-characters are packed |
29
|
|
|
|
|
|
|
#pod together, three in a word, and laid out in bytestrings. The last word in a |
30
|
|
|
|
|
|
|
#pod string has its top bit set to mark the ending. When a bytestring would end |
31
|
|
|
|
|
|
|
#pod without enough Z-characters to pack a full word, it is padded. |
32
|
|
|
|
|
|
|
#pod (ZMachine::ZSCII pads with Z-character 0x05, a shift character.) |
33
|
|
|
|
|
|
|
#pod |
34
|
|
|
|
|
|
|
#pod Later versions of the Z-Machine allow the mapping of ZSCII codepoints to |
35
|
|
|
|
|
|
|
#pod Unicode codepoints to be customized. ZMachine::ZSCII does not yet support this |
36
|
|
|
|
|
|
|
#pod feature. |
37
|
|
|
|
|
|
|
#pod |
38
|
|
|
|
|
|
|
#pod ZMachine::ZSCII I<should> allow conversion between all four relevant |
39
|
|
|
|
|
|
|
#pod representations: Unicode text, ZSCII text, Z-character strings, and packed |
40
|
|
|
|
|
|
|
#pod Z-character bytestrings. All four forms are represented by Perl strings. |
41
|
|
|
|
|
|
|
#pod |
42
|
|
|
|
|
|
|
#pod =cut |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Default ZSCII -> Unicode table.  Keys are one-character ZSCII strings and
# values are the Unicode characters they decode to.  new() copies this table
# into each codec and then adds the "extra characters" starting at ZSCII 155.
my %DEFAULT_ZSCII = (
  chr(0x00) => "\N{NULL}",
  chr(0x08) => "\N{DELETE}",
  chr(0x0D) => "\x0D",
  chr(0x1B) => "\N{ESCAPE}",

  (map {; chr $_ => chr $_ } (0x20 .. 0x7E)), # ASCII maps over

  # 0x09B - 0x0FB are the "extra characters" and need Unicode translation table
  # 0x0FF - 0x3FF are undefined and never (?) used
);
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# We can use these characters below because they all (save for the magic A2-C6)
# are the same in Unicode/ASCII/ZSCII. -- rjbs, 2013-01-18
#
# The result is a single 78-character string: three 26-character alphabets
# (A0, A1, A2) laid end to end.  It is used when new() is given no "alphabet".
my $DEFAULT_ALPHABET = join(q{},
  'a' .. 'z', # A0
  'A' .. 'Z', # A1
  (           # A2
    "\0", # special: read 2 chars for 10-bit zscii character
    "\x0D",
    (0 .. 9),
    # qw would warn about the literal "," and "#" in the list, so that one
    # warning category is silenced just for this expression.
    do { no warnings 'qw'; qw[ . , ! ? _ # ' " / \ - : ( ) ] },
  ),
);
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Default "extra characters": Unicode characters given as hexadecimal
# codepoints.  new() assigns entry N of this list to ZSCII character 155 + N,
# so the order of the entries matters.  Used when new() is given no
# "extra_characters".
my @DEFAULT_EXTRA = map chr hex, qw(
  E4 F6 FC C4 D6 DC DF BB  AB EB EF FF CB CF E1 E9
  ED F3 FA FD C1 C9 CD D3  DA DD E0 E8 EC F2 F9 C0
  C8 CC D2 D9

  E2 EA EE F4 FB C2 CA CE  D4 DB E5 C5 F8 D8 E3 F1
  F5 C3 D1 D5 E6 C6 E7 C7  FE F0 DE D0 A3 153 152 A1
  BF
);
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# Sanity-check an alphabet table (a string of ZSCII characters).
#
# Croaks unless the table is exactly 78 characters long and every character
# has a codepoint of 0xFF or below.  Only warns (carp) when entry 52, the
# first slot of A2, is not NUL.  The invocant is ignored.
sub _validate_alphabet {
  my (undef, $alphabet) = @_;

  if (length($alphabet) != 78) {
    Carp::croak("alphabet table was not 78 entries long");
  }

  if (substr($alphabet, 52, 1) ne chr(0)) {
    Carp::carp("alphabet character 52 not set to 0x000");
  }

  # Anything above 0xFF could not be stored in a byte-wide alphabet table.
  my @too_wide = grep { ord($_) > 0xFF } split //, $alphabet;

  Carp::croak("alphabet table contains characters over 0xFF") if @too_wide;
}
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Build the lookup used when encoding: a hashref mapping each ZSCII character
# found in the alphabet table (plus the space) to the one- or two-Z-character
# sequence that encodes it.  The alphabet is validated first, so this croaks
# on a malformed table.
sub _shortcuts_for {
  my ($self, $alphabet) = @_;

  $self->_validate_alphabet($alphabet);

  # A0 entries need no prefix; A1 and A2 entries are preceded by the shift
  # Z-characters 0x04 and 0x05 respectively.
  my @shift_for = (q{}, chr(0x04), chr(0x05));

  # A space is always the single Z-character 0.
  my %shortcut = (q{ } => chr(0));

  for my $index (0 .. 77) {
    # Entry 52 (A2's first slot) is the ten-bit escape, not a character.
    next if $index == 52;  # that guy is magic! -- rjbs, 2013-01-18

    my $zchar = chr(6 + $index % 26);

    $shortcut{ substr($alphabet, $index, 1) }
      = $shift_for[ int($index / 26) ] . $zchar;
  }

  return \%shortcut;
}
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
#pod =method new |
114
|
|
|
|
|
|
|
#pod |
115
|
|
|
|
|
|
|
#pod my $z = ZMachine::ZSCII->new; |
116
|
|
|
|
|
|
|
#pod my $z = ZMachine::ZSCII->new(\%arg); |
117
|
|
|
|
|
|
|
#pod my $z = ZMachine::ZSCII->new($version); |
118
|
|
|
|
|
|
|
#pod |
119
|
|
|
|
|
|
|
#pod This returns a new codec. If the only argument is a number, it is treated as a |
120
|
|
|
|
|
|
|
#pod version specification. If no arguments are given, a Version 5 codec is made. |
121
|
|
|
|
|
|
|
#pod |
122
|
|
|
|
|
|
|
#pod Valid named arguments are: |
123
|
|
|
|
|
|
|
#pod |
124
|
|
|
|
|
|
|
#pod =begin :list |
125
|
|
|
|
|
|
|
#pod |
126
|
|
|
|
|
|
|
#pod = version |
127
|
|
|
|
|
|
|
#pod |
128
|
|
|
|
|
|
|
#pod The number of the Z-Machine targeted; at present, only 5, 7, or 8 are permitted |
129
|
|
|
|
|
|
|
#pod values. |
130
|
|
|
|
|
|
|
#pod |
131
|
|
|
|
|
|
|
#pod = extra_characters |
132
|
|
|
|
|
|
|
#pod |
133
|
|
|
|
|
|
|
#pod This is a reference to an array of between 0 and 97 Unicode characters. These |
134
|
|
|
|
|
|
|
#pod will be the characters to which ZSCII characters 155 through 251 map. They may not |
135
|
|
|
|
|
|
|
#pod duplicate any characters represented by the default ZSCII set. No Unicode |
136
|
|
|
|
|
|
|
#pod codepoint above U+FFFF is permitted, as it would not be representable in the |
137
|
|
|
|
|
|
|
#pod Z-Machine Unicode substitution table. |
138
|
|
|
|
|
|
|
#pod |
139
|
|
|
|
|
|
|
#pod If no extra characters are given, the default table is used. |
140
|
|
|
|
|
|
|
#pod |
141
|
|
|
|
|
|
|
#pod = alphabet |
142
|
|
|
|
|
|
|
#pod |
143
|
|
|
|
|
|
|
#pod This is a string of 78 characters, representing the three 26-character |
144
|
|
|
|
|
|
|
#pod alphabets used to encode ZSCII compactly into Z-characters. The first 26 |
145
|
|
|
|
|
|
|
#pod characters are alphabet 0, for the most common characters. The rest of the |
146
|
|
|
|
|
|
|
#pod characters are alphabets 1 and 2. |
147
|
|
|
|
|
|
|
#pod |
148
|
|
|
|
|
|
|
#pod No character with a ZSCII value greater than 0xFF may be included in the |
149
|
|
|
|
|
|
|
#pod alphabet. Character 52 (A2's first character) should be NUL. |
150
|
|
|
|
|
|
|
#pod |
151
|
|
|
|
|
|
|
#pod If no alphabet is given, the default alphabet is used. |
152
|
|
|
|
|
|
|
#pod |
153
|
|
|
|
|
|
|
#pod = alphabet_is_unicode |
154
|
|
|
|
|
|
|
#pod |
155
|
|
|
|
|
|
|
#pod By default, the values in the C<alphabet> are assumed to be ZSCII characters, |
156
|
|
|
|
|
|
|
#pod so that the contents of the alphabet table from the Z-Machine's memory can be |
157
|
|
|
|
|
|
|
#pod used directly. The C<alphabet_is_unicode> option specifies that the characters |
158
|
|
|
|
|
|
|
#pod in the alphabet string are Unicode characters. They will be converted to ZSCII |
159
|
|
|
|
|
|
|
#pod internally by the C<unicode_to_zscii> method, and if characters appear in the |
160
|
|
|
|
|
|
|
#pod alphabet that are not in the default ZSCII set or the extra characters, an |
161
|
|
|
|
|
|
|
#pod exception will be raised. |
162
|
|
|
|
|
|
|
#pod |
163
|
|
|
|
|
|
|
#pod =end :list |
164
|
|
|
|
|
|
|
#pod |
165
|
|
|
|
|
|
|
#pod =cut |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Constructor.  Accepts no argument (Version 5), a bare version number, or a
# hashref of named arguments (version, extra_characters, alphabet,
# alphabet_is_unicode).  Returns the blessed codec.  Croaks on an unsupported
# or missing version and on an invalid alphabet; confesses on an over-long
# extra-character table, on codepoints above U+FFFF, and on ambiguous mappings.
sub new {
  my ($class, $arg) = @_;

  # Normalize the three calling conventions to a hashref of named arguments.
  if (! defined $arg) {
    $arg = { version => 5 };
  } elsif (! ref $arg) {
    $arg = { version => $arg };
  }

  my $guts = { version => $arg->{version} };

  # Definedness is tested first so that a hashref lacking "version" croaks
  # with this message instead of first emitting "uninitialized" warnings.
  Carp::croak("only Version 5, 7, and 8 ZSCII are supported at present")
    unless defined $guts->{version}
       and (   $guts->{version} == 5
            or $guts->{version} == 7
            or $guts->{version} == 8);

  # Each codec gets its own copy so the extra characters added below never
  # leak into the shared default table.
  $guts->{zscii} = { %DEFAULT_ZSCII };

  # Why is this an arrayref and not, like alphabets, a string?
  # Alphabets are strings because they're guaranteed to fit in bytestrings.
  # You can't put a ZSCII character over 0xFF in the alphabet, because it can't
  # be put in the story file's alphabet table!  By using a string, it's easy to
  # just pass in the alphabet from memory to/from the codec.  On the other
  # hand, the Unicode translation table stores Unicode codepoint values packed
  # into words, and it's not a good fit for use in the codec.  Maybe a
  # ZMachine::Util will be useful for packing/unpacking Unicode translation
  # tables.
  $guts->{extra} = $arg->{extra_characters}
                || \@DEFAULT_EXTRA;

  Carp::confess("Unicode translation table exceeds maximum length of 97")
    if @{ $guts->{extra} } > 97;

  # Extra character N becomes ZSCII character 155 + N.
  for (0 .. $#{ $guts->{extra} }) {
    Carp::confess("tried to add ambiguous Z->U mapping")
      if exists $guts->{zscii}{ chr(155 + $_) };

    my $u_char = $guts->{extra}[$_];

    # Extra characters must go into the Unicode substitution table, which can
    # only represent characters with codepoints between 0 and 0xFFFF.  See
    # Z-Machine Spec v1.1, section 3.8.4.2.1
    Carp::confess("tried to add Unicode codepoint greater than U+FFFF")
      if ord($u_char) > 0xFFFF;

    $guts->{zscii}{ chr(155 + $_) } = $u_char;
  }

  # Build the reverse (Unicode -> ZSCII) table, refusing any Unicode character
  # that two different ZSCII characters would map to.
  $guts->{zscii_for} = { };
  for my $zscii_char (sort keys %{ $guts->{zscii} }) {
    my $unicode_char = $guts->{zscii}{$zscii_char};

    Carp::confess("tried to add ambiguous U->Z mapping")
      if exists $guts->{zscii_for}{ $unicode_char };

    $guts->{zscii_for}{ $unicode_char } = $zscii_char;
  }

  my $self = bless $guts => $class;

  # The default alphabet is entirely made up of characters that are the same in
  # Unicode and ZSCII.  If a user wants to put "extra characters" into the
  # alphabet table, though, the alphabet should contain ZSCII values.  When
  # we're building a ZMachine::ZSCII using the contents of the story file's
  # alphabet table, that's easy.  If we're building a codec to *produce* a
  # story file, it's less trivial, because we don't want to think about the
  # specific ZSCII codepoints for the Unicode text we'll encode.
  #
  # We provide alphabet_is_unicode to let the user say "my alphabet is supplied
  # in Unicode, please convert it to ZSCII during construction."  -- rjbs,
  # 2013-01-19
  my $alphabet = $arg->{alphabet} || $DEFAULT_ALPHABET;

  # It's okay if the user supplies alphabet_is_unicode but not alphabet,
  # because the default alphabet is all characters with the same value in both
  # character sets!  -- rjbs, 2013-01-20
  $alphabet = $self->unicode_to_zscii($alphabet)
    if $arg->{alphabet_is_unicode};

  $self->{alphabet} = $alphabet;
  $self->{shortcut} = $class->_shortcuts_for( $self->{alphabet} );

  return $self;
}
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
#pod =method encode |
253
|
|
|
|
|
|
|
#pod |
254
|
|
|
|
|
|
|
#pod my $packed_zchars = $z->encode( $unicode_text ); |
255
|
|
|
|
|
|
|
#pod |
256
|
|
|
|
|
|
|
#pod This method takes a string of text and encodes it to a bytestring of packed |
257
|
|
|
|
|
|
|
#pod Z-characters. |
258
|
|
|
|
|
|
|
#pod |
259
|
|
|
|
|
|
|
#pod Internally, it converts the Unicode text to ZSCII, then to Z-characters, and |
260
|
|
|
|
|
|
|
#pod then packs them. Before this processing, any native newline characters (the |
261
|
|
|
|
|
|
|
#pod value of C<\n>) are converted to C<\x0D> to match the Z-Machine's use of |
262
|
|
|
|
|
|
|
#pod character 0x00D for newline. |
263
|
|
|
|
|
|
|
#pod |
264
|
|
|
|
|
|
|
#pod =cut |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Unicode text in, packed Z-character bytestring out.  The work is delegated
# to unicode_to_zscii, zscii_to_zchars and pack_zchars, in that order.
sub encode {
  my ($self, $string) = @_;

  # Native newlines become 0x0D before any conversion.  Work on a copy.
  (my $text = $string) =~ s/\n/\x0D/g;

  my $zscii_text = $self->unicode_to_zscii($text);
  my $unpacked   = $self->zscii_to_zchars($zscii_text);

  return $self->pack_zchars($unpacked);
}
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
#pod =method decode |
278
|
|
|
|
|
|
|
#pod |
279
|
|
|
|
|
|
|
#pod my $text = $z->decode( $packed_zchars ); |
280
|
|
|
|
|
|
|
#pod |
281
|
|
|
|
|
|
|
#pod This method takes a bytestring of packed Z-characters and returns a string of |
282
|
|
|
|
|
|
|
#pod text. |
283
|
|
|
|
|
|
|
#pod |
284
|
|
|
|
|
|
|
#pod Internally, it unpacks the Z-characters, converts them to ZSCII, and then |
285
|
|
|
|
|
|
|
#pod converts those to Unicode. Any ZSCII characters 0x00D are converted to the |
286
|
|
|
|
|
|
|
#pod value of C<\n>. |
287
|
|
|
|
|
|
|
#pod |
288
|
|
|
|
|
|
|
#pod =cut |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# Packed Z-character bytestring in, Unicode text out.  The work is delegated
# to unpack_zchars, zchars_to_zscii and zscii_to_unicode, in that order.
sub decode {
  my ($self, $bytestring) = @_;

  my $unpacked   = $self->unpack_zchars($bytestring);
  my $zscii_text = $self->zchars_to_zscii($unpacked);
  my $text       = $self->zscii_to_unicode($zscii_text);

  # Every 0x0D in the result is turned back into the native newline.
  return $text =~ s/\x0D/\n/gr;
}
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
#pod =method unicode_to_zscii |
303
|
|
|
|
|
|
|
#pod |
304
|
|
|
|
|
|
|
#pod my $zscii_string = $z->unicode_to_zscii( $unicode_string ); |
305
|
|
|
|
|
|
|
#pod |
306
|
|
|
|
|
|
|
#pod This method converts a Unicode string to a ZSCII string, using the dialect of |
307
|
|
|
|
|
|
|
#pod ZSCII for the ZMachine::ZSCII's configuration. |
308
|
|
|
|
|
|
|
#pod |
309
|
|
|
|
|
|
|
#pod If the Unicode input contains any characters that cannot be mapped to ZSCII, an |
310
|
|
|
|
|
|
|
#pod exception is raised. |
311
|
|
|
|
|
|
|
#pod |
312
|
|
|
|
|
|
|
#pod =cut |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# Translate a Unicode string to ZSCII one character at a time using the
# codec's zscii_for table.  Croaks, naming the offending codepoint, on the
# first character that has no entry in the table.
sub unicode_to_zscii {
  my ($self, $unicode_text) = @_;

  my @converted;

  for my $u_char (split //, $unicode_text) {
    my $z_char = $self->{zscii_for}{ $u_char };

    if (! defined $z_char) {
      Carp::croak(
        sprintf "no ZSCII character available for Unicode U+%v05X <%s>",
          $u_char,
          charnames::viacode(ord $u_char),
      );
    }

    push @converted, $z_char;
  }

  return join q{}, @converted;
}
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
#pod =method zscii_to_unicode |
334
|
|
|
|
|
|
|
#pod |
335
|
|
|
|
|
|
|
#pod my $unicode_string = $z->zscii_to_unicode( $zscii_string ); |
336
|
|
|
|
|
|
|
#pod |
337
|
|
|
|
|
|
|
#pod This method converts a ZSCII string to a Unicode string, using the dialect of |
338
|
|
|
|
|
|
|
#pod ZSCII for the ZMachine::ZSCII's configuration. |
339
|
|
|
|
|
|
|
#pod |
340
|
|
|
|
|
|
|
#pod If the ZSCII input contains any characters that cannot be mapped to Unicode, an |
341
|
|
|
|
|
|
|
#pod exception is raised. I<In the future, it may be possible to request a Unicode
|
342
|
|
|
|
|
|
|
#pod replacement character instead.> |
343
|
|
|
|
|
|
|
#pod |
344
|
|
|
|
|
|
|
#pod =cut |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# Translate a ZSCII string to Unicode one character at a time using the
# codec's zscii table.  Croaks on the first ZSCII character that has no entry
# in the table.
sub zscii_to_unicode {
  my ($self, $zscii) = @_;

  my @converted;

  for my $z_char (split //, $zscii) {
    my $u_char = $self->{zscii}{ $z_char };

    if (! defined $u_char) {
      Carp::croak(
        sprintf "no Unicode character available for ZSCII %#v05x", $z_char,
      );
    }

    push @converted, $u_char;
  }

  return join q{}, @converted;
}
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
#pod =method zscii_to_zchars |
364
|
|
|
|
|
|
|
#pod |
365
|
|
|
|
|
|
|
#pod my $zchars = $z->zscii_to_zchars( $zscii_string ); |
366
|
|
|
|
|
|
|
#pod |
367
|
|
|
|
|
|
|
#pod Given a string of ZSCII characters, this method will return a (unpacked) string |
368
|
|
|
|
|
|
|
#pod of Z-characters. |
369
|
|
|
|
|
|
|
#pod |
370
|
|
|
|
|
|
|
#pod It will raise an exception on ZSCII codepoints that cannot be represented as |
371
|
|
|
|
|
|
|
#pod Z-characters, which should not be possible with legal ZSCII. |
372
|
|
|
|
|
|
|
#pod |
373
|
|
|
|
|
|
|
#pod =cut |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# Turn a ZSCII string into a string of unpacked Z-characters.
#
# A character found in the codec's shortcut table is emitted as its shortcut.
# Anything else is emitted as the four-Z-character ten-bit form: 0x05, 0x06,
# then the high and low five bits of the codepoint.  Croaks on codepoints of
# 1024 or more, which do not fit in ten bits.
sub zscii_to_zchars {
  my ($self, $zscii) = @_;

  return '' unless length $zscii;

  my @pieces;

  for my $z_char (split //, $zscii) {
    my $shortcut = $self->{shortcut}{ $z_char };

    if (defined $shortcut) {
      push @pieces, $shortcut;
      next;
    }

    my $codepoint = ord $z_char;

    Carp::croak(
      sprintf "can't encode ZSCII codepoint %#v05x in Z-characters",
        $z_char
    ) if $codepoint >= 1024;

    push @pieces,
      "\x05\x06",                    # The escape code for a ten-bit ZSCII character.
      chr($codepoint >> 5),          # top five bits
      chr($codepoint & 0b11111);     # bottom five bits
  }

  return join q{}, @pieces;
}
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
#pod =method zchars_to_zscii |
408
|
|
|
|
|
|
|
#pod |
409
|
|
|
|
|
|
|
#pod my $zscii = $z->zchars_to_zscii( $zchars_string, \%arg ); |
410
|
|
|
|
|
|
|
#pod |
411
|
|
|
|
|
|
|
#pod Given a string of (unpacked) Z-characters, this method will return a string of |
412
|
|
|
|
|
|
|
#pod ZSCII characters. |
413
|
|
|
|
|
|
|
#pod |
414
|
|
|
|
|
|
|
#pod It will raise an exception when the right thing to do can't be determined. |
415
|
|
|
|
|
|
|
#pod Right now, that could mean lots of things. |
416
|
|
|
|
|
|
|
#pod |
417
|
|
|
|
|
|
|
#pod Valid arguments are: |
418
|
|
|
|
|
|
|
#pod |
419
|
|
|
|
|
|
|
#pod =begin :list |
420
|
|
|
|
|
|
|
#pod |
421
|
|
|
|
|
|
|
#pod = allow_early_termination |
422
|
|
|
|
|
|
|
#pod |
423
|
|
|
|
|
|
|
#pod If C<allow_early_termination> is true, no exception is thrown if the |
424
|
|
|
|
|
|
|
#pod Z-character string ends in the middle of a four z-character sequence. This is |
425
|
|
|
|
|
|
|
#pod useful when dealing with dictionary words. |
426
|
|
|
|
|
|
|
#pod |
427
|
|
|
|
|
|
|
#pod =end :list |
428
|
|
|
|
|
|
|
#pod |
429
|
|
|
|
|
|
|
#pod =cut |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# Decode a string of unpacked Z-characters into a ZSCII string.
#
# Arguments: the Z-character string and an optional hashref.  If its
# allow_early_termination entry is true, a ten-bit sequence cut short by the
# end of input ends decoding quietly instead of croaking.
#
# Croaks on a truncated ten-bit sequence (unless allowed, as above) and on
# any Z-character this decoder does not handle (1-3, or above 0x1F).
sub zchars_to_zscii {
  my ($self, $zchars, $arg) = @_;
  $arg ||= {};

  my $text = '';

  # Alphabet (0, 1 or 2) that applies to the *next* Z-character only.
  my $alphabet = 0;

  # $zchars is a private copy; it is consumed from the front as we go.
  while (length( my $char = substr $zchars, 0, 1, '')) {
    my $ord = ord $char;

    if ($ord == 0) {
      $text .= q{ };

      # A shift covers only the single Z-character after it, so a space
      # uses up any pending shift just as a letter would.  Without this,
      # the sequence 4, 0, 6 decoded to " A" rather than " a".
      $alphabet = 0;
      next;
    }

    # Shift characters: pick the alphabet for the following Z-character.
    if    ($ord == 0x04) { $alphabet = 1; next }
    elsif ($ord == 0x05) { $alphabet = 2; next }

    # A2's first slot is the escape: the next two Z-characters carry a
    # ten-bit ZSCII codepoint, high five bits first.
    if ($alphabet == 2 && $ord == 0x06) {
      my $next_two = substr $zchars, 0, 2, '';
      if (length $next_two != 2) {
        last if $arg->{allow_early_termination};
        Carp::croak("ten-bit ZSCII encoding segment terminated early")
      }

      my $value = ord(substr $next_two, 0, 1) << 5
                | ord(substr $next_two, 1, 1);

      $text .= chr $value;
      $alphabet = 0;
      next;
    }

    # Ordinary entry: look it up in the current alphabet's 26-character slice.
    if ($ord >= 0x06 && $ord <= 0x1F) {
      $text .= substr $self->{alphabet}, (26 * $alphabet) + $ord - 6, 1;
      $alphabet = 0;
      next;
    }

    Carp::croak("unknown zchar <$char> encountered in alphabet <$alphabet>");
  }

  return $text;
}
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
#pod =method make_dict_length |
474
|
|
|
|
|
|
|
#pod |
475
|
|
|
|
|
|
|
#pod my $zchars = $z->make_dict_length( $zchars_string ) |
476
|
|
|
|
|
|
|
#pod |
477
|
|
|
|
|
|
|
#pod This method returns the Z-character string fit to dictionary length for the |
478
|
|
|
|
|
|
|
#pod Z-machine version being handled. It will trim excess characters or pad with |
479
|
|
|
|
|
|
|
#pod Z-character 5 to be the right length. |
480
|
|
|
|
|
|
|
#pod |
481
|
|
|
|
|
|
|
#pod When converting such strings back to ZSCII, you should pass the |
482
|
|
|
|
|
|
|
#pod C<allow_early_termination> option to C<zchars_to_zscii>, as a four-Z-character |
483
|
|
|
|
|
|
|
#pod sequence may have been terminated early. |
484
|
|
|
|
|
|
|
#pod |
485
|
|
|
|
|
|
|
#pod =cut |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# Force a Z-character string to dictionary-word length: 9 Z-characters when
# the codec's version is 5 or higher, otherwise 6.  Longer input is cut off;
# shorter input is padded on the right with Z-character 0x05.
sub make_dict_length {
  my ($self, $zchars) = @_;

  my $width = 6;
  $width = 9 if $self->{version} >= 5;

  # Append a full width of padding, then keep only the first $width characters.
  my $padded = $zchars . ("\x05" x $width);

  return substr $padded, 0, $width;
}
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
#pod =method pack_zchars |
498
|
|
|
|
|
|
|
#pod |
499
|
|
|
|
|
|
|
#pod my $packed_zchars = $z->pack_zchars( $zchars_string ); |
500
|
|
|
|
|
|
|
#pod |
501
|
|
|
|
|
|
|
#pod This method takes a string of unpacked Z-characters and packs them into a |
502
|
|
|
|
|
|
|
#pod bytestring with three Z-characters per word. The final word will have its top |
503
|
|
|
|
|
|
|
#pod bit set. |
504
|
|
|
|
|
|
|
#pod |
505
|
|
|
|
|
|
|
#pod =cut |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
# Pack a string of unpacked Z-characters into big-endian 16-bit words, three
# five-bit Z-characters per word.  A short final group is padded with
# Z-character 0x05, and the final word has its top bit set.  Empty input
# yields an empty bytestring.
#
# Croaks if any character is above 0x1F.  Such a value does not fit in five
# bits and would otherwise spill into the neighbouring fields or the
# end-of-string bit.
sub pack_zchars {
  my ($self, $zchars) = @_;

  my $bytestring = '';

  # Loop on length, not on the truth of the chunk: a trailing chunk that
  # happens to be the string "0" is false and used to be dropped silently,
  # leaving the output without its terminating word.
  while (length $zchars) {
    my $chunk = substr $zchars, 0, 3, '';
    $chunk .= chr(5) x (3 - length($chunk));

    my @value = map { ord } split //, $chunk;

    for my $zchar (@value) {
      Carp::croak(
        sprintf "can't pack Z-character %#04x: Z-characters are five bits wide",
          $zchar
      ) if $zchar > 0x1F;
    }

    my $word = $value[0] << 10
             | $value[1] <<  5
             | $value[2];

    # Nothing left to pack means this is the last word: mark it.
    $word |= 0x8000 unless length $zchars;

    $bytestring .= pack 'n', $word;
  }

  return $bytestring;
}
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
#pod =method unpack_zchars |
528
|
|
|
|
|
|
|
#pod |
529
|
|
|
|
|
|
|
#pod my $zchars_string = $z->unpack_zchars( $packed_zchars ); |
530
|
|
|
|
|
|
|
#pod |
531
|
|
|
|
|
|
|
#pod Given a bytestring of packed Z-characters, this method will unpack them into a |
532
|
|
|
|
|
|
|
#pod string of unpacked Z-characters that aren't packed anymore because they're |
533
|
|
|
|
|
|
|
#pod unpacked instead of packed. |
534
|
|
|
|
|
|
|
#pod |
535
|
|
|
|
|
|
|
#pod Exceptions are raised if the input bytestring isn't made of an even number of |
536
|
|
|
|
|
|
|
#pod octets, or if the string continues past the first word with its top bit set. |
537
|
|
|
|
|
|
|
#pod |
538
|
|
|
|
|
|
|
#pod =cut |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# Unpack a bytestring of big-endian 16-bit words into a string of
# Z-characters, three per word.  Croaks if the input has an odd number of
# bytes, or if any word follows one whose top bit is set.
sub unpack_zchars {
  my ($self, $bytestring) = @_;

  if (length($bytestring) % 2) {
    Carp::croak("bytestring of packed zchars is not an even number of bytes");
  }

  my $zchars     = '';
  my $seen_final = 0;

  for my $word (unpack 'n*', $bytestring) {
    # XXX: Probably allow this to warn and `last` -- rjbs, 2013-01-18
    Carp::croak("input continues after terminating byte") if $seen_final;

    $seen_final = $word & 0x8000;

    # Peel off the three five-bit fields, highest first.
    $zchars .= join q{}, map { chr( ($word >> $_) & 0x1F ) } (10, 5, 0);
  }

  return $zchars;
}
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
1; |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
__END__ |