line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Smaz; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
121083
|
use 5.006; |
|
2
|
|
|
|
|
12
|
|
4
|
2
|
|
|
2
|
|
9
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
58
|
|
5
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
48
|
|
6
|
2
|
|
|
2
|
|
1084
|
use utf8; |
|
2
|
|
|
|
|
26
|
|
|
2
|
|
|
|
|
12
|
|
7
|
|
|
|
|
|
|
our $VERSION = '1.02'; |
8
|
2
|
|
|
2
|
|
882
|
use open ":std", ":encoding(UTF-8)"; |
|
2
|
|
|
|
|
2160
|
|
|
2
|
|
|
|
|
15
|
|
9
|
2
|
|
|
2
|
|
22583
|
use base 'Import::Export'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
891
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Export table: each exportable sub name maps to the list of tags it belongs
# to. Both public functions are in the "all" tag.
# NOTE(review): presumably consumed by the Import::Export base class -- confirm.
our %EX = map { ( $_ => ['all'] ) } qw(smaz_compress smaz_decompress);
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# %CODEBOOK maps a text fragment to its one-character code number;
# %REVERSE_CODEBOOK is the inverse (code number => fragment).
our (%CODEBOOK, %REVERSE_CODEBOOK);

BEGIN {
    # The position of a fragment in this list IS its code (0 .. 253), so the
    # order must never change. Codes 254 and 255 are not in the table: they
    # are the verbatim markers produced by flush_verbatim().
    my @fragments = (
        # 0
        " ", "the", "e", "t", "a", "of", "o", "and", "i", "n",
        # 10
        "s", "e ", "r", " th", " t", "in", "he", "th", "h", "he ",
        # 20
        "to", "\r\n", "l", "s ", "d", " a", "an", "er", "c", " o",
        # 30
        "d ", "on", " of", "re", "of ", "t ", ", ", "is", "u", "at",
        # 40 (code 40 is exactly three spaces)
        "   ", "n ", "or", "which", "f", "m", "as", "it", "that", "\n",
        # 50 (code 52 is exactly two spaces)
        "was", "en", "  ", " w", "es", " an", " i", "\r", "f ", "g",
        # 60
        "p", "nd", " s", "nd ", "ed ", "w", "ed", "http://", "for", "te",
        # 70
        "ing", "y ", "The", " c", "ti", "r ", "his", "st", " in", "ar",
        # 80
        "nt", ",", " to", "y", "ng", " h", "with", "le", "al", "to ",
        # 90
        "b", "ou", "be", "were", " b", "se", "o ", "ent", "ha", "ng ",
        # 100
        "their", "\"", "hi", "from", " f", "in ", "de", "ion", "me", "v",
        # 110
        ".", "ve", "all", "re ", "ri", "ro", "is ", "co", "f t", "are",
        # 120
        "ea", ". ", "her", " m", "er ", " p", "es ", "by", "they", "di",
        # 130 (code 133 is "s, " with a trailing space; code 182 is "s,")
        "ra", "ic", "not", "s, ", "d t", "at ", "ce", "la", "h ", "ne",
        # 140
        "as ", "tio", "on ", "n t", "io", "we", " a ", "om", ", a", "s o",
        # 150
        "ur", "li", "ll", "ch", "had", "this", "e t", "g ", "e\r\n", " wh",
        # 160
        "ere", " co", "e o", "a ", "us", " d", "ss", "\n\r\n", "\r\n\r", "=\"",
        # 170
        " be", " e", "s a", "ma", "one", "t t", "or ", "but", "el", "so",
        # 180
        "l ", "e s", "s,", "no", "ter", " wa", "iv", "ho", "e a", " r",
        # 190
        "hat", "s t", "ns", "ch ", "wh", "tr", "ut", "/", "have", "ly ",
        # 200
        "ta", " ha", " on", "tha", "-", " l", "ati", "en ", "pe", " re",
        # 210
        "there", "ass", "si", " fo", "wa", "ec", "our", "who", "its", "z",
        # 220
        "fo", "rs", ">", "ot", "un", "<", "im", "th ", "nc", "ate",
        # 230
        "><", "ver", "ad", " we", "ly", "ee", " n", "id", " cl", "ac",
        # 240
        "il", "</", "rt", " wi", "div", "e, ", " it", "whi", " ma", "ge",
        # 250
        "x", "e c", "men", ".com",
    );

    # Hash-slice assignment: fragment => index in the list above.
    @CODEBOOK{@fragments} = (0 .. $#fragments);

    # A repeated fragment would silently overwrite an earlier code and leave a
    # hole in the reverse table, so refuse to load in that case.
    die "Smaz: duplicate fragment in codebook\n"
        unless keys(%CODEBOOK) == @fragments;

    %REVERSE_CODEBOOK = reverse %CODEBOOK;
}
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Turn a run of literal characters into its encoded form.
#
# Takes the literal text and returns a list of one-character strings:
#   - one character:   chr(254) followed by that character;
#   - several:         chr(255), chr(length - 1), then every character.
# Callers are expected to pass a non-empty run of at most 256 characters.
sub flush_verbatim {
    my ($literal) = @_;

    my $size   = length $literal;
    my @header = $size > 1
        ? ( chr(255), chr($size - 1) )
        : ( chr(254) );

    return ( @header, split( '', $literal ) );
}
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# Compress a string using the codebook in %CODEBOOK.
#
# The input is scanned left to right. At each position the longest codebook
# fragment (windows of at most 7 characters) that matches is emitted as a
# single code character. Characters that match no fragment are collected and
# emitted through flush_verbatim() as literal runs of at most 256 characters.
#
# Takes the text to compress; returns the compressed string. An undefined or
# empty input yields the empty string.
sub smaz_compress {
    my ($input) = @_;
    return '' unless defined $input && length $input;

    my $input_length = length $input;
    my ($verbatim, $input_index, @output) = ('', 0);

    while ($input_index < $input_length) {
        my $remaining = $input_length - $input_index;
        my $longest   = $remaining < 7 ? $remaining : 7;
        my $matched   = 0;

        # Probe every window size from $longest down to 1, longest first.
        # (The earlier hand-rolled C-style loop stepped with a post-decrement,
        # so it tried the largest size twice and never tried size 1.)
        for my $size (reverse 1 .. $longest) {
            my $code = $CODEBOOK{ substr $input, $input_index, $size };
            next unless defined $code;

            # Test the length, not the truth value: a pending literal run of
            # "0" is false in Perl and would otherwise never be flushed.
            if (length $verbatim) {
                push @output, flush_verbatim($verbatim);
                $verbatim = '';
            }

            push @output, chr $code;
            $input_index += $size;
            $matched = 1;
            last;
        }
        next if $matched;

        # No fragment starts here: keep the character as a literal.
        $verbatim .= substr $input, $input_index++, 1;

        # A literal run stores (length - 1) in one character, so cap it at 256.
        if (length($verbatim) == 256) {
            push @output, flush_verbatim($verbatim);
            $verbatim = '';
        }
    }

    push @output, flush_verbatim($verbatim) if length $verbatim;
    return join '', @output;
}
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# Expand a string produced by smaz_compress().
#
# Each input character is interpreted by its ordinal value:
#   254  - the next character is copied literally;
#   255  - the next character holds (run length - 1) and is followed by that
#          many literal characters;
#   else - looked up in %REVERSE_CODEBOOK and replaced by its fragment.
#
# Takes the compressed string; returns the expanded text. An undefined or
# empty input yields the empty string. Dies with 'Malformed SMAZ' when a
# marker runs past the end of the input or a code has no codebook entry.
sub smaz_decompress {
    my ($str_input) = @_;
    return '' unless defined $str_input && length $str_input;

    my @input = map { ord($_) } split //, $str_input;
    my $total = scalar @input;
    my ($output, $i) = ('', 0);

    while ($i < $total) {
        my $code = $input[$i];

        if ($code == 254) {
            # The literal character must exist at index $i + 1.
            die 'Malformed SMAZ' if $i + 1 >= $total;
            $output .= substr $str_input, $i + 1, 1;
            $i += 2;
        } elsif ($code == 255) {
            # The length character must exist before it can be read.
            die 'Malformed SMAZ' if $i + 1 >= $total;
            my $run = $input[$i + 1] + 1;

            # The last literal sits at index $i + 1 + $run. As before, a run
            # that would end exactly at the end of the input is rejected too.
            die 'Malformed SMAZ' if $i + 1 + $run >= $total;

            $output .= substr $str_input, $i + 2, $run;
            $i += 2 + $run;
        } else {
            my $fragment = $REVERSE_CODEBOOK{$code};
            die 'Malformed SMAZ' unless defined $fragment;
            $output .= $fragment;
            $i++;
        }
    }

    return $output;
}
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
__END__ |