line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package OCR::PerfectCR; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ABOVE the 'use strict' line! |
4
|
|
|
|
|
|
|
$VERSION = 0.03; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
808
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
7
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
8
|
1
|
|
|
1
|
|
969
|
use IO::File; |
|
1
|
|
|
|
|
14078
|
|
|
1
|
|
|
|
|
162
|
|
9
|
1
|
|
|
1
|
|
452
|
use GD; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use Digest::MD5 'md5_hex'; |
11
|
|
|
|
|
|
|
use Graphics::ColorObject; |
12
|
|
|
|
|
|
|
use Carp 'croak'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
OCR::PerfectCR - Perfect OCR (if you have perfect input). |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use OCR::PerfectCR; |
21
|
|
|
|
|
|
|
use GD; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my $recognizer = OCR::PerfectCR->new; |
24
|
|
|
|
|
|
|
$recognizer->load_charmap_file("charmap"); |
25
|
|
|
|
|
|
|
my $image = GD::Image->new("example.png") or die "Can't open example.png: $!"; |
26
|
|
|
|
|
|
|
my $string = $recognizer->recognize($image); |
27
|
|
|
|
|
|
|
$recognizer->save_charmap_file("charmap"); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
OCR::PerfectCR is a fast, highly accurate "optical" character recognition |
33
|
|
|
|
|
|
|
engine requiring minimal training. How does it manage this, despite |
34
|
|
|
|
|
|
|
being written in pure perl? By ignoring most of the problems. |
35
|
|
|
|
|
|
|
OCR::PerfectCR requires that your input is in perfect shape -- that it |
36
|
|
|
|
|
|
|
hasn't gone into the real world and been scanned, that each image |
37
|
|
|
|
|
|
|
represent one line of text, and nothing else, and most difficultly, |
38
|
|
|
|
|
|
|
that the font have a fairly wide spacing. This makes it very useful |
39
|
|
|
|
|
|
|
for converting image-based subtitle formats to text, and probably not |
40
|
|
|
|
|
|
|
much else. However, it is very good at doing that. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
OCR::PerfectCR's knowledge about a particular font is encapsulated in a |
43
|
|
|
|
|
|
|
"charmap" file, which maps md5 sums of the canonical representation |
44
|
|
|
|
|
|
|
of a character (the first 32 characters of the line) to a string (the |
45
|
|
|
|
|
|
|
34th and onwards chars, to newline). |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Most methods will die on error, rather than trying to recover and return undef. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=cut |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 $recognizer->load_charmap_file("charmap") |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Loads a charmap file into memory. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=cut |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Load a charmap file into $self->{charmap}.
#
# Every line that is neither empty nor a '#' comment is split into a
# 32-character md5 hex digest, one separator character (discarded), and the
# string that glyph stands for (the remainder of the line).  Entries are
# merged into whatever charmap is already in memory; an entry read later
# overwrites an earlier one with the same digest.
#
# Parameters: the recognizer object, and the name of the file to read.
# Returns nothing.  Croaks if the file cannot be opened.
sub load_charmap_file {
    my $self     = shift;
    my $filename = shift;

    # The mode is passed as a separate argument so the filename is always
    # taken literally: leading/trailing whitespace or characters such as
    # '>' and '|' in the name can no longer be mistaken for part of the mode.
    my $charmapfile = IO::File->new($filename, '<') or
      croak "Couldn't open $filename: $!";
    binmode($charmapfile, ':utf8');

    while (my $line = <$charmapfile>) {
        chomp $line;
        next if !$line or $line =~ m/^#/;

        # Peel the digest off the front, then drop the single separator
        # character; what is left is the value.
        my $md5 = substr($line, 0, 32, '');
        substr($line, 0, 1, '');
        $self->{charmap}{$md5} = $line;
    }
    $charmapfile->close;

    return;
}
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head2 $recognizer->save_charmap_file("charmap") |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Saves the charmap to a file. Charmap files are always saved and |
83
|
|
|
|
|
|
|
loaded in utf8. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Write the in-memory charmap to $filename, one "<md5> <string>" line per
# entry, encoded as utf8.
#
# Lines are ordered by the recognised string first and by digest second, so
# the output is stable and entries for the same string sit together.  A
# recognizer that has no charmap yet produces an empty file.
#
# Parameters: the recognizer object, and the name of the file to write.
# Returns nothing.  Croaks if the file cannot be opened, written or closed.
sub save_charmap_file {
    my ($recognizer, $filename) = @_;

    # Mode passed separately so the filename is always taken literally.
    my $charmapfile = IO::File->new($filename, '>') or
      croak "Couldn't open $filename: $!";
    binmode($charmapfile, ':utf8');

    # Fall back to an empty map: dereferencing an undefined charmap would
    # otherwise die after the target file has already been truncated.
    my %images = %{ $recognizer->{charmap} || {} };

    {
        # Values may legitimately be undef; sort and print them as ''.
        no warnings 'uninitialized';
        my @ordered = sort { $images{$a} cmp $images{$b} or $a cmp $b }
                      keys %images;
        for my $md5 (@ordered) {
            $charmapfile->print("$md5 $images{$md5}\n")
              or croak "Couldn't write to $filename: $!";
        }
    }

    # Buffered write errors only surface at close time.
    $charmapfile->close
      or croak "Couldn't close $filename: $!";

    return;
}
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head2 $recognizer->recognize($image) (recognise is an alias for this) |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Takes the image (a GD::Image object), and tries to convert it into |
109
|
|
|
|
|
|
|
text. In list context, returns a list of hashrefs, each having a |
110
|
|
|
|
|
|
|
C<str> key, whose value is the string in the charmap for that image. |
111
|
|
|
|
|
|
|
There may also be a C<color> (note the spelling) key, with a value |
112
|
|
|
|
|
|
|
between 0 and 360, representing the color of the text in degrees on |
113
|
|
|
|
|
|
|
the color wheel, or C<undef> meaning grey. The C<color> key being missing |
114
|
|
|
|
|
|
|
implies that there is nothing there but background -- that is, that |
115
|
|
|
|
|
|
|
it's whitespace. For non-whitespace characters, there is a key |
116
|
|
|
|
|
|
|
C<md5>, which gives the md5 sum of the character in canonical form -- |
117
|
|
|
|
|
|
|
that is, its charmap entry. Other keys are purposefully not |
118
|
|
|
|
|
|
|
documented -- if you find them useful, let me know by filing |
119
|
|
|
|
|
|
|
an RT request. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
Characters not in the charmap will have their str set to C<"\x{FFFD}" |
122
|
|
|
|
|
|
|
eq "\N{REPLACEMENT CHARACTER}">, and will be added to the charmap. |
123
|
|
|
|
|
|
|
They will also be saved as png files named I<md5>.png in the current |
124
|
|
|
|
|
|
|
directory, so that a human can look at them and ID them. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Public entry point.  Forwards all of its arguments to chopup(), adding
# charimage() as the per-glyph callback, and hands chopup()'s result straight
# back to the caller in the caller's own context.
sub recognize {
    return chopup(@_, \&charimage);
}

# British-spelling alias for recognize.  The assignment is deliberately made
# twice: mentioning the glob a second time avoids Perl's "used only once"
# warning.
*recognise = *recognize;
*recognise = *recognize;
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 OCR::PerfectCR->new(); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Just a boring constructor. No parameters. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Constructor.  Takes no parameters beyond the class name and returns an
# empty hash blessed into that class.
sub new {
    my $class = shift;
    my $self  = {};
    return bless $self, $class;
}
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head1 BUGS |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Please report bugs on L<http://rt.cpan.org/>. If the bug /might possibly/ be because of your input file, please include it with the bug report. |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head1 AUTHOR & LICENSE |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
Copyright 2005 James Mastros, james@mastros.biz, JMASTROS, theorbtwo. (Those are all the same person.) |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
May be used and copied under the same terms as C<perl> itself. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Thanks, castaway, for being you, and diotalevi for a detailed review. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=cut |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
### Internal functions below here. |
161
|
|
|
|
|
|
|
# Per-glyph callback used by recognize(): identify one character image.
#
# The glyph is reduced to black and white by image_to_grey(), fingerprinted
# by imagesum(), and looked up in the recognizer's charmap.  A fingerprint
# seen for the first time is entered into the charmap as U+FFFD and the
# black-and-white glyph is written to "<md5>.png" in the current directory.
#
# Parameters: the recognizer, the glyph image, and the background colour as
# three RGB components.
# Returns the hashref produced by image_to_grey() with 'md5' and 'str' added.
# Dies if the png file cannot be created.
sub charimage {
    my ($recognizer, $image, @bgrgb) = @_;

    my ($glyph, $this) = image_to_grey($image, @bgrgb);

    my $md5 = imagesum($glyph);
    $this->{md5} = $md5;

    my $charmap = $recognizer->{charmap} ||= {};
    unless (exists $charmap->{$md5}) {
        # Record the unknown glyph first, then dump it for a human to label.
        $charmap->{$md5} = "\x{FFFD}";

        my $file = IO::File->new(">$md5.png") or die "Couldn't create $md5.png: $!";
        binmode($file);
        $file->print($glyph->png);
    }

    $this->{str} = $charmap->{$md5};

    return $this;
}
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Cache of conversions already performed, keyed by the packed RGB value.
my %rgb255_to_hsv;

# Convert an (r, g, b) triple to the list Graphics::ColorObject returns from
# as_HSV, using its 'PAL' space.  Each distinct colour is converted once and
# served from the cache afterwards.
sub RGB255_to_HSV {
    my ($r, $g, $b) = @_;

    # One number per colour: r, g and b occupy successive base-256 digits.
    my $key = $r * 0x10000 + $g*0x100 + $b;

    unless (exists $rgb255_to_hsv{$key}) {
        my $colour = Graphics::ColorObject->new_RGB255(\@_, space=>'PAL');
        $rgb255_to_hsv{$key} = $colour->as_HSV;
    }

    return @{$rgb255_to_hsv{$key}};
}
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Cache of conversions already performed, keyed by the string "h,s,v".
my %hsv_to_rgb255;

# Convert an (h, s, v) triple to the list Graphics::ColorObject returns from
# as_RGB255, using its 'PAL' space.  Each distinct triple is converted once
# and served from the cache afterwards.
sub HSV_to_RGB255 {
    my ($h, $s, $v) = @_;

    my $key = "$h,$s,$v";

    unless (exists $hsv_to_rgb255{$key}) {
        my $colour = Graphics::ColorObject->new_HSV(\@_, space=>'PAL');
        $hsv_to_rgb255{$key} = $colour->as_RGB255;
    }

    return @{$hsv_to_rgb255{$key}};
}
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# Reduce a colour glyph image to pure black and white, and estimate the
# colour of its text.
#
# Every pixel is first expressed as its per-channel distance from the
# background colour and converted to HSV.  A pixel becomes white in the
# output when its value is more than half of the largest value found in the
# glyph, and black otherwise.  The text colour is the saturation-weighted
# mean hue, rounded to a whole number.
#
# Parameters: the glyph image, and the background colour as three RGB
# components.
# Returns a two-element list: the new black-and-white GD image, and a hashref
# { color => <mean hue, or undef when the total saturation is below 1>,
#   bgrgb => <arrayref of the background colour passed in> }.
sub image_to_grey {
    my ($colorimage, @bgrgb) = @_;
    my $totalweight = 0;
    my $totalcolor  = 0;
    my $maxval      = 0;

    my ($width, $height) = $colorimage->getBounds;
    my $bwimage = GD::Image->new($width, $height);
    my $black = $bwimage->colorResolve(0, 0, 0);
    my $white = $bwimage->colorResolve(255, 255, 255);

    # Pass 1: measure every pixel.  Values are remembered so that the
    # thresholding pass does not have to convert each pixel a second time.
    # Valid coordinates run from 0 to size-1; the original loops ran one
    # past that in both directions.
    my @value;    # $value[$x][$y] is the HSV value of that pixel
    foreach my $x (0 .. $width - 1) {
        foreach my $y (0 .. $height - 1) {
            my ($r, $g, $b) = $colorimage->rgb($colorimage->getPixel($x, $y));
            $r = abs($r - $bgrgb[0]);
            $g = abs($g - $bgrgb[1]);
            # Blue is compared with the background's blue component; the
            # original compared it with the green one.
            $b = abs($b - $bgrgb[2]);
            my ($h, $s, $v) = RGB255_to_HSV($r, $g, $b);

            $totalweight += $s;
            $totalcolor  += $h * $s;
            $maxval = $v if $maxval < $v;
            $value[$x][$y] = $v;
        }
    }

    # Pass 2: scale so the brightest pixel is 100% and threshold at 50%.
    # A glyph whose brightest pixel has value 0 comes out entirely black
    # instead of dividing by zero.
    foreach my $x (0 .. $width - 1) {
        foreach my $y (0 .. $height - 1) {
            my $is_text = $maxval > 0 && $value[$x][$y] / $maxval > .5;
            $bwimage->setPixel($x, $y, $is_text ? $white : $black);
        }
    }

    # Only compute the mean hue when there is enough saturation to weight
    # it by.  Text with no saturation at all has a total weight of zero, and
    # the original divided by it before checking.
    my $avgcolor;
    if ($totalweight >= 1) {
        $avgcolor = sprintf("%.0f", $totalcolor / $totalweight);
    }

    return $bwimage, {color => $avgcolor, bgrgb => \@bgrgb};
}
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Split a one-line text image into glyphs and run a callback on each.
#
# The colour index of pixel (0,0) is taken to be the background.  Glyphs are
# found left to right as runs of columns containing at least one
# non-background pixel; each run is then trimmed at the top and bottom to the
# rows that contain such a pixel.  The cropped glyph is passed to
# $imagefunc->($recognizer, $glyph_image, @background_rgb), which must return
# a hashref; the keys prespace, startcol, endcol, width and chrwidth are
# added to it.  Afterwards a { str => " ", fake => 1 } entry is inserted in
# front of every glyph whose preceding gap is wider than a sixth of the image
# height, unless that glyph's string is '.'.
#
# Parameters: the recognizer, the GD image, and the per-glyph callback.
# Returns, in list context, the list of hashrefs; in scalar context, their
# 'str' values joined into one string.
sub chopup {
    my ($recognizer, $inimage, $imagefunc) = @_;
    my @string;

    my $bgcolor = $inimage->getPixel(0,0);
    my (@bgrgb) = $inimage->rgb($bgcolor);
    # NOTE(review): this and the two prints below go to STDOUT on every call.
    # They look like leftover debugging output, but are kept because
    # removing them would change what the program emits.
    print "Background color at index $bgcolor [@bgrgb]\n";
    my ($width, $height) = $inimage->getBounds;

    # True when column $col contains any non-background pixel.
    my $column_has_ink = sub {
        my ($col) = @_;
        for my $row (0 .. $height-1) {
            return 1 if $inimage->getPixel($col, $row) != $bgcolor;
        }
        return 0;
    };

    # True when row $row has a non-background pixel between the two columns
    # (both inclusive).
    my $row_has_ink = sub {
        my ($row, $first_col, $last_col) = @_;
        for my $col ($first_col .. $last_col) {
            return 1 if $inimage->getPixel($col, $row) != $bgcolor;
        }
        return 0;
    };

    my $mincol = 0;
    while ($mincol <= $width) {
        my ($startcol, $endcol);
        print "Finding bounds starting at $mincol\n";

        # Find left and right char boundary: the first column with ink, then
        # the first ink-free column after it.
        for my $col ($mincol .. $width-1) {
            my $hasnonbg = $column_has_ink->($col);

            if (not defined $startcol) {
                $startcol = $col if $hasnonbg;
            }
            elsif (!$hasnonbg) {
                $endcol = $col;
                last;
            }
        }

        # The glyph ran into the right-hand edge of the image.
        $endcol = $width-1 if not defined $endcol;

        # Nothing (more) to find.
        last if not defined $startcol or $startcol >= $endcol;

        my ($startrow, $endrow);

        # Find top boundary.
        for my $row (0 .. $height-1) {
            next unless $row_has_ink->($row, $startcol, $endcol);
            $startrow = $row;
            last;
        }

        # Find bottom boundary.
        for my $row (reverse(0 .. $height-1)) {
            next unless $row_has_ink->($row, $startcol, $endcol);
            $endrow = $row;
            last;
        }

        print "Character at ($startcol, $startrow)-($endcol, $endrow)\n";
        my $charimage = gdextract($inimage, $startcol, $startrow, $endcol, $endrow);
        my $this = $imagefunc->($recognizer, $charimage, @bgrgb);
        $this->{prespace} = $startcol - $mincol;
        $this->{startcol} = $startcol;
        $this->{endcol}   = $endcol;
        $this->{width}    = $endcol - $startcol;

        # A charmap entry may map a glyph to an empty string; count it as one
        # character so the per-character width does not divide by zero.
        my $strlen = defined $this->{str} ? length($this->{str}) : 0;
        $this->{chrwidth} = ($endcol - $startcol) / ($strlen || 1);
        push @string, $this;

        $mincol = $endcol;
    }

    # Insert spaces.
    # The "6" here is mostly just a guess.
    # The ne '.' is just to fix up a common situation in the particular
    # source I checked against the most.
    @string = map {
        ($_->{prespace} > $height/6 and $_->{str} ne '.')
          ? ({str=>" ", fake=>1}, $_)
          : $_
    } @string;

    if (wantarray) {
        return @string;
    }
    else {
        return join "", map { $_->{str} } @string;
    }
}
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Just a silly helper |
388
|
|
|
|
|
|
|
# Helper: copy the rectangle with corners ($x1, $y1) and ($x2, $y2), both
# inclusive, out of $inimage into a freshly created GD image, and return the
# new image.
sub gdextract {
    my ($inimage, $x1, $y1, $x2, $y2) = @_;

    # Both corners belong to the rectangle, hence the +1.
    my ($width, $height) = ($x2-$x1 + 1, $y2-$y1 + 1);

    my $outimage = GD::Image->new($width, $height);
    $outimage->copy($inimage, 0, 0, $x1, $y1, $width, $height);

    return $outimage;
}
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# It appears that GD's ->png method doesn't always return exactly the |
400
|
|
|
|
|
|
|
# same string for the same image -- it depends on the version of GD, |
401
|
|
|
|
|
|
|
# or of libpng, or of libz, or... something. I want charmap files to |
402
|
|
|
|
|
|
|
# be portable, so I need a portable method, so we define our own. It |
403
|
|
|
|
|
|
|
# doesn't have to be small, just portable. |
404
|
|
|
|
|
|
|
# |
405
|
|
|
|
|
|
|
# Note to self: Everything should be packed N -- big-endian (network) u32. |
406
|
|
|
|
|
|
|
# Compute a fingerprint of an image: the md5 hex digest of the image's width
# and height followed by the RGB components of its pixels, everything packed
# as big-endian unsigned 32-bit integers ('N').
#
# This is used instead of hashing the png output so that the digest does not
# depend on how a particular library version encodes the image.
#
# NOTE(review): both loops run from 0 to the bound INCLUSIVE, i.e. one
# column and one row past what getBounds reports.  That looks like an
# off-by-one, but those extra reads are part of every digest stored in
# existing charmap files, so the bounds must not be "fixed" here.
sub imagesum {
    my ($img) = @_;
    my ($w, $h) = $img->getBounds;

    my @chunks = (pack('NN', $w, $h));
    for my $x (0..$w) {
        for my $y (0..$h) {
            push @chunks, pack('NNN', $img->rgb($img->getPixel($x, $y)));
        }
    }

    return md5_hex(join '', @chunks);
}
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
1; |