| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package OCR::PerfectCR; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ABOVE the 'use strict' line! |
|
4
|
|
|
|
|
|
|
$VERSION = 0.03; |
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
808
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
43
|
|
|
7
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
38
|
|
|
8
|
1
|
|
|
1
|
|
969
|
use IO::File; |
|
|
1
|
|
|
|
|
14078
|
|
|
|
1
|
|
|
|
|
162
|
|
|
9
|
1
|
|
|
1
|
|
452
|
use GD; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use Digest::MD5 'md5_hex'; |
|
11
|
|
|
|
|
|
|
use Graphics::ColorObject; |
|
12
|
|
|
|
|
|
|
use Carp 'croak'; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
OCR::PerfectCR - Perfect OCR (if you have perfect input). |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use OCR::PerfectCR; |
|
21
|
|
|
|
|
|
|
use GD; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my $recognizer = OCR::PerfectCR->new; |
|
24
|
|
|
|
|
|
|
$recognizer->load_charmap_file("charmap"); |
|
25
|
|
|
|
|
|
|
my $image = GD::Image->new("example.png") or die "Can't open example.png: $!"; |
|
26
|
|
|
|
|
|
|
my $string = $recognizer->recognize($image); |
|
27
|
|
|
|
|
|
|
$recognizer->save_charmap_file("charmap"); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
OCR::PerfectCR is a fast, highly accurate "optical" character recognition |
|
33
|
|
|
|
|
|
|
engine requiring minimal training. How does it manage this, despite |
|
34
|
|
|
|
|
|
|
being written in pure perl? By ignoring most of the problems. |
|
35
|
|
|
|
|
|
|
OCR::PerfectCR requires that your input is in perfect shape -- that it |
|
36
|
|
|
|
|
|
|
hasn't gone into the real world and been scanned, that each image |
|
37
|
|
|
|
|
|
|
represent one line of text, and nothing else, and most difficultly, |
|
38
|
|
|
|
|
|
|
that the font have a fairly wide spacing. This makes it very useful |
|
39
|
|
|
|
|
|
|
for converting image-based subtitle formats to text, and probably not |
|
40
|
|
|
|
|
|
|
much else. However, it is very good at doing that. |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
OCR::PerfectCR's knowledge about a particular font is encapsulated in a |
|
43
|
|
|
|
|
|
|
"charmap" file, which maps md5 sums of the canonical representation |
|
44
|
|
|
|
|
|
|
of a character (the first 32 characters of the line) to a string (the |
|
45
|
|
|
|
|
|
|
34th and onwards chars, to newline). |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Most methods will die on error, rather than trying to recover and return undef. |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=cut |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 $recognizer->load_charmap_file("charmap") |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Loads a charmap file into memory. |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=cut |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Load a charmap file into $self->{charmap}.
#
# Arguments:
#   $self     - the recognizer (a hashref); entries are merged into
#               $self->{charmap}, overwriting any existing entry with the
#               same md5.
#   $filename - path of the charmap file, read as utf8.
#
# File format, one entry per line: the first 32 characters are the md5 of
# the glyph, the 33rd character is a separator and is discarded, and the
# rest of the line (without the newline) is the string for that glyph.
# Empty lines and lines starting with '#' are skipped.
#
# Returns nothing.  Croaks if the file cannot be opened.
sub load_charmap_file {
    my $self = shift;
    my $filename = shift;

    # print "load_charmap_file($self, $filename);\n";

    # The mode is passed as a separate argument rather than glued onto the
    # filename, so that names with leading/trailing whitespace or
    # characters such as '>' or '|' are opened literally.
    my $charmapfile = IO::File->new($filename, '<') or
      croak "Couldn't open $filename: $!";
    binmode($charmapfile, ':utf8');

    while (defined(my $line = <$charmapfile>)) {
        chomp $line;
        next if !$line or $line =~ m/^#/;

        # Peel the md5 off the front, then drop the separator character;
        # whatever remains is the value.
        my $md5 = substr($line, 0, 32, '');
        substr($line, 0, 1, '');
        $self->{charmap}{$md5} = $line;
    }

    return;
}
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head2 $recognizer->save_charmap_file("charmap") |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Saves the charmap to a file. Charmap files are always saved and |
|
83
|
|
|
|
|
|
|
loaded in utf8. |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Write the recognizer's charmap to a file, encoded as utf8.
#
# Arguments:
#   $recognizer - the recognizer (a hashref); $recognizer->{charmap} maps
#                 md5 => string.  A missing charmap is treated as empty.
#   $filename   - path of the file to (over)write.
#
# One "md5 value" line is written per entry, sorted by value and then by
# md5 so that the output is stable.
#
# Returns nothing.  Croaks if the file cannot be opened, written or closed.
sub save_charmap_file {
    my ($recognizer, $filename) = @_;
    # print "save_charmap_file($recognizer, $filename);\n";

    # Copy the charmap before touching the file, so that a problem with
    # the in-memory data cannot leave a truncated file behind.
    my %images = %{ $recognizer->{charmap} || {} };

    # The mode is passed separately so the filename is taken literally.
    my $charmapfile = IO::File->new($filename, '>') or
      croak "Couldn't open $filename: $!";
    binmode($charmapfile, ':utf8');

    {
        # Values may be undef; sort and print them as empty strings
        # without warning.
        no warnings 'uninitialized';
        for my $md5 (sort { $images{$a} cmp $images{$b} or $a cmp $b }
                     keys %images) {
            $charmapfile->print("$md5 $images{$md5}\n")
              or croak "Couldn't write to $filename: $!";
        }
    }

    # Buffered write errors only surface at close time.
    $charmapfile->close
      or croak "Couldn't close $filename: $!";

    return;
}
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head2 $recognizer->recognize($image) (recognise is an alias for this) |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Takes the image (a GD::Image object), and tries to convert it into |
|
109
|
|
|
|
|
|
|
text. In list context, returns a list of hashrefs, each having a |
|
110
|
|
|
|
|
|
|
C<str> key, whose value is the string in the charmap for that image. |
|
111
|
|
|
|
|
|
|
There may also be a C<color> (note the spelling) key, with a value |
|
112
|
|
|
|
|
|
|
between 0 and 360, representing the color of the text in degrees on |
|
113
|
|
|
|
|
|
|
the color wheel, or C<undef> meaning grey. The C<color> being missing |
|
114
|
|
|
|
|
|
|
implies that there is nothing there but background -- that is, that |
|
115
|
|
|
|
|
|
|
it's whitespace. For non-whitespace characters, there is a key |
|
116
|
|
|
|
|
|
|
C<md5>, which gives the md5 sum of the character in canonical form -- |
|
117
|
|
|
|
|
|
|
that is, its charmap entry. Other keys are purposefully not |
|
118
|
|
|
|
|
|
|
documented -- if you find them useful, let me know by filing |
|
119
|
|
|
|
|
|
|
an RT request. |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
Characters not in the charmap will have their str set to C<"\x{FFFD}" |
|
122
|
|
|
|
|
|
|
eq "\N{REPLACEMENT CHARACTER}">, and will be added to the charmap. |
|
123
|
|
|
|
|
|
|
They will also be saved as png files named I<md5>.png in the current |
|
124
|
|
|
|
|
|
|
directory, so that a human can look at them and ID them. |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Public entry point: run chopup() over the caller's arguments, with
# charimage() appended as the per-glyph callback.  The whole of @_ is
# forwarded, and the caller's context (list or scalar) is passed through
# to chopup() so it decides what to return.
sub recognize {
    my @chopup_args = (@_, \&charimage);
    return chopup(@chopup_args);
}
# The alias is assigned twice on purpose, to avoid an "only used once"
# warning.
*recognise = *recognize;
*recognise = *recognize;
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 OCR::PerfectCR->new(); |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Just a boring constructor. No parameters. |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Constructor: returns a new, empty hash blessed into the invoking class.
# Takes no parameters besides the class name.
sub new {
    my $class = shift;
    my $self  = {};
    return bless $self, $class;
}
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head1 BUGS |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Please report bugs on L<http://rt.cpan.org/>. If the bug /might possibly/ be because of your input file, please include it with the bug report. |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head1 AUTHOR & LICENSE |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
Copyright 2005 James Mastros, james@mastros.biz, JMASTROS, theorbtwo. (Those are all the same person.) |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
May be used and copied under the same terms as C<perl> itself. |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Thanks, castaway, for being you, and diotalevi for a detailed review. |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=cut |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
### Internal functions below here. |
|
161
|
|
|
|
|
|
|
# Per-glyph callback used by recognize(): identify one extracted glyph.
#
# Arguments: the recognizer, the glyph image, and the background colour
# as a list of RGB components.
#
# The glyph is reduced by image_to_grey() and hashed by imagesum().  If
# the hash is not yet in the charmap it is added with the value
# "\x{FFFD}", and the reduced glyph is written to "<md5>.png" in the
# current directory.
#
# Returns the info hashref from image_to_grey(), with the keys md5 and
# str added.
sub charimage {
    my ($recognizer, $image, @bgrgb) = @_;

    # print "charimage($recognizer, $image)\n";
    my ($glyph, $this) = image_to_grey($image, @bgrgb);

    # printf "Got char image, size %d by %d\n", $glyph->getBounds;
    my $md5 = imagesum($glyph);
    $this->{md5} = $md5;

    unless (exists $recognizer->{charmap}{$md5}) {
        # First sighting: record a placeholder and dump the glyph to disk.
        $recognizer->{charmap}{$md5} = "\x{FFFD}";

        my $file = IO::File->new(">$md5.png") or die "Couldn't create $md5.png: $!";
        binmode($file);
        $file->print($glyph->png);
    }

    $this->{str} = $recognizer->{charmap}{$md5};

    return $this;
}
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Memo table for RGB255_to_HSV: numeric colour key => arrayref as returned
# by Graphics::ColorObject's as_HSV.
my %rgb255_to_hsv;

# Convert an (r, g, b) triple to (h, s, v) via Graphics::ColorObject,
# caching the result so each distinct colour is only converted once.
# Returns the elements of the cached array as a list.
sub RGB255_to_HSV {
    my ($red, $green, $blue) = @_;
    my $key = 0x10000 * $red + 0x100 * $green + $blue;

    unless (exists $rgb255_to_hsv{$key}) {
        my $colour = Graphics::ColorObject->new_RGB255(\@_, space=>'PAL');
        $rgb255_to_hsv{$key} = $colour->as_HSV;
    }

    my $hsv = $rgb255_to_hsv{$key};
    return @$hsv;
}
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Memo table for HSV_to_RGB255: "h,s,v" string => arrayref as returned by
# Graphics::ColorObject's as_RGB255.
my %hsv_to_rgb255;

# Convert an (h, s, v) triple to (r, g, b) via Graphics::ColorObject,
# caching the result so each distinct colour is only converted once.
# Returns the elements of the cached array as a list.
sub HSV_to_RGB255 {
    my ($hue, $sat, $val) = @_;
    my $key = join ',', $hue, $sat, $val;

    unless (exists $hsv_to_rgb255{$key}) {
        my $colour = Graphics::ColorObject->new_HSV(\@_, space=>'PAL');
        $hsv_to_rgb255{$key} = $colour->as_RGB255;
    }

    my $rgb = $hsv_to_rgb255{$key};
    return @$rgb;
}
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# Reduce a colour glyph image to a two-colour image and estimate the
# colour of the text.
#
# Arguments:
#   $colorimage - the glyph image (must support getBounds, getPixel, rgb).
#   @bgrgb      - the background colour as (red, green, blue).
#
# Every pixel is expressed as its absolute per-channel distance from the
# background and converted to HSV.  A pixel becomes white in the output
# if its value is more than half of the largest value seen, and black
# otherwise.
#
# Returns a two-element list:
#   - the new two-colour GD::Image, and
#   - a hashref with
#       color: the saturation-weighted mean hue, rounded to an integer
#              string, or undef when the total saturation is below 1;
#       bgrgb: arrayref of the background colour passed in.
sub image_to_grey {
    my ($colorimage, @bgrgb) = @_;
    my $totalweight = 0;
    my $totalcolor  = 0;
    my $maxval      = 0;

    my ($width, $height) = $colorimage->getBounds;
    my $bwimage = GD::Image->new($width, $height);
    my $black = $bwimage->colorResolve(0, 0, 0);
    my $white = $bwimage->colorResolve(255, 255, 255);

    # NOTE(review): both ranges are inclusive, so one column and one row
    # past the image bounds are visited as well.  This is kept as-is
    # because changing it could alter $maxval, and with it the canonical
    # image that charmap md5 sums are computed from -- confirm against
    # existing charmap files before tightening.

    # Pass 1: measure every pixel's distance from the background,
    # remember its value, and find the largest value.
    my @value_at;
    foreach my $x (0..$width) {
        foreach my $y (0..$height) {
            my ($red, $green, $blue) =
              $colorimage->rgb($colorimage->getPixel($x, $y));
            # Each channel is compared with the matching background
            # channel (the blue channel used to be compared with green).
            my ($h, $s, $v) = RGB255_to_HSV(
                abs($red   - $bgrgb[0]),
                abs($green - $bgrgb[1]),
                abs($blue  - $bgrgb[2]),
            );
            $totalweight += $s;
            $totalcolor  += $h * $s;
            $maxval = $v if $maxval < $v;
            $value_at[$x][$y] = $v;
        }
    }

    # Pass 2: scale so the largest value is 100%, then threshold at 50%.
    # If nothing differs from the background ($maxval is 0) every pixel
    # is black, rather than dividing by zero.
    foreach my $x (0..$width) {
        foreach my $y (0..$height) {
            my $is_light = $maxval > 0 && $value_at[$x][$y]/$maxval > .5;
            $bwimage->setPixel($x, $y, $is_light ? $white : $black);
        }
    }

    # print "Total color weight: ", $totalweight, "\n";
    # Test the weight before dividing: completely unsaturated text has a
    # total weight of zero.
    my $avgcolor = $totalweight < 1
      ? undef
      : sprintf("%.0f", $totalcolor/$totalweight);

    return $bwimage, {color => $avgcolor, bgrgb=>\@bgrgb};
}
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Split a one-line image into glyphs and hand each glyph to a callback.
#
# Arguments:
#   $recognizer - passed through untouched as the callback's first argument.
#   $inimage    - the image to split (must support getPixel, rgb, getBounds).
#   $imagefunc  - coderef called as
#                 $imagefunc->($recognizer, $glyph_image, @background_rgb);
#                 it must return a hashref with at least a 'str' key.
#
# The colour of pixel (0,0) is taken to be the background.  A glyph is a
# run of columns containing non-background pixels, cropped vertically to
# the rows that contain non-background pixels.  The callback's hashref
# is extended with prespace, startcol, endcol, width and chrwidth.
#
# A fake {str=>" ", fake=>1} entry is inserted before any glyph whose
# preceding gap is wider than a sixth of the image height, unless the
# glyph's string is '.'.
#
# Returns the list of hashrefs in list context, or the concatenation of
# their 'str' values in scalar context.
sub chopup {
    my ($recognizer, $inimage, $imagefunc) = @_;
    # print "chopup($recognizer, $inimage, $imagefunc);\n";
    my @string;

    my $bgcolor = $inimage->getPixel(0,0);
    my (@bgrgb) = $inimage->rgb($bgcolor);
    # Diagnostics stay commented out, like the rest of the debugging
    # output in this file, so callers' STDOUT is not polluted.
    # print "Background color at index $bgcolor [@bgrgb]\n";
    my ($width, $height) = $inimage->getBounds;

    my $mincol=0;
    while ($mincol <= $width) {
        my ($startcol, $endcol);
        # print "Finding bounds starting at $mincol\n";

        # Find left and right char boundary.
        for my $col ($mincol .. $width-1) {
            # print "Column $col: ";
            my $hasnonbg=0;
            for my $row (0 .. $height-1) {
                if ($inimage->getPixel($col, $row) != $bgcolor) {
                    $hasnonbg=1;
                    last;
                }
            }
            # print "$hasnonbg\n";

            if (not defined $startcol) {
                # Still in the gap before the glyph.
                if ($hasnonbg) {
                    $startcol = $col;
                }
            } else {
                # Inside the glyph: the first empty column ends it.
                if (!$hasnonbg) {
                    $endcol = $col;
                    last;
                }
            }
        }

        # The glyph ran up to the right edge of the image.
        if (not defined $endcol) {
            $endcol = $width-1;
        }

        if (not defined $startcol or
            $startcol >= $endcol) {
            # print "Couldn't find anything\n";
            last;
        }


        my ($startrow, $endrow);

        # Find top boundary.
        for my $row (0..$height-1) {
            my $hasnonbg=0;
            for my $col ($startcol..$endcol) {
                if ($inimage->getPixel($col, $row) != $bgcolor) {
                    $hasnonbg=1;
                    last;
                }
            }
            if ($hasnonbg) {
                $startrow = $row;
                last;
            }
        }

        # Find bottom boundary.
        for my $row (reverse(0..$height-1)) {
            my $hasnonbg=0;
            for my $col ($startcol..$endcol) {
                if ($inimage->getPixel($col, $row) != $bgcolor) {
                    $hasnonbg=1;
                    last;
                }
            }
            if ($hasnonbg) {
                $endrow = $row;
                last;
            }
        }

        # print "Character at ($startcol, $startrow)-($endcol, $endrow)\n";
        my $charimage = gdextract($inimage, $startcol, $startrow, $endcol, $endrow);
        my $this = $imagefunc->($recognizer, $charimage, @bgrgb);
        $this->{prespace} = $startcol - $mincol;
        $this->{startcol} = $startcol;
        # $this->{mincol}   = $mincol;
        $this->{endcol}   = $endcol;
        $this->{width}    = $endcol - $startcol;
        # A glyph may map to an empty (or missing) string; treat it as a
        # single character instead of dividing by zero.
        my $strlen = defined $this->{str} ? length($this->{str}) : 0;
        $this->{chrwidth} = ($endcol - $startcol)/($strlen || 1);
        push @string, $this;

        # Continue scanning from the column that ended this glyph.
        $mincol = $endcol;
    }

    # print "\n";

    # for (1..$#string-1) {
    #   my $prev = $string[$_-1];
    #   my $this = $string[$_];

    #   print "Chars: $prev->{str} -- $this->{str}\n";
    #   print "Charwidths: $prev->{chrwidth} -- $this->{chrwidth}\n";
    #   print "Prespace: $this->{prespace}\n";
    #   print ("Metric: ", (($prev->{chrwidth}+$this->{chrwidth})/2)/$this->{prespace}, "\n");

    # }

    # Insert spaces.
    @string = map {
        # The "6" here is mostly just a guess.
        # The ne '.' is just to fix up a common situation in the particular
        # source I checked against the most.
        if ($_->{prespace} > $height/6
            and $_->{str} ne '.') {
            ({str=>" ", fake=>1}, $_);
        } else {
            $_;
        }
    } @string;

    # print "Finished: ", join('', map { $_->{str} } @string), "\n";

    if (wantarray) {
        return @string;
    } else {
        return join "", map { $_->{str} } @string;
    }
}
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Just a silly helper |
|
388
|
|
|
|
|
|
|
# Copy the rectangle with corners ($left, $top) and ($right, $bottom),
# both inclusive, out of $source into a freshly created GD::Image of
# exactly that size, and return the new image.
sub gdextract {
    my ($source, $left, $top, $right, $bottom) = @_;

    # Both corners are inclusive, hence the +1.
    my ($span_x, $span_y) = ($right - $left + 1, $bottom - $top + 1);

    my $cropped = GD::Image->new($span_x, $span_y);
    $cropped->copy($source, 0, 0, $left, $top, $span_x, $span_y);

    return $cropped;
}
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# It appears that GD's ->png method doesn't always return exactly the |
|
400
|
|
|
|
|
|
|
# same string for the same image -- it depends on the version of GD, |
|
401
|
|
|
|
|
|
|
# or of libpng, or of libz, or... something. I want charmap files to |
|
402
|
|
|
|
|
|
|
# be portable, so I need a portable method, so we define our own. It |
|
403
|
|
|
|
|
|
|
# doesn't have to be small, just portable. |
|
404
|
|
|
|
|
|
|
# |
|
405
|
|
|
|
|
|
|
# Note to self: Everything should be packed N -- big-endian (network) u32. |
|
406
|
|
|
|
|
|
|
# Compute a hash of an image that does not depend on how GD encodes it.
#
# The hashed string is the width and height followed by the RGB triple
# of each visited pixel, column by column, everything packed as
# big-endian 32-bit unsigned integers ('N').  Both loop ranges are
# inclusive of the width and height themselves; that is part of the
# definition of the hash and must not change, or existing charmap
# entries would stop matching.
#
# Returns the md5 of that string as 32 hex characters.
sub imagesum {
    my ($img) = @_;
    my ($cols, $rows) = $img->getBounds;

    my @pieces = (pack('NN', $cols, $rows));
    for my $col (0..$cols) {
        for my $row (0..$rows) {
            my @rgb = $img->rgb($img->getPixel($col, $row));
            push @pieces, pack('NNN', @rgb);
        }
    }

    return md5_hex(join '', @pieces);
}
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
1; |