line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package OCR::OcrSpace; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
69861
|
use 5.006; |
|
1
|
|
|
|
|
4
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
21
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
752
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
50834
|
|
|
1
|
|
|
|
|
36
|
|
8
|
1
|
|
|
1
|
|
56
|
use Carp qw( carp confess croak ); |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
64
|
|
9
|
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
|
6
|
use vars qw($VERSION @EXPORT @ISA $BASE_URL); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
776
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Load Exporter explicitly: @ISA below depends on it for import(), and this
# file should not rely on some other module having loaded it first.
require Exporter;
@ISA = qw(Exporter);

# Symbols installed into the caller's namespace by `use OCR::OcrSpace`.
@EXPORT = qw( get_result $BASE_URL);

############################################################
# DEFAULT base url
############################################################
# Endpoint used when the caller does not pass `ocr_space_url`.
$BASE_URL = 'http://api.ocr.space/parse/image';
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 NAME |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
Apr-2020 @ |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
OCR::OcrSpace - Perl Interface to access L<https://ocr.space/ocrapi> |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
The free OCR API provides a simple way of parsing images and multi-page PDF documents (PDF OCR) and getting the extracted text results returned in a JSON format. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
This module implements the POST request only. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Extracts text from images and PDF documents via ocr.space. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 VERSION |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Version 0.01 |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=cut |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 SYNOPSIS |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
#using object oriented interface |
43
|
|
|
|
|
|
|
use OCR::OcrSpace; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $ocrspace_obj = OCR::OcrSpace->new(); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $param = { |
48
|
|
|
|
|
|
|
file => '/tmp/image.png', #full image path |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
or |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
url => 'http://imagedatabase.com/test.jpg' #image url to fetch from |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
or |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
base64Image => '' |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
#following optional parameter |
59
|
|
|
|
|
|
|
ocr_space_url => "https://api.ocr.space/parse/image", |
60
|
|
|
|
|
|
|
apikey => 'XXXXXXXXXXXXXXXXXX', #API Key (mandatory) |
61
|
|
|
|
|
|
|
isOverlayRequired =>'True', #optional |
62
|
|
|
|
|
|
|
language =>'eng' , #optional |
63
|
|
|
|
|
|
|
scale => 'True', #optional |
64
|
|
|
|
|
|
|
isTable => 'True', #optional |
65
|
|
|
|
|
|
|
OCREngine => 2, #optional |
66
|
|
|
|
|
|
|
filetype => 'PNG', #optional |
67
|
|
|
|
|
|
|
detectOrientation => 'False', #optional |
68
|
|
|
|
|
|
|
isCreateSearchablePdf => 'True', #optional |
69
|
|
|
|
|
|
|
isSearchablePdfHideTextLayer => 'True', #optional |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
}; |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
print $ocrspace_obj->get_result( $param ); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
#using non-object oriented interface |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
use OCR::OcrSpace; |
79
|
|
|
|
|
|
|
print get_result( $param ); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
#since ocrSpace supports HTTP as well as HTTPS, you can always set the following variable before the call |
84
|
|
|
|
|
|
|
$BASE_URL |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 EXPORT |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
#method |
89
|
|
|
|
|
|
|
get_result |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
#variable |
92
|
|
|
|
|
|
|
$BASE_URL |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head2 new |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Constructor: creates an OCR::OcrSpace object for use in object oriented mode |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=cut |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub new {

    # Constructor. Takes the class name and an optional parameter hash ref
    # (accepted but not used) and returns an empty hash blessed into $class.
    my ( $class, $params ) = @_;

    my $self = {};
    bless $self, $class;

    return $self;
}
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 get_result |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
params hash ref of following valid keys |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=over 13 |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item * B { optional but required when using object oriented interface } |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item * B<ocr_space_url> { optional; pass the https endpoint here if you want to use HTTPS } |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
C<ocr_space_url> scalar string ([Optional] Default L<http://api.ocr.space/parse/image>) |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item * B<apikey> { scalar string } |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
C<apikey> API Key (this module sends it as a form field of the POST request) |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
get your key from here L |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item * B<url or file or base64Image> |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
C<url or file or base64Image> |
129
|
|
|
|
|
|
|
You can use three methods to upload the input image or PDF. We recommend the URL method for file sizes > 10 MB for faster upload speeds. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
url: URL of remote image file (Make sure it has the right content type) |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
file: Multipart encoded image file with filename |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
base64Image: Image as Base64 encoded string |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=item * B<language> |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
C<language> |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
[Optional] |
143
|
|
|
|
|
|
|
Arabic=ara |
144
|
|
|
|
|
|
|
Bulgarian=bul |
145
|
|
|
|
|
|
|
Chinese(Simplified)=chs |
146
|
|
|
|
|
|
|
Chinese(Traditional)=cht |
147
|
|
|
|
|
|
|
Croatian = hrv |
148
|
|
|
|
|
|
|
Czech = cze |
149
|
|
|
|
|
|
|
Danish = dan |
150
|
|
|
|
|
|
|
Dutch = dut |
151
|
|
|
|
|
|
|
English = eng |
152
|
|
|
|
|
|
|
Finnish = fin |
153
|
|
|
|
|
|
|
French = fre |
154
|
|
|
|
|
|
|
German = ger |
155
|
|
|
|
|
|
|
Greek = gre |
156
|
|
|
|
|
|
|
Hungarian = hun |
157
|
|
|
|
|
|
|
Korean = kor |
158
|
|
|
|
|
|
|
Italian = ita |
159
|
|
|
|
|
|
|
Japanese = jpn |
160
|
|
|
|
|
|
|
Polish = pol |
161
|
|
|
|
|
|
|
Portuguese = por |
162
|
|
|
|
|
|
|
Russian = rus |
163
|
|
|
|
|
|
|
Slovenian = slv |
164
|
|
|
|
|
|
|
Spanish = spa |
165
|
|
|
|
|
|
|
Swedish = swe |
166
|
|
|
|
|
|
|
Turkish = tur |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Language used for OCR. If no language is specified, English eng is taken as default. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
IMPORTANT: The language code has always 3-letters (not 2). So it is "eng" and not "en". |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=item * B<isOverlayRequired> |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
C<isOverlayRequired> scalar string ([Optional] Boolean value) |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
Default = False |
178
|
|
|
|
|
|
|
If true, returns the coordinates of the bounding boxes for each word. If false, the OCR'ed text is returned only as a text block (this makes the JSON response smaller). Overlay data can be used, for example, to show text over the image. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item * B<filetype> |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
C<filetype> scalar string ([Optional] String value: PDF, GIF, PNG, JPG, TIF, BMP) |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Overwrites the automatic file type detection based on content-type. Supported image file formats are png, jpg (jpeg), gif, tif (tiff) and bmp. For document ocr, the api supports the Adobe PDF format. Multi-page TIFF files are supported. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item * B<detectOrientation> |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
C<detectOrientation> scalar string ([Optional] true/false) |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
if set to true, the api autorotates the image correctly and sets the TextOrientation parameter in the JSON response. If the image is not rotated, then TextOrientation=0, otherwise it is the degree of the rotation, e. g. "270". |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=item * B<isCreateSearchablePdf> |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
C<isCreateSearchablePdf> scalar string ([Optional] Boolean value) |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Default = False |
201
|
|
|
|
|
|
|
If true, API generates a searchable PDF. This parameter automatically sets isOverlayRequired = true |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=item * B<isSearchablePdfHideTextLayer> |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
C<isSearchablePdfHideTextLayer> scalar string ([Optional] Boolean value) |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Default = False. If true, the text layer is hidden (not visible) |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=item * B<scale> |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
C<scale> scalar string ([Optional] true/false) |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
If set to true, the api does some internal upscaling. This can improve the OCR result significantly, especially for low-resolution PDF scans. Note that the front page demo uses scale=true, but the API uses scale=false by default. See also this OCR forum post. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=item * B<isTable> |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
C<isTable> scalar string ([Optional] true/false) |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
If set to true, the OCR logic makes sure that the parsed text result is always returned line by line. This switch is recommended for table OCR, receipt OCR, invoice processing and all other type of input documents that have a table like structure. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=item * B<OCREngine> |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
C<OCREngine> scalar int ([Optional] 1 or 2) |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
The default is engine 1. OCR Engine 2 is a new image-processing method. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=back |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head2 Notes from L |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Tip: When serving images from an Amazon AWS S3 bucket or a similar service for use with the "URL" parameter, make sure it has the right content type. It should not be "Content-Type:application/x-www-form-urlencoded" (which seems to be the default) but image/png or similar. Alternatively you can include the filetype parameter and tell the API directly what type of document you are sending (PNG, JPG, GIF, PDF). |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
New: If you need to detect the status of checkboxes, please contact us about the Optical Mark Recognition (OMR) (Beta) features. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
Select the best OCR Engine |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
New: We implemented a second OCR engine with a different processing logic. It is better than the default engine (engine1) in certain cases. So we recommend that you try engine1 first (since it is faster), but if the OCR results are not perfect, please try the same document with engine2. You can use the new OCR engine with our free online OCR service on the front page, and with the API. |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
Features of OCR Engine 1: |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
- Supports more languages (including Asian languages like Chinese, Japanese and Korean) |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
- Faster |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
- Supports larger images |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
- PDF OCR and Searchable PDF creation support |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
- Multi-Page TIFF scan support |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
- Parameter: OCREngine=1 |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
Features of OCR Engine 2: |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
- Western Latin Character languages only (English, German, French,...) |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
- Language auto-detect (so it does not really matter what OCR language you select, as long as it uses Latin characters) |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
- Usually better at single number OCR and alphanumeric OCR (e. g. SUDOKO, Dot Matrix OCR, MRZ OCR,... ) |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
- Usually better at special characters OCR like @+-... |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
- Image size limit 5000px width and 5000px height |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
- Parameter: OCREngine=2 |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
- No PDF OCR and Offline OCR yet. If you need this, please contact us for an internal beta. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
The returned OCR result JSON response is identical for both engines! So you can easily switch between both engines as needed. If you have any question about using Engine 1 or 2, please ask in our OCR API Forum. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub get_result {

    # Validate the caller's parameters, build the request description and
    # send it, returning whatever _process_request returns (the response
    # body on success, otherwise the HTTP status line).
    #
    # Works both as a method ( $obj->get_result( $params ) ) and as a plain
    # function ( get_result( $params ) ): more than one argument is taken to
    # mean the method form.
    #
    # This could be done by simply discarding $self, but the two call paths
    # are kept separate to allow for future maintenance, if any.
    my ( $params, $raw_request, $result );
    if ( scalar @_ > 1 ) {
        my $self;
        ( $self, $params ) = (@_);

        # validate the parameters and keep only the recognised ones
        $params = $self->_validate($params);

        # generate the request
        $raw_request = $self->_generate_request($params);

        # send the request via the gateway
        $result = $self->_process_request($raw_request);
    }
    else {
        $params = shift;

        # validate the parameters and keep only the recognised ones
        $params = _validate($params);

        # generate the request
        $raw_request = _generate_request($params);

        # send the request via the gateway
        $result = _process_request($raw_request);
    }

    # Plain return: the former `$result // undef` was a no-op and needed the
    # defined-or operator (perl 5.10+), while this file declares `use 5.006`.
    return $result;
}
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=head2 Sample Output success |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
{"ParsedResults":[{"TextOverlay":{"Lines":[{"LineText":"Current","Words":[{"WordText":"Current","Left":11.666666030883789,"Top":59.166664123535156,"Height":14.999999046325684,"Width":54.999996185302734}],"MaxHeight":14.999999046325684,"MinTop":59.166664123535156},{"LineText":"59","Words":[{"WordText":"59","Left":32.5,"Top":239.99998474121094,"Height":20.833332061767578,"Width":29.166666030883789}],"MaxHeight":20.833332061767578,"MinTop":239.99998474121094}],"HasOverlay":true,"Message":"Total lines: "2"},"TextOrientation":"0","FileParseExitCode":1,"ParsedText":"Current\t\r\n59\t\r\n","ErrorMessage":"","ErrorDetails":""}],"OCRExitCode":1,"IsErroredOnProcessing":false,"ProcessingTimeInMilliseconds":"437","SearchablePDFURL":""} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 Sample Output error |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
{"OCRExitCode":99,"IsErroredOnProcessing":true,"ErrorMessage":["Parameter name 'attributes' is invalid. Valid parameters: apikey,url,language,isoverlayrequired,base64image,iscreatesearchablepdf,issearchablepdfhidetextlayer,filetype,addressparsing,scale,detectorientation,istable,ocrengine,detectcheckbox,checkboxtemplate,checkboxtemplateregex","Please check if you need to URL encode the URL passed in request parameters."],"ProcessingTimeInMilliseconds":"0"} |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
#################### |
327
|
|
|
|
|
|
|
# internal function |
328
|
|
|
|
|
|
|
################### |
329
|
|
|
|
|
|
|
sub _generate_request {

    # Turn the validated parameter hash into the request description used
    # by _process_request: `url` (the endpoint), `body_param` (form fields)
    # and, only when a file was supplied, `file_path`.
    #
    # Accepts either ( $params ) or ( $invocant, $params ).
    my $args = ( @_ > 1 ) ? $_[1] : $_[0];

    my %request = (
        url        => $args->{endpoint},
        body_param => $args->{body_param},
    );

    if ( defined $args->{file} ) {
        $request{file_path} = $args->{file};
    }

    return \%request;
}
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
#################### |
343
|
|
|
|
|
|
|
# internal function |
344
|
|
|
|
|
|
|
################### |
345
|
|
|
|
|
|
|
sub _validate {

    # Check the caller-supplied parameters and return a new hash ref holding
    # only the recognised ones:
    #   endpoint    - `ocr_space_url` if given, otherwise $BASE_URL
    #   url         - copied when defined
    #   base64Image - copied when defined
    #   file        - copied only when it names an existing regular file
    #   body_param  - hash ref of the defined form fields to send
    #
    # Missing required parameters only produce warnings (carp); nothing here
    # dies on bad input.
    #
    # Accepts either ( $params ) or ( $invocant, $params ).
    my $params = ( scalar(@_) > 1 ) ? $_[1] : shift;

    carp "Required parameter `apikey` not passed" unless ( defined $params->{apikey} );

    # At least one image source must be supplied. Each candidate is tested
    # for "defined and non-empty": the previous defined( a || b || c ) test
    # warned wrongly for a false-but-valid value such as file => '0' and
    # accepted an empty base64Image.
    my $source_count =
      grep { defined $params->{$_} && length $params->{$_} } qw( url file base64Image );
    carp "Required parameter `url or file or base64Image` not passed"
      unless ($source_count);

    # Ternary instead of `//` so the code runs on the perl version this file
    # declares (`use 5.006`); defined-or needs 5.10.
    my $valid_params = {
        endpoint => ( defined $params->{ocr_space_url} )
        ? $params->{ocr_space_url}
        : $BASE_URL,
    };

    $valid_params->{url}         = $params->{url}         if ( defined $params->{url} );
    $valid_params->{base64Image} = $params->{base64Image} if ( defined $params->{base64Image} );

    if ( defined $params->{file} ) {
        if ( -f $params->{file} ) {
            $valid_params->{file} = $params->{file};
        }
        else {
            carp "Unable to open file $params->{file} \n";
        }
    }

    # Copy every recognised form field that the caller defined; anything
    # else in $params is dropped.
    foreach my $key (
        qw/
        language isOverlayRequired filetype
        detectOrientation isCreateSearchablePdf url
        isSearchablePdfHideTextLayer scale base64Image
        isTable OCREngine apikey/
      )
    {
        $valid_params->{body_param}->{$key} = $params->{$key} if ( defined $params->{$key} );
    }

    return $valid_params;
}
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
#################### |
377
|
|
|
|
|
|
|
# internal function |
378
|
|
|
|
|
|
|
################### |
379
|
|
|
|
|
|
|
sub _process_request {

    # POST the request description built by _generate_request as
    # multipart/form-data and return the outcome as a string: the response
    # body on success, otherwise the HTTP status line of the last attempt.
    #
    # Reads these keys from the hash ref:
    #   url        - endpoint to POST to
    #   body_param - hash ref of form fields (optional)
    #   file_path  - path of a file to upload as the `file` field (optional)
    #   header     - hash ref of default headers for the user agent (optional)
    #
    # Accepts either ( $params ) or ( $invocant, $params ).
    my $params = ( scalar(@_) > 1 ) ? $_[1] : shift;

    my $file     = $params->{file_path};
    my $endpoint = $params->{url};

    # Form content as a flat key/value list, which is what the multipart
    # encoder expects. Left undefined when there is nothing to send.
    my $content;

    if ( defined $params->{body_param} && uc( ref( $params->{body_param} ) ) eq 'HASH' ) {
        foreach my $field ( keys %{ $params->{body_param} } ) {
            push( @{$content}, ( $field => $params->{body_param}->{$field} ) );
        }
    }

    if ($file) {

        # An array ref value marks the field as a file upload.
        push( @{$content}, ( file => [$file] ) );
    }

    my $ua = LWP::UserAgent->new();

    # Pick up proxy settings from the environment.
    $ua->env_proxy;

    if ( defined $params->{header} ) {
        my $header = $params->{header};
        $ua->default_header( %{$header} );
    }

    # Up to three attempts. The previous loop returned on its first pass in
    # both branches, so it never retried. Only 5xx responses are retried
    # (assumed to also cover LWP's own connection-failure responses, which
    # are reported with code 500 -- TODO confirm); success and 4xx responses
    # are returned straight away.
    my $max_attempts = 3;
    my $res;
    foreach my $attempt ( 1 .. $max_attempts ) {
        $res = $ua->post(
            $endpoint,
            Content_Type => 'multipart/form-data',
            Content      => $content,
        );

        return $res->content if ( $res->is_success );
        last if ( $res->code < 500 );
    }

    return $res->status_line;
}
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head1 AUTHOR |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sushrut pajai, C<< >> |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head1 BUGS |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
428
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
429
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=head1 SUPPORT |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
perldoc OCR::OcrSpace |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
You can also look for information at: |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=over 4 |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
L |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
L |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=item * CPAN Ratings |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
L |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=item * Search CPAN |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
L |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=back |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
This software is copyright (c) 2020 by sushrut pajai. |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
470
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=cut |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
1; # End of OCR::OcrSpace |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
__END__ |