line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: /mirror/perl/File-Extract/trunk/lib/File/Extract/PDF.pm 4210 2007-10-27T13:43:07.499967Z daisuke $ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Copyright (c) 2005 Daisuke Maki |
4
|
|
|
|
|
|
|
# All rights reserved. |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package File::Extract::PDF; |
7
|
2
|
|
|
2
|
|
13
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
99
|
|
8
|
2
|
|
|
2
|
|
13
|
use base qw(File::Extract::Base); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
200
|
|
9
|
2
|
|
|
2
|
|
3830
|
use CAM::PDF; |
|
2
|
|
|
|
|
118249
|
|
|
2
|
|
|
|
|
89
|
|
10
|
2
|
|
|
2
|
|
30
|
use File::Extract::Result; |
|
2
|
|
|
|
|
25
|
|
|
2
|
|
|
|
|
312
|
|
11
|
|
|
|
|
|
|
|
12
|
2
|
|
|
2
|
1
|
9
|
sub mime_type { 'application/pdf' } |
13
|
|
|
|
|
|
|
sub extract |
14
|
|
|
|
|
|
|
{ |
15
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
16
|
0
|
|
|
|
|
|
my $file = shift; |
17
|
|
|
|
|
|
|
|
18
|
0
|
|
|
|
|
|
my $doc = CAM::PDF->new($file); |
19
|
0
|
|
|
|
|
|
my $text = ''; |
20
|
|
|
|
|
|
|
|
21
|
0
|
|
|
|
|
|
foreach my $p (1..$doc->numPages()) { |
22
|
0
|
|
|
|
|
|
$text .= $doc->getPageText($p); |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
return File::Extract::Result->new( |
26
|
0
|
|
0
|
|
|
|
text => eval { $self->recode($text) } || $text, |
27
|
|
|
|
|
|
|
filename => $file, |
28
|
|
|
|
|
|
|
mime_type => $self->mime_type |
29
|
|
|
|
|
|
|
); |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
1; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
__END__ |