line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::optex::textconv::msdoc; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.14'; |
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
12
|
use v5.14; |
|
1
|
|
|
|
|
3
|
|
6
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
7
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
76
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
## |
10
|
|
|
|
|
|
|
## Import to_text() and get_list() for backward compatibility. |
11
|
|
|
|
|
|
|
## |
12
|
|
|
|
|
|
|
our @EXPORT_OK = qw(to_text get_list); |
13
|
1
|
|
|
1
|
|
556
|
use App::optex::textconv::ooxml::regex qw(to_text get_list); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
11
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
require App::optex::textconv::ooxml; |
16
|
|
|
|
|
|
|
|
17
|
1
|
|
|
1
|
|
10
|
use App::optex::textconv::Converter 'import'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our @CONVERTER = ( |
20
|
|
|
|
|
|
|
[ qr/\.doc$/ => \&extract_doc ], |
21
|
|
|
|
|
|
|
@App::optex::textconv::ooxml::CONVERTER, |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
1
|
|
|
1
|
|
551
|
use Text::Extract::Word; |
|
1
|
|
|
|
|
23870
|
|
|
1
|
|
|
|
|
53
|
|
25
|
1
|
|
|
1
|
|
8
|
use Encode; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
171
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub extract_doc { |
28
|
0
|
|
|
0
|
0
|
|
my $file = shift; |
29
|
0
|
0
|
|
|
|
|
my $type = ($file =~ /\.(doc)$/)[0] or return; |
30
|
0
|
|
0
|
|
|
|
my $text = Text::Extract::Word->new($file)->get_text() // die; |
31
|
0
|
0
|
|
|
|
|
$text = encode 'utf8', $text if utf8::is_utf8($text); |
32
|
0
|
|
|
|
|
|
$text; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
1; |