line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: /mirror/perl/File-Extract/trunk/lib/File/Extract/HTML.pm 4210 2007-10-27T13:43:07.499967Z daisuke $ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Copyright (c) 2005 Daisuke Maki |
4
|
|
|
|
|
|
|
# All rights reserved. |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package File::Extract::HTML; |
7
|
2
|
|
|
2
|
|
14
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
85
|
|
8
|
2
|
|
|
2
|
|
14
|
use base qw(File::Extract::Base); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
500
|
|
9
|
2
|
|
|
2
|
|
3104
|
use HTML::TreeBuilder; |
|
2
|
|
|
|
|
96921
|
|
|
2
|
|
|
|
|
32
|
|
10
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
1
|
10
|
# MIME type reported for results produced by this extractor.
sub mime_type
{
    return 'text/html';
}
12
|
|
|
|
|
|
|
# Extract the textual content of an HTML document.
#
# Arguments:
#   $file - file to read; handed unchanged to HTML::TreeBuilder->parse_file.
#
# Returns:
#   A File::Extract::Result built from the document text, the file name and
#   this extractor's mime_type. Returns nothing (undef in scalar context)
#   when parse_file reports that the file could not be opened, so an
#   unreadable file is not mistaken for an empty document.
#
# Any exception raised while parsing or flattening the tree is re-thrown
# after the tree has been released.
sub extract
{
    my ($self, $file) = @_;

    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef when the file cannot be opened; in that case
    # $text stays undef and we bail out below instead of returning a result
    # with empty text.
    my $text;
    my $ok = eval {
        $text = $tree->as_text if defined $tree->parse_file($file);
        1;
    };
    my $err = $@;

    # Always release the tree explicitly, even on failure, so that a die
    # inside parse_file/as_text does not leak the parsed structure.
    $tree->delete;

    die $err unless $ok;
    return unless defined $text;

    # Prefer the recoded text; fall back to the raw text when recode dies or
    # returns a false value.
    # NOTE(review): recode is not defined in this file -- presumably
    # inherited from File::Extract::Base; confirm.
    my $r = File::Extract::Result->new(
        text      => eval { $self->recode($text) } || $text,
        filename  => $file,
        mime_type => $self->mime_type,
    );
    return $r;
}
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
1; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
__END__ |