| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # $Id: /mirror/perl/File-Extract/trunk/lib/File/Extract/HTML.pm 4210 2007-10-27T13:43:07.499967Z daisuke  $ | 
| 2 |  |  |  |  |  |  | # | 
| 3 |  |  |  |  |  |  | # Copyright (c) 2005 Daisuke Maki | 
| 4 |  |  |  |  |  |  | # All rights reserved. | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | package File::Extract::HTML; | 
| 7 | 2 |  |  | 2 |  | 14 | use strict; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 85 |  | 
| 8 | 2 |  |  | 2 |  | 14 | use base qw(File::Extract::Base); | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 500 |  | 
| 9 | 2 |  |  | 2 |  | 3104 | use HTML::TreeBuilder; | 
|  | 2 |  |  |  |  | 96921 |  | 
|  | 2 |  |  |  |  | 32 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 | 2 |  |  | 2 | 1 | 10 | sub mime_type { 'text/html' } | 
| 12 |  |  |  |  |  |  | sub extract | 
| 13 |  |  |  |  |  |  | { | 
| 14 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 15 | 0 |  |  |  |  |  | my $file = shift; | 
| 16 |  |  |  |  |  |  |  | 
| 17 | 0 |  |  |  |  |  | my $text; | 
| 18 | 0 |  |  |  |  |  | my $tree = HTML::TreeBuilder->new; | 
| 19 | 0 |  |  |  |  |  | $tree->parse_file($file); | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 0 |  |  |  |  |  | $text = $tree->as_text; | 
| 22 | 0 |  |  |  |  |  | $tree->delete; | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | my $r = File::Extract::Result->new( | 
| 25 | 0 |  | 0 |  |  |  | text      => eval { $self->recode($text) } || $text, | 
| 26 |  |  |  |  |  |  | filename  => $file, | 
| 27 |  |  |  |  |  |  | mime_type => $self->mime_type, | 
| 28 |  |  |  |  |  |  | ); | 
| 29 | 0 |  |  |  |  |  | return $r; | 
| 30 |  |  |  |  |  |  | } | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | 1; | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | __END__ |