line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::SiteTear::PageFilter; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
39
|
|
4
|
1
|
|
|
1
|
|
7
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
5
|
1
|
|
|
1
|
|
6
|
use File::Basename; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
81
|
|
6
|
1
|
|
|
1
|
|
1019
|
use Encode; |
|
1
|
|
|
|
|
14154
|
|
|
1
|
|
|
|
|
102
|
|
7
|
1
|
|
|
1
|
|
801
|
use Encode::Guess; |
|
1
|
|
|
|
|
5278
|
|
|
1
|
|
|
|
|
572
|
|
8
|
1
|
|
|
1
|
|
81
|
use URI; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
9
|
|
|
|
|
|
|
#use Data::dumper; |
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
1033
|
use HTML::Parser 3.40; |
|
1
|
|
|
|
|
12469
|
|
|
1
|
|
|
|
|
59
|
|
12
|
1
|
|
|
1
|
|
1258
|
use HTML::HeadParser; |
|
1
|
|
|
|
|
1276
|
|
|
1
|
|
|
|
|
31
|
|
13
|
1
|
|
|
1
|
|
8
|
use base qw(HTML::Parser Class::Accessor); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
153
|
|
14
|
|
|
|
|
|
|
__PACKAGE__->mk_accessors(qw(has_remote_base |
15
|
|
|
|
|
|
|
page)); |
16
|
|
|
|
|
|
|
|
17
|
1
|
|
|
1
|
|
1067
|
use HTML::Copy; |
|
1
|
|
|
|
|
6100
|
|
|
1
|
|
|
|
|
11
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = '1.43'; |
20
|
|
|
|
|
|
|
our @htmlSuffix = qw(.html .htm .xhtml); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 NAME |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
HTML::SiteTear::PageFilter - change link paths in HTML files. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 SYNOPSIS |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
use HTML::SiteTear::PageFilter; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# $page must be an instance of L<HTML::SiteTear::Page>. |
31
|
|
|
|
|
|
|
$filter = HTML::SiteTear::PageFilter->new($page); |
32
|
|
|
|
|
|
|
$filter->parse_file(); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 DESCRIPTION |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
This module changes link paths in HTML files. It is a subclass of L<HTML::Parser>. Internal use only. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 METHODS |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 new |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
$filter = HTML::SiteTear::PageFilter->new($page); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Make an instance of this module. $page must be an instance of HTML::SiteTear::Root or HTML::SiteTear::Page. This method is called from $page. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Constructor.
#
#   $page - the page object whose HTML is going to be filtered. It is stored
#           in the "page" accessor and must provide source_root(), whose
#           result must provide allow_abs_link().
#
# Returns the new filter object.
sub new {
    my $class = shift;
    my ($page) = @_;

    # The parent constructor supplies the object; it is blessed into the
    # requested class before any accessor is used.
    my $self = bless $class->SUPER::new(), $class;

    $self->page($page);

    # Whether "begin abs_link" / "end abs_link" comments are honoured at all
    # is decided by the source root of the page.
    $self->{'allow_abs_link'} = $page->source_root->allow_abs_link;

    # Initial parser state: relative links, no remote <base> seen so far.
    $self->{'use_abs_link'} = 0;
    $self->has_remote_base(0);

    return $self;
}
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 parse_file |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
$filter->parse_file; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Parse the HTML file given by $page and change link paths. The output data are returned through the method "write_data". |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=cut |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Parse the HTML source of the associated page and feed the rewritten
# markup to the page through the handler methods of this class.
#
# Takes no arguments besides the invocant. Returns whatever the inherited
# parse() returns, as before.
sub parse_file {
    my ($self) = @_;
    my $page = $self->page;

    my $copy = HTML::Copy->new($page->source_path);

    # Make the output side use the same I/O layer as the source file.
    $page->set_binmode($copy->io_layer);

    my $result = $self->SUPER::parse($copy->source_html);

    # The whole document was handed over as a single chunk, so the end of
    # the document has to be signalled explicitly. Without eof() the parser
    # may keep the trailing text buffered (it cannot know that no further
    # chunk follows) and that text would never reach output().
    $self->eof;

    return $result;
}
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head1 SEE ALSO |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
L, L, L, L |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 AUTHOR |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Tetsuro KURITA |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
##== private methods |
85
|
|
|
|
|
|
|
# Hand a piece of (possibly rewritten) markup over to the page.
#
#   $data - text to write.
#
# Returns whatever the page's write_data() returns.
sub output {
    my $self = shift;
    my ($data) = @_;

    my $page = $self->page;
    return $page->write_data($data);
}
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
##== overriding methods of HTML::Parser |
91
|
|
|
|
|
|
|
|
92
|
0
|
|
|
0
|
1
|
|
# Handler for markup declarations such as a DOCTYPE.
#
#   $decl - the declaration text; HTML::Parser passes it without the
#           leading "<!" and the trailing ">".
#
# The delimiters are put back so that the declaration is copied to the
# output instead of being dropped.
sub declaration {
    my ($self, $decl) = @_;
    return $self->output("<!$decl>");
}
93
|
0
|
|
|
0
|
1
|
|
# Handler for processing instructions: the third argument (the original
# text, as passed by the parser) is copied to the output unchanged.
sub process {
    my ($self, undef, $orig_text) = @_;
    return $self->output($orig_text);
}
94
|
0
|
|
|
0
|
1
|
|
# Handler for end tags: the original tag text (third argument) is written
# to the output as is; the tag name (second argument) is not used.
sub end {
    my $self = shift;
    my (undef, $orig_text) = @_;
    return $self->output($orig_text);
}
95
|
0
|
|
|
0
|
1
|
|
# Handler for plain text between tags: passed through untouched.
sub text {
    my ($self, $text) = @_;
    return $self->output($text);
}
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Handler for HTML comments.
#
#   $comment - the comment text; HTML::Parser passes it without the
#              surrounding "<!--" and "-->".
#
# When absolute links are allowed for this page, the marker comments
# "begin abs_link" and "end abs_link" switch the absolute-link mode on and
# off for the tags that follow. The comment itself is always copied to the
# output, with its delimiters restored.
sub comment {
    my ($self, $comment) = @_;

    if ($self->{'allow_abs_link'}) {
        if ($comment =~ /^\s*begin abs_link/) {
            $self->{'use_abs_link'} = 1;
        }
        elsif ($comment =~ /^\s*end abs_link/) {
            $self->{'use_abs_link'} = 0;
        }
    }

    return $self->output("<!--$comment-->");
}
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# Handler for start tags. This is where links are rewritten.
#
#   $tag        - tag name
#   $attr_dict  - hash ref, attribute name => value (modified in place when
#                 a link is rewritten)
#   $attr_names - array ref of the attribute names
#   $tag_text   - the tag as it appeared in the source
#
# Behaviour:
#   * Once a remote <base> has been seen, every tag is copied verbatim.
#   * <base>: a local one (no scheme, or "file") updates the base URI of the
#     page and is removed from the output; any other scheme marks the
#     document as having a remote base. A <base> without an href attribute
#     is copied unchanged.
#   * img/src, body/background, link/href, frame/src, script/src and a/href
#     are rewritten, either to an absolute URL (inside a "begin abs_link"
#     section) or through the page's change_path().
#
# Returns whatever output() returns.
sub start {
    my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_;
    my $page = $self->page;

    # NOTE(review): this tests the tag *name*, which presumably never ends
    # in "/>", so ' />' looks unreachable. Left unchanged on purpose:
    # HTML::Copy->build_attributes may already reproduce a trailing "/" --
    # confirm before switching the test to $tag_text.
    my $empty_tag_end = ($tag =~ /\/>$/) ? ' />' : '>';

    if ($self->has_remote_base) {
        return $self->output($tag_text);
    }

    # Rewrite the link held in attribute $target_attr and, if there was one,
    # rebuild $tag_text from the updated attributes, closed with $closer.
    # $kind falls back to $folder_name when it is not given.
    my $relink_tag = sub {
        my ($closer, $target_attr, $folder_name, $kind) = @_;

        my $link = $attr_dict->{$target_attr};
        return unless $link;

        if ($self->{'use_abs_link'}) {
            $attr_dict->{$target_attr} = $page->build_abs_url($link);
        }
        else {
            $kind = $folder_name unless $kind;
            $attr_dict->{$target_attr}
                = $page->change_path($link, $folder_name, $kind);
        }

        my $tag_attrs = HTML::Copy->build_attributes($attr_dict, $attr_names);
        $tag_text = "<$tag $tag_attrs$closer" if $tag_attrs;
        return;
    };

    if ($tag eq 'base') {
        my $href = $attr_dict->{'href'};

        # A <base> that carries no href (for instance only a target) says
        # nothing about the base URI, so it is passed through untouched
        # instead of being treated as a local base and deleted.
        if (defined $href) {
            my $uri    = URI->new($href);
            my $scheme = $uri->scheme;
            if (!$scheme or $scheme eq 'file') {
                $page->base_uri($uri->abs($page->base_uri));
                $tag_text = '';
            }
            else {
                $self->has_remote_base(1);
            }
        }
    }
    # images
    elsif ($tag eq 'img') {
        $relink_tag->($empty_tag_end, 'src', $page->resource_folder_name);
    }
    # background images
    elsif ($tag eq 'body') {
        $relink_tag->('>', 'background', $page->resource_folder_name);
    }
    # linked resources; stylesheets get their own kind
    elsif ($tag eq 'link') {
        my $folder_name = $page->resource_folder_name;
        my $relation    = $attr_dict->{'rel'};
        my $kind        = (defined $relation and lc($relation) eq 'stylesheet')
                        ? 'css'
                        : $folder_name;
        $relink_tag->($empty_tag_end, 'href', $folder_name, $kind);
    }
    # frames point to other pages
    elsif ($tag eq 'frame') {
        $relink_tag->($empty_tag_end, 'src', $page->page_folder_name, 'page');
    }
    # javascript
    elsif ($tag eq 'script') {
        $relink_tag->('>', 'src', $page->resource_folder_name);
    }
    # anchors
    elsif ($tag eq 'a') {
        if (exists $attr_dict->{'href'}) {
            my $href        = $attr_dict->{'href'};
            my $kind        = 'page';
            my $folder_name = $page->page_folder_name;

            # A target with a fragment ("x#y") is always handled as a page.
            # Otherwise only targets with an HTML suffix are pages;
            # everything else is treated as a resource.
            if ($href !~ /(.+)#(.*)/) {
                my @matchedSuffix = grep { $href =~ /\Q$_\E$/ } @htmlSuffix;
                unless (@matchedSuffix) {
                    $folder_name = $page->resource_folder_name;
                    $kind        = $folder_name;
                }
            }

            $relink_tag->('>', 'href', $folder_name, $kind);
        }
    }

    return $self->output($tag_text);
}
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
1; |