| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::Encapsulate; |
|
2
|
2
|
|
|
2
|
|
18629
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
60
|
|
|
3
|
2
|
|
|
2
|
|
10
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
44
|
|
|
4
|
2
|
|
|
2
|
|
9
|
use Carp; |
|
|
2
|
|
|
|
|
12
|
|
|
|
2
|
|
|
|
|
123
|
|
|
5
|
2
|
|
|
2
|
|
10
|
use PerlIO; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
14
|
|
|
6
|
2
|
|
|
2
|
|
52
|
use File::Path qw(mkpath); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
100
|
|
|
7
|
2
|
|
|
2
|
|
9
|
use File::Spec; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
41
|
|
|
8
|
2
|
|
|
2
|
|
10
|
use File::Spec::Unix; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
58
|
|
|
9
|
2
|
|
|
2
|
|
8
|
use Carp qw(croak carp cluck confess); |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
97
|
|
|
10
|
2
|
|
|
2
|
|
9
|
use Exporter; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
58
|
|
|
11
|
2
|
|
|
2
|
|
949
|
use LWP::UserAgent; |
|
|
2
|
|
|
|
|
47361
|
|
|
|
2
|
|
|
|
|
67
|
|
|
12
|
2
|
|
|
2
|
|
1668
|
use HTML::TreeBuilder::XPath; |
|
|
2
|
|
|
|
|
145033
|
|
|
|
2
|
|
|
|
|
22
|
|
|
13
|
2
|
|
|
2
|
|
89
|
use Scalar::Util qw(blessed); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
120
|
|
|
14
|
2
|
|
|
2
|
|
12
|
use URI; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
58
|
|
|
15
|
2
|
|
|
2
|
|
9
|
use HTML::Entities qw(decode_entities encode_entities); |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
182
|
|
|
16
|
2
|
|
|
2
|
|
861
|
use HTML::Tidy; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use HTTP::Response::Encoding; |
|
18
|
|
|
|
|
|
|
use HTML::HeadParser; |
|
19
|
|
|
|
|
|
|
use HTTP::Headers::Util; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.3'); |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our @EXPORT_OK = qw(download); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# We don't want to inherit Exporter, we can't always import the import |
|
27
|
|
|
|
|
|
|
# method, so this is a workaround. |
|
28
|
|
|
|
|
|
|
sub import { goto &Exporter::import } |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
our %TIDY_OPTIONS = (lower_literals => 1, |
|
32
|
|
|
|
|
|
|
show_errors => 0, |
|
33
|
|
|
|
|
|
|
show_warnings => 0, |
|
34
|
|
|
|
|
|
|
tidy_mark => 0); |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# url(blah) or url( 'blah' ) etc. |
|
38
|
|
|
|
|
|
|
my $QUOTED_STR = qr/ " ([^"]*) " | ' ([^']*) ' /x; |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $URL_RE = qr/ url \s* \( |
|
41
|
|
|
|
|
|
|
\s* (?: $QUOTED_STR | (.*?) ) \s* |
|
42
|
|
|
|
|
|
|
\) |
|
43
|
|
|
|
|
|
|
/ix; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $IMPORT_RE = qr/ |
|
46
|
|
|
|
|
|
|
\@import (?: |
|
47
|
|
|
|
|
|
|
\s+ $URL_RE | # @import url(blah) with optional quotes |
|
48
|
|
|
|
|
|
|
\s* $QUOTED_STR | # @import "blah" or @import 'blah' |
|
49
|
|
|
|
|
|
|
\s+ (\S+) # @import blah |
|
50
|
|
|
|
|
|
|
) |
|
51
|
|
|
|
|
|
|
/xi; |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub _inner_html |
|
54
|
|
|
|
|
|
|
{ |
|
55
|
|
|
|
|
|
|
my $node = shift; |
|
56
|
|
|
|
|
|
|
join "", map { ref $_? $_->as_HTML : $_ } $node->content_list; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub _slurp |
|
60
|
|
|
|
|
|
|
{ |
|
61
|
|
|
|
|
|
|
my $path = shift; |
|
62
|
|
|
|
|
|
|
my $encoding = defined $_[0]? |
|
63
|
|
|
|
|
|
|
"encoding($_[0])" : ""; |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
local $/; |
|
66
|
|
|
|
|
|
|
confess "failed to open file '$path': $!" |
|
67
|
|
|
|
|
|
|
unless open my $fh, "<$encoding", $path; |
|
68
|
|
|
|
|
|
|
my $content = <$fh>; |
|
69
|
|
|
|
|
|
|
close $fh; |
|
70
|
|
|
|
|
|
|
return $content; |
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub _spit |
|
74
|
|
|
|
|
|
|
{ |
|
75
|
|
|
|
|
|
|
my $path = shift; |
|
76
|
|
|
|
|
|
|
my $content = shift; |
|
77
|
|
|
|
|
|
|
confess "failed to open file '$path': $!" unless open my $fh, ">", $path; |
|
78
|
|
|
|
|
|
|
print $fh $content; |
|
79
|
|
|
|
|
|
|
close $fh; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# This parses the charset from a HTML doc's HEAD section, if present, |
|
83
|
|
|
|
|
|
|
# |
|
84
|
|
|
|
|
|
|
# The code here is adapted from Tatsuhiko Miyagawa's here: |
|
85
|
|
|
|
|
|
|
# http://svn.bulknews.net/repos/public/HTTP-Response-Charset/trunk/lib/HTTP/Response/Charset.pm |
|
86
|
|
|
|
|
|
|
# |
|
87
|
|
|
|
|
|
|
# See also http://use.perl.org/~miyagawa/journal/31250 |
|
88
|
|
|
|
|
|
|
# HTTP::Response::Charset seems not to be on CPAN, however. |
|
89
|
|
|
|
|
|
|
{ |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $boms = [ |
|
92
|
|
|
|
|
|
|
'UTF-8' => "\x{ef}\x{bb}\x{bf}", |
|
93
|
|
|
|
|
|
|
'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}", |
|
94
|
|
|
|
|
|
|
'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}", |
|
95
|
|
|
|
|
|
|
'UTF-16BE' => "\x{fe}\x{ff}", |
|
96
|
|
|
|
|
|
|
'UTF-16LE' => "\x{ff}\x{fe}", |
|
97
|
|
|
|
|
|
|
]; |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub _detect_encoding |
|
101
|
|
|
|
|
|
|
{ |
|
102
|
|
|
|
|
|
|
my $filename = shift; |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# 1) We assume the content has been identified as HTML, |
|
105
|
|
|
|
|
|
|
# and the Content-Type header already checked. |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Read in a max 4k chunk from the content; |
|
108
|
|
|
|
|
|
|
my $chunk; |
|
109
|
|
|
|
|
|
|
{ |
|
110
|
|
|
|
|
|
|
open my $fh, "<", $filename |
|
111
|
|
|
|
|
|
|
or Carp::confess "Failed to read file '$filename': $!"; |
|
112
|
|
|
|
|
|
|
read $fh, $chunk, 4096; # read up to 4k |
|
113
|
|
|
|
|
|
|
close $fh; |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# 2) Look for META head tags |
|
117
|
|
|
|
|
|
|
{ |
|
118
|
|
|
|
|
|
|
my $head_parser = HTML::HeadParser->new; |
|
119
|
|
|
|
|
|
|
$head_parser->parse($chunk); |
|
120
|
|
|
|
|
|
|
$head_parser->eof; |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
my $content_type = $head_parser->header('Content-Type'); |
|
123
|
|
|
|
|
|
|
return unless $content_type; |
|
124
|
|
|
|
|
|
|
my ($words) = HTTP::Headers::Util::split_header_words($content_type); |
|
125
|
|
|
|
|
|
|
my %param = @$words; |
|
126
|
|
|
|
|
|
|
return $param{charset}; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# 3) If there's a UTF BOM set, look for it |
|
130
|
|
|
|
|
|
|
my $count = 0; |
|
131
|
|
|
|
|
|
|
while (my ($enc, $bom) = $boms->[$count++, $count++]) |
|
132
|
|
|
|
|
|
|
{ |
|
133
|
|
|
|
|
|
|
return $enc |
|
134
|
|
|
|
|
|
|
if $bom eq substr($chunk, 0, length $bom); |
|
135
|
|
|
|
|
|
|
} |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# 4) If it looks like an XML document, look for XML declaration |
|
138
|
|
|
|
|
|
|
if ($chunk =~ m!^<\?xml\s+version="1.0"\s+encoding="([\w\-]+)"\?>!) { |
|
139
|
|
|
|
|
|
|
return $1; |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# 5) If there's Encode::Detect module installed, try it |
|
143
|
|
|
|
|
|
|
if ( eval "use Encode::Detect::Detector" ) { |
|
144
|
|
|
|
|
|
|
my $charset = Encode::Detect::Detector::detect($chunk); |
|
145
|
|
|
|
|
|
|
return $charset if $charset; |
|
146
|
|
|
|
|
|
|
} |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
return; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# Constructor |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub new |
|
156
|
|
|
|
|
|
|
{ |
|
157
|
|
|
|
|
|
|
my $class = shift; |
|
158
|
|
|
|
|
|
|
croak "You must supply a matched set of key => value paramters" |
|
159
|
|
|
|
|
|
|
if @_ % 2; |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
my %options = @_; |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
unless (defined $options{ua}) |
|
164
|
|
|
|
|
|
|
{ |
|
165
|
|
|
|
|
|
|
# the default user agent should follow redirects |
|
166
|
|
|
|
|
|
|
my $ua = LWP::UserAgent->new( |
|
167
|
|
|
|
|
|
|
requests_redirectable => [qw(GET POST HEAD)] |
|
168
|
|
|
|
|
|
|
); |
|
169
|
|
|
|
|
|
|
$options{ua} = $ua; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
my $self = bless \%options, $class; |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
return $self; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub ua { @_>1 ? shift->{ua} = shift : shift->{ua} } |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
our $DEFAULT_INSTANCE; # lazily assigned within download |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub download |
|
182
|
|
|
|
|
|
|
{ |
|
183
|
|
|
|
|
|
|
my $self = shift; |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# An URI or HTTP::Request for the page we want |
|
186
|
|
|
|
|
|
|
my $request = shift; |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# Where to save things. A directory - the main file will be called |
|
189
|
|
|
|
|
|
|
# 'index.html' |
|
190
|
|
|
|
|
|
|
my $content_dir = shift; |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# A specialised UserAgent to use |
|
193
|
|
|
|
|
|
|
my $ua = shift; |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
if (!blessed($self) |
|
196
|
|
|
|
|
|
|
|| !$self->isa(__PACKAGE__)) |
|
197
|
|
|
|
|
|
|
{ # we're a function, readjust the paramters accordingly |
|
198
|
|
|
|
|
|
|
($self, $request, $content_dir, $ua) |
|
199
|
|
|
|
|
|
|
= ( ($DEFAULT_INSTANCE ||= __PACKAGE__->new), $self, $request, $content_dir, shift); |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# If no user agent supplied, use the instance's |
|
204
|
|
|
|
|
|
|
$ua ||= $self->{ua}; |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
croak "please supply an URL or HTTP::Request to download" |
|
207
|
|
|
|
|
|
|
unless $request; |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
$request = HTTP::Request->new(GET => $request) |
|
210
|
|
|
|
|
|
|
if (blessed $request && $request->isa('URI')) |
|
211
|
|
|
|
|
|
|
|| ref \$request eq 'SCALAR'; |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
croak "first argument must be an URL or HTTP::Request instance" |
|
214
|
|
|
|
|
|
|
unless blessed $request and $request->isa('HTTP::Request'); |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
croak "please supply a directory to copy into" |
|
217
|
|
|
|
|
|
|
unless $content_dir; |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
carp "warning, path '$content_dir' already exists, we may overwrite content" |
|
220
|
|
|
|
|
|
|
if -e $content_dir; |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# All seems in order, now proceed.... |
|
223
|
|
|
|
|
|
|
mkpath $content_dir; |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# First get the main document |
|
227
|
|
|
|
|
|
|
my $file = File::Spec->catdir($content_dir, "index.html"); |
|
228
|
|
|
|
|
|
|
my $response = $ua->request($request, $file); |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
unless ($response and $response->is_success) |
|
231
|
|
|
|
|
|
|
{ |
|
232
|
|
|
|
|
|
|
croak "HTTP request failed: ". $response->status_line; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# If it's not HTML, we can't understand it, so just leave it |
|
236
|
|
|
|
|
|
|
# unchanged. |
|
237
|
|
|
|
|
|
|
return unless $response->content_type =~ /html$/; |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# Otherwise, "localise" it.... |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# This will parse the HTML so we can get the links |
|
243
|
|
|
|
|
|
|
my $parser = HTML::TreeBuilder::XPath->new; |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Get the encoding, if we can |
|
246
|
|
|
|
|
|
|
my $encoding = $response->encoding || _detect_encoding($file); |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# HTML::Tidy does a better job of interpreting bad html than |
|
249
|
|
|
|
|
|
|
# HTML::TreeBuilder alone, so we pass it through that first. If |
|
250
|
|
|
|
|
|
|
# we don't, the resulting HTML obtained after HTML::TreeBuilder |
|
251
|
|
|
|
|
|
|
# has parsed it can be broken. |
|
252
|
|
|
|
|
|
|
{ |
|
253
|
|
|
|
|
|
|
my $tidy = HTML::Tidy->new(\%TIDY_OPTIONS); |
|
254
|
|
|
|
|
|
|
$tidy->ignore( text => qr/./ ); |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
my $content = _slurp($file, $encoding); |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
{ |
|
260
|
|
|
|
|
|
|
no warnings 'redefine'; |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# HTML::Tidy insists on calling this function.... silence |
|
263
|
|
|
|
|
|
|
# it, locally |
|
264
|
|
|
|
|
|
|
local *Carp::carp = sub {}; |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
$content = $tidy->clean($content); |
|
267
|
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
$parser->parse($content); |
|
270
|
|
|
|
|
|
|
} |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
my %seen; # We store URLs we've already processed in here |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# This will both download an URL's target and rewrite the URL to |
|
275
|
|
|
|
|
|
|
# point to the downloaded copy - here we refer to that process as |
|
276
|
|
|
|
|
|
|
# "localising" an url. |
|
277
|
|
|
|
|
|
|
my $localise_url = sub |
|
278
|
|
|
|
|
|
|
{ |
|
279
|
|
|
|
|
|
|
my $url = shift || croak "no url parameter supplied"; |
|
280
|
|
|
|
|
|
|
$url = URI->new_abs(decode_entities($url), $response->base) |
|
281
|
|
|
|
|
|
|
unless blessed $url; |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
my $local_url = $seen{$url}; |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
unless ($local_url) |
|
286
|
|
|
|
|
|
|
{ |
|
287
|
|
|
|
|
|
|
# FIXME check for inline URL images? (i.e. data:// urls) |
|
288
|
|
|
|
|
|
|
my ($ext) = $url->path =~ m$!; |
|
289
|
|
|
|
|
|
|
my $index = keys(%seen)+1; |
|
290
|
|
|
|
|
|
|
my $filename = $index; |
|
291
|
|
|
|
|
|
|
$filename .= ".$ext" |
|
292
|
|
|
|
|
|
|
if defined $ext; |
|
293
|
|
|
|
|
|
|
my $file = File::Spec->catfile($content_dir, $filename); |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
# clean up things like '/../foo' which will cause an error |
|
297
|
|
|
|
|
|
|
# if passed to $ua->get |
|
298
|
|
|
|
|
|
|
my $url_path = File::Spec::Unix->canonpath($url->path); |
|
299
|
|
|
|
|
|
|
$url->path($url_path); |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
$local_url = $seen{$url} = $filename; |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# print "downloading $url -> $file\n"; DEBUG |
|
304
|
|
|
|
|
|
|
my $response2 = $ua->get($url, ':content_file' => $file); |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
carp "failed to download $url: ". $response2->status_line |
|
307
|
|
|
|
|
|
|
unless $response2->is_success |
|
308
|
|
|
|
|
|
|
&& -f $file; |
|
309
|
|
|
|
|
|
|
} |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
return $local_url; |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
}; |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# This will localise URLs in tag attributes |
|
316
|
|
|
|
|
|
|
my $process_attr = sub |
|
317
|
|
|
|
|
|
|
{ |
|
318
|
|
|
|
|
|
|
my ($attr) = @_; |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
my $url = $attr->getValue; |
|
321
|
|
|
|
|
|
|
return unless $url ne ""; |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
|
324
|
|
|
|
|
|
|
# warn "url $url -> $local_url"; # DEBUG |
|
325
|
|
|
|
|
|
|
# rewrite the attribute |
|
326
|
|
|
|
|
|
|
$attr->getParentNode->attr($attr->getName, $local_url); |
|
327
|
|
|
|
|
|
|
}; |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# This will localise a stylesheet link |
|
330
|
|
|
|
|
|
|
my $localise_style_url = sub |
|
331
|
|
|
|
|
|
|
{ |
|
332
|
|
|
|
|
|
|
# note, CSS defines URLs to be relative to the stylesheet. |
|
333
|
|
|
|
|
|
|
my $base = shift || croak "you must supply a base url"; |
|
334
|
|
|
|
|
|
|
my $url = URI->new_abs(shift, $base); |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
$local_url = encode_entities($local_url); |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# warn "localising $url-> $local_url\n"; # DEBUG |
|
341
|
|
|
|
|
|
|
return "url($local_url)"; |
|
342
|
|
|
|
|
|
|
}; |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
my $process_stylesheet; # defined later |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# This will localise a stylesheet @import link |
|
347
|
|
|
|
|
|
|
my $localise_import = sub |
|
348
|
|
|
|
|
|
|
{ |
|
349
|
|
|
|
|
|
|
my $base = shift; |
|
350
|
|
|
|
|
|
|
my $url = shift; |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
|
353
|
|
|
|
|
|
|
my $stylesheet_file = File::Spec->catdir($content_dir, $local_url); |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
my $content = _slurp $stylesheet_file; |
|
356
|
|
|
|
|
|
|
$process_stylesheet->($base, $content); |
|
357
|
|
|
|
|
|
|
_spit $stylesheet_file, $content; |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# Note, we don't convert the url, since that will be done later |
|
360
|
|
|
|
|
|
|
return "\@import url($url)"; |
|
361
|
|
|
|
|
|
|
}; |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# This function will localise an entire stylesheet's links. It |
|
364
|
|
|
|
|
|
|
# returns the number of things downloaded. |
|
365
|
|
|
|
|
|
|
$process_stylesheet = sub |
|
366
|
|
|
|
|
|
|
{ |
|
367
|
|
|
|
|
|
|
my $base = shift || croak "you must supply a base url"; |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# First, convert all '@import' statements to the '@import url()' form, |
|
370
|
|
|
|
|
|
|
# then localise all url() references. Return true if either has been applied. |
|
371
|
|
|
|
|
|
|
my @stylesheets = $_[0] =~ s/$IMPORT_RE/$localise_import->($base, $+)/ige; |
|
372
|
|
|
|
|
|
|
my @urls = $_[0] =~ s/$URL_RE/$localise_style_url->($base, $+)/ige; |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
return @stylesheets + @urls; |
|
375
|
|
|
|
|
|
|
}; |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# This localises a |