line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Encapsulate; |
2
|
2
|
|
|
2
|
|
16026
|
use warnings; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
63
|
|
3
|
2
|
|
|
2
|
|
10
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
52
|
|
4
|
2
|
|
|
2
|
|
28
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
171
|
|
5
|
2
|
|
|
2
|
|
11
|
use PerlIO; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
15
|
|
6
|
2
|
|
|
2
|
|
62
|
use File::Path qw(mkpath); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
95
|
|
7
|
2
|
|
|
2
|
|
11
|
use File::Spec; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
54
|
|
8
|
2
|
|
|
2
|
|
10
|
use File::Spec::Unix; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
66
|
|
9
|
2
|
|
|
2
|
|
9
|
use Carp qw(croak carp cluck confess); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
92
|
|
10
|
2
|
|
|
2
|
|
16
|
use Exporter; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
75
|
|
11
|
2
|
|
|
2
|
|
1124
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
45277
|
|
|
2
|
|
|
|
|
54
|
|
12
|
2
|
|
|
2
|
|
1827
|
use HTML::TreeBuilder::XPath; |
|
2
|
|
|
|
|
149136
|
|
|
2
|
|
|
|
|
27
|
|
13
|
2
|
|
|
2
|
|
85
|
use Scalar::Util qw(blessed); |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
174
|
|
14
|
2
|
|
|
2
|
|
11
|
use URI; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
57
|
|
15
|
2
|
|
|
2
|
|
11
|
use HTML::Entities qw(decode_entities encode_entities); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
126
|
|
16
|
2
|
|
|
2
|
|
902
|
use HTML::Tidy; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use HTTP::Response::Encoding; |
18
|
|
|
|
|
|
|
use HTML::HeadParser; |
19
|
|
|
|
|
|
|
use HTTP::Headers::Util; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.3'); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Exporter is deliberately not a base class of this package, and its
# import method cannot always be imported directly, so calls to our
# import are handed over to Exporter's explicitly instead.
sub import
{
    # goto &sub re-uses the current @_ and removes this frame, so
    # Exporter::import sees the original caller and arguments.
    goto &Exporter::import;
}
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# The only symbol that can be exported, and only on request.
our @EXPORT_OK = ('download');

# Options handed to the HTML::Tidy constructor when the main document
# is cleaned up.
our %TIDY_OPTIONS = (
    tidy_mark      => 0,
    show_warnings  => 0,
    show_errors    => 0,
    lower_literals => 1,
);
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# A single- or double-quoted string. The text between the quotes is
# captured: the first group for double quotes, the second for single.
my $QUOTED_STR = qr{ " ([^"]*) " | ' ([^']*) ' }x;

# A CSS url() reference: url(blah) or url( 'blah' ) etc. The target is
# captured either by one of $QUOTED_STR's groups or, when unquoted, by
# the group that follows them.
my $URL_RE = qr{
    url \s* \(
        \s* (?: $QUOTED_STR | (.*?) ) \s*
    \)
}ix;

# A CSS @import rule in any of the three forms listed below. Exactly
# one capture group takes part in a match, so $+ holds the imported
# location afterwards.
my $IMPORT_RE = qr{
    \@import (?:
        \s+ $URL_RE     | # @import url(blah) with optional quotes
        \s* $QUOTED_STR | # @import "blah" or @import 'blah'
        \s+ (\S+)         # @import blah
    )
}xi;
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Returns the markup found inside a node, i.e. the concatenation of
# its children without the node's own tags.
#
#   $element - an object providing content_list(); children that are
#              references must provide as_HTML().
sub _inner_html
{
    my ($element) = @_;

    my @fragments;
    for my $child ($element->content_list)
    {
        # Reference children are serialised via as_HTML; anything else
        # is taken to be literal text and used as-is.
        push @fragments, ref $child ? $child->as_HTML : $child;
    }

    return join "", @fragments;
}
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Reads an entire file into a single string.
#
# Parameters:
#   $path     - the file to read.
#   $encoding - optional encoding name. When given (and non-empty) the
#               file is read through an :encoding() layer and decoded;
#               otherwise it is opened with the plain '<' mode.
#
# Returns the file's content. Dies with a stack trace (confess) if the
# file cannot be opened, which includes the case where the encoding
# layer cannot be applied.
sub _slurp
{
    my $path     = shift;
    my $encoding = shift;

    # Layers are introduced by ':' in the open mode. An empty encoding
    # name is treated the same as none, rather than producing an
    # invalid "encoding()" layer.
    my $mode = '<';
    $mode .= ":encoding($encoding)"
        if defined $encoding && length $encoding;

    open my $fh, $mode, $path
        or confess "failed to open file '$path': $!";

    local $/; # undefined record separator: read everything at once
    my $content = <$fh>;
    close $fh;

    return $content;
}
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Writes a string to a file, replacing any existing content.
#
# Parameters:
#   $path    - the file to (over)write.
#   $content - the text to store.
#
# Returns true on success. Dies with a stack trace (confess) if the
# file cannot be opened, written or closed. Buffered write errors may
# only surface when the handle is closed, so the close is checked too.
sub _spit
{
    my ($path, $content) = @_;

    open my $fh, ">", $path
        or confess "failed to open file '$path': $!";

    print {$fh} $content
        or confess "failed to write to file '$path': $!";

    close $fh
        or confess "failed to close file '$path': $!";

    return 1;
}
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# This works out the character encoding of a saved HTML document by
# looking at its first 4k: first a charset given in the HEAD section,
# if present, then a byte-order mark, an XML declaration, and finally
# (when installed) Encode::Detect.
#
# The code here is adapted from Tatsuhiko Miyagawa's here:
# http://svn.bulknews.net/repos/public/HTTP-Response-Charset/trunk/lib/HTTP/Response/Charset.pm
#
# See also http://use.perl.org/~miyagawa/journal/31250
# HTTP::Response::Charset seems not to be on CPAN, however.
{

    # Byte-order marks as [encoding name, BOM bytes] pairs. The order
    # matters: the UTF-32LE mark begins with the UTF-16LE one, so the
    # 32-bit marks have to be tried before the 16-bit ones.
    my @boms = (
        [ 'UTF-8'    => "\x{ef}\x{bb}\x{bf}" ],
        [ 'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}" ],
        [ 'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}" ],
        [ 'UTF-16BE' => "\x{fe}\x{ff}" ],
        [ 'UTF-16LE' => "\x{ff}\x{fe}" ],
    );

    # Parameters:
    #   $filename - path of the file to inspect.
    #
    # Returns the name of the first encoding found, or nothing (undef
    # in scalar context) when no step yields one. Dies with a stack
    # trace if the file cannot be opened or read.
    sub _detect_encoding
    {
        my $filename = shift;

        # 1) We assume the content has been identified as HTML,
        # and the Content-Type header already checked.

        # Read in a max 4k chunk from the content, as raw bytes so
        # that the byte-order mark comparison below is meaningful.
        my $chunk = '';
        {
            open my $fh, "<:raw", $filename
                or Carp::confess("Failed to read file '$filename': $!");
            defined read($fh, $chunk, 4096)
                or Carp::confess("Failed to read file '$filename': $!");
            close $fh;
        }

        # 2) Look for META head tags. Only return from here when a
        # charset was actually found; otherwise fall through to the
        # remaining steps.
        {
            my $head_parser = HTML::HeadParser->new;
            $head_parser->parse($chunk);
            $head_parser->eof;

            my $content_type = $head_parser->header('Content-Type');
            if ($content_type)
            {
                my ($words) = HTTP::Headers::Util::split_header_words($content_type);
                my %param = @{ $words || [] };
                return $param{charset}
                    if $param{charset};
            }
        }

        # 3) If there's a UTF BOM set, look for it
        for my $pair (@boms)
        {
            my ($enc, $bom) = @$pair;
            return $enc
                if $bom eq substr($chunk, 0, length $bom);
        }

        # 4) If it looks like an XML document, look for XML declaration
        if ($chunk =~ m!^<\?xml\s+version="1\.0"\s+encoding="([\w\-]+)"\?>!) {
            return $1;
        }

        # 5) If there's Encode::Detect module installed, try it. The
        # block eval ends in a true value so that a successful load can
        # be told apart from a failed one.
        if ( eval { require Encode::Detect::Detector; 1 } ) {
            my $charset = Encode::Detect::Detector::detect($chunk);
            return $charset if $charset;
        }

        return;
    }
}
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Constructor |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Creates a new instance.
#
# Takes key => value pairs which become the object's fields. The only
# one given special treatment here is 'ua': when it is missing or
# undefined, an LWP::UserAgent is created for it.
#
# Croaks if the argument list has an odd number of elements.
sub new
{
    my ($class, @pairs) = @_;

    croak "You must supply a matched set of key => value paramters"
        if @pairs % 2;

    my %options = @pairs;

    # the default user agent should follow redirects
    $options{ua} = LWP::UserAgent->new(
        requests_redirectable => [qw(GET POST HEAD)]
    ) unless defined $options{ua};

    return bless \%options, $class;
}
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# Accessor for the user agent stored on the instance.
#
#   $obj->ua          - returns the current user agent.
#   $obj->ua($agent)  - stores $agent and returns it.
#
# The arguments are unpacked explicitly: taking both the object and
# the new value with shift inside one assignment makes the result
# depend on the order in which the two sides are evaluated.
sub ua
{
    my $self = shift;

    return $self->{ua}
        unless @_;

    return $self->{ua} = shift;
}
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Instance used when download is called as a plain function rather
# than as a method; it is lazily assigned within download.
our $DEFAULT_INSTANCE;
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub download |
183
|
|
|
|
|
|
|
{ |
184
|
|
|
|
|
|
|
my $self = shift; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# An URI or HTTP::Request for the page we want |
187
|
|
|
|
|
|
|
my $request = shift; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Where to save things. A directory - the main file will be called |
190
|
|
|
|
|
|
|
# 'index.html' |
191
|
|
|
|
|
|
|
my $content_dir = shift; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# A specialised UserAgent to use |
194
|
|
|
|
|
|
|
my $ua = shift; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
if (!blessed($self) |
197
|
|
|
|
|
|
|
|| !$self->isa(__PACKAGE__)) |
198
|
|
|
|
|
|
|
{ # we're a function, readjust the paramters accordingly |
199
|
|
|
|
|
|
|
($self, $request, $content_dir, $ua) |
200
|
|
|
|
|
|
|
= ( ($DEFAULT_INSTANCE ||= __PACKAGE__->new), $self, $request, $content_dir, shift); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# If no user agent supplied, use the instance's |
205
|
|
|
|
|
|
|
$ua ||= $self->{ua}; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
croak "please supply an URL or HTTP::Request to download" |
208
|
|
|
|
|
|
|
unless $request; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
$request = HTTP::Request->new(GET => $request) |
211
|
|
|
|
|
|
|
if (blessed $request && $request->isa('URI')) |
212
|
|
|
|
|
|
|
|| ref \$request eq 'SCALAR'; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
croak "first argument must be an URL or HTTP::Request instance" |
215
|
|
|
|
|
|
|
unless blessed $request and $request->isa('HTTP::Request'); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
croak "please supply a directory to copy into" |
218
|
|
|
|
|
|
|
unless $content_dir; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
carp "warning, path '$content_dir' already exists, we may overwrite content" |
221
|
|
|
|
|
|
|
if -e $content_dir; |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# All seems in order, now proceed.... |
224
|
|
|
|
|
|
|
mkpath $content_dir; |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# First get the main document |
228
|
|
|
|
|
|
|
my $file = File::Spec->catdir($content_dir, "index.html"); |
229
|
|
|
|
|
|
|
my $response = $ua->request($request, $file); |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
unless ($response and $response->is_success) |
232
|
|
|
|
|
|
|
{ |
233
|
|
|
|
|
|
|
croak "HTTP request failed: ". $response->status_line; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# If it's not HTML, we can't understand it, so just leave it |
237
|
|
|
|
|
|
|
# unchanged. |
238
|
|
|
|
|
|
|
return unless $response->content_type =~ /html$/; |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Otherwise, "localise" it.... |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# This will parse the HTML so we can get the links |
244
|
|
|
|
|
|
|
my $parser = HTML::TreeBuilder::XPath->new; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# Get the encoding, if we can |
247
|
|
|
|
|
|
|
my $encoding = $response->encoding || _detect_encoding($file); |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# HTML::Tidy does a better job of interpreting bad html than |
250
|
|
|
|
|
|
|
# HTML::TreeBuilder alone, so we pass it through that first. If |
251
|
|
|
|
|
|
|
# we don't, the resulting HTML obtained after HTML::TreeBuilder |
252
|
|
|
|
|
|
|
# has parsed it can be broken. |
253
|
|
|
|
|
|
|
{ |
254
|
|
|
|
|
|
|
my $tidy = HTML::Tidy->new(\%TIDY_OPTIONS); |
255
|
|
|
|
|
|
|
$tidy->ignore( text => qr/./ ); |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
my $content = _slurp($file, $encoding); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
{ |
261
|
|
|
|
|
|
|
no warnings 'redefine'; |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# HTML::Tidy insists on calling this function.... silence |
264
|
|
|
|
|
|
|
# it, locally |
265
|
|
|
|
|
|
|
local *Carp::carp = sub {}; |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
$content = $tidy->clean($content); |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
$parser->parse($content); |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
my %seen; # We store URLs we've already processed in here |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# This will both download an URL's target and rewrite the URL to |
276
|
|
|
|
|
|
|
# point to the downloaded copy - here we refer to that process as |
277
|
|
|
|
|
|
|
# "localising" an url. |
278
|
|
|
|
|
|
|
my $localise_url = sub |
279
|
|
|
|
|
|
|
{ |
280
|
|
|
|
|
|
|
my $url = shift || croak "no url parameter supplied"; |
281
|
|
|
|
|
|
|
$url = URI->new_abs(decode_entities($url), $response->base) |
282
|
|
|
|
|
|
|
unless blessed $url; |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
my $local_url = $seen{$url}; |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
unless ($local_url) |
287
|
|
|
|
|
|
|
{ |
288
|
|
|
|
|
|
|
# FIXME check for inline URL images? (i.e. data:// urls) |
289
|
|
|
|
|
|
|
my ($ext) = $url->path =~ m![.]([^./]+)$!; |
290
|
|
|
|
|
|
|
my $index = keys(%seen)+1; |
291
|
|
|
|
|
|
|
my $filename = $index; |
292
|
|
|
|
|
|
|
$filename .= ".$ext" |
293
|
|
|
|
|
|
|
if defined $ext; |
294
|
|
|
|
|
|
|
my $file = File::Spec->catfile($content_dir, $filename); |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# clean up things like '/../foo' which will cause an error |
298
|
|
|
|
|
|
|
# if passed to $ua->get |
299
|
|
|
|
|
|
|
my $url_path = File::Spec::Unix->canonpath($url->path); |
300
|
|
|
|
|
|
|
$url->path($url_path); |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
$local_url = $seen{$url} = $filename; |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# print "downloading $url -> $file\n"; DEBUG |
305
|
|
|
|
|
|
|
my $response2 = $ua->get($url, ':content_file' => $file); |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
carp "failed to download $url: ". $response2->status_line |
308
|
|
|
|
|
|
|
unless $response2->is_success |
309
|
|
|
|
|
|
|
&& -f $file; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
return $local_url; |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
}; |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# This will localise URLs in tag attributes |
317
|
|
|
|
|
|
|
my $process_attr = sub |
318
|
|
|
|
|
|
|
{ |
319
|
|
|
|
|
|
|
my ($attr) = @_; |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
my $url = $attr->getValue; |
322
|
|
|
|
|
|
|
return unless $url ne ""; |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
325
|
|
|
|
|
|
|
# warn "url $url -> $local_url"; # DEBUG |
326
|
|
|
|
|
|
|
# rewrite the attribute |
327
|
|
|
|
|
|
|
$attr->getParentNode->attr($attr->getName, $local_url); |
328
|
|
|
|
|
|
|
}; |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# This will localise a stylesheet link |
331
|
|
|
|
|
|
|
my $localise_style_url = sub |
332
|
|
|
|
|
|
|
{ |
333
|
|
|
|
|
|
|
# note, CSS defines URLs to be relative to the stylesheet. |
334
|
|
|
|
|
|
|
my $base = shift || croak "you must supply a base url"; |
335
|
|
|
|
|
|
|
my $url = URI->new_abs(shift, $base); |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
$local_url = encode_entities($local_url); |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# warn "localising $url-> $local_url\n"; # DEBUG |
342
|
|
|
|
|
|
|
return "url($local_url)"; |
343
|
|
|
|
|
|
|
}; |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
my $process_stylesheet; # defined later |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# This will localise a stylesheet @import link |
348
|
|
|
|
|
|
|
my $localise_import = sub |
349
|
|
|
|
|
|
|
{ |
350
|
|
|
|
|
|
|
my $base = shift; |
351
|
|
|
|
|
|
|
my $url = shift; |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
354
|
|
|
|
|
|
|
my $stylesheet_file = File::Spec->catdir($content_dir, $local_url); |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
my $content = _slurp $stylesheet_file; |
357
|
|
|
|
|
|
|
$process_stylesheet->($base, $content); |
358
|
|
|
|
|
|
|
_spit $stylesheet_file, $content; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# Note, we don't convert the url, since that will be done later |
361
|
|
|
|
|
|
|
return "\@import url($url)"; |
362
|
|
|
|
|
|
|
}; |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
# This function will localise an entire stylesheet's links. It |
365
|
|
|
|
|
|
|
# returns the number of things downloaded. |
366
|
|
|
|
|
|
|
$process_stylesheet = sub |
367
|
|
|
|
|
|
|
{ |
368
|
|
|
|
|
|
|
my $base = shift || croak "you must supply a base url"; |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# First, convert all '@import' statements to the '@import url()' form, |
371
|
|
|
|
|
|
|
# then localise all url() references. Return true if either has been applied. |
372
|
|
|
|
|
|
|
my @stylesheets = $_[0] =~ s/$IMPORT_RE/$localise_import->($base, $+)/ige; |
373
|
|
|
|
|
|
|
my @urls = $_[0] =~ s/$URL_RE/$localise_style_url->($base, $+)/ige; |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
return @stylesheets + @urls; |
376
|
|
|
|
|
|
|
}; |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
# This localises a |