line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Encapsulate; |
2
|
2
|
|
|
2
|
|
18629
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
60
|
|
3
|
2
|
|
|
2
|
|
10
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
44
|
|
4
|
2
|
|
|
2
|
|
9
|
use Carp; |
|
2
|
|
|
|
|
12
|
|
|
2
|
|
|
|
|
123
|
|
5
|
2
|
|
|
2
|
|
10
|
use PerlIO; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
14
|
|
6
|
2
|
|
|
2
|
|
52
|
use File::Path qw(mkpath); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
100
|
|
7
|
2
|
|
|
2
|
|
9
|
use File::Spec; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
41
|
|
8
|
2
|
|
|
2
|
|
10
|
use File::Spec::Unix; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
58
|
|
9
|
2
|
|
|
2
|
|
8
|
use Carp qw(croak carp cluck confess); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
97
|
|
10
|
2
|
|
|
2
|
|
9
|
use Exporter; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
58
|
|
11
|
2
|
|
|
2
|
|
949
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
47361
|
|
|
2
|
|
|
|
|
67
|
|
12
|
2
|
|
|
2
|
|
1668
|
use HTML::TreeBuilder::XPath; |
|
2
|
|
|
|
|
145033
|
|
|
2
|
|
|
|
|
22
|
|
13
|
2
|
|
|
2
|
|
89
|
use Scalar::Util qw(blessed); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
120
|
|
14
|
2
|
|
|
2
|
|
12
|
use URI; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
58
|
|
15
|
2
|
|
|
2
|
|
9
|
use HTML::Entities qw(decode_entities encode_entities); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
182
|
|
16
|
2
|
|
|
2
|
|
861
|
use HTML::Tidy; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use HTTP::Response::Encoding; |
18
|
|
|
|
|
|
|
use HTML::HeadParser; |
19
|
|
|
|
|
|
|
use HTTP::Headers::Util; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.3'); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our @EXPORT_OK = qw(download); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# We don't want to inherit Exporter, we can't always import the import |
27
|
|
|
|
|
|
|
# method, so this is a workaround. |
28
|
|
|
|
|
|
|
sub import { goto &Exporter::import } |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
our %TIDY_OPTIONS = (lower_literals => 1, |
32
|
|
|
|
|
|
|
show_errors => 0, |
33
|
|
|
|
|
|
|
show_warnings => 0, |
34
|
|
|
|
|
|
|
tidy_mark => 0); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# url(blah) or url( 'blah' ) etc. |
38
|
|
|
|
|
|
|
my $QUOTED_STR = qr/ " ([^"]*) " | ' ([^']*) ' /x; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $URL_RE = qr/ url \s* \( |
41
|
|
|
|
|
|
|
\s* (?: $QUOTED_STR | (.*?) ) \s* |
42
|
|
|
|
|
|
|
\) |
43
|
|
|
|
|
|
|
/ix; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $IMPORT_RE = qr/ |
46
|
|
|
|
|
|
|
\@import (?: |
47
|
|
|
|
|
|
|
\s+ $URL_RE | # @import url(blah) with optional quotes |
48
|
|
|
|
|
|
|
\s* $QUOTED_STR | # @import "blah" or @import 'blah' |
49
|
|
|
|
|
|
|
\s+ (\S+) # @import blah |
50
|
|
|
|
|
|
|
) |
51
|
|
|
|
|
|
|
/xi; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub _inner_html |
54
|
|
|
|
|
|
|
{ |
55
|
|
|
|
|
|
|
my $node = shift; |
56
|
|
|
|
|
|
|
join "", map { ref $_? $_->as_HTML : $_ } $node->content_list; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub _slurp |
60
|
|
|
|
|
|
|
{ |
61
|
|
|
|
|
|
|
my $path = shift; |
62
|
|
|
|
|
|
|
my $encoding = defined $_[0]? |
63
|
|
|
|
|
|
|
"encoding($_[0])" : ""; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
local $/; |
66
|
|
|
|
|
|
|
confess "failed to open file '$path': $!" |
67
|
|
|
|
|
|
|
unless open my $fh, "<$encoding", $path; |
68
|
|
|
|
|
|
|
my $content = <$fh>; |
69
|
|
|
|
|
|
|
close $fh; |
70
|
|
|
|
|
|
|
return $content; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub _spit |
74
|
|
|
|
|
|
|
{ |
75
|
|
|
|
|
|
|
my $path = shift; |
76
|
|
|
|
|
|
|
my $content = shift; |
77
|
|
|
|
|
|
|
confess "failed to open file '$path': $!" unless open my $fh, ">", $path; |
78
|
|
|
|
|
|
|
print $fh $content; |
79
|
|
|
|
|
|
|
close $fh; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# This parses the charset from a HTML doc's HEAD section, if present, |
83
|
|
|
|
|
|
|
# |
84
|
|
|
|
|
|
|
# The code here is adapted from Tatsuhiko Miyagawa's here: |
85
|
|
|
|
|
|
|
# http://svn.bulknews.net/repos/public/HTTP-Response-Charset/trunk/lib/HTTP/Response/Charset.pm |
86
|
|
|
|
|
|
|
# |
87
|
|
|
|
|
|
|
# See also http://use.perl.org/~miyagawa/journal/31250 |
88
|
|
|
|
|
|
|
# HTTP::Response::Charset seems not to be on CPAN, however. |
89
|
|
|
|
|
|
|
{ |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $boms = [ |
92
|
|
|
|
|
|
|
'UTF-8' => "\x{ef}\x{bb}\x{bf}", |
93
|
|
|
|
|
|
|
'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}", |
94
|
|
|
|
|
|
|
'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}", |
95
|
|
|
|
|
|
|
'UTF-16BE' => "\x{fe}\x{ff}", |
96
|
|
|
|
|
|
|
'UTF-16LE' => "\x{ff}\x{fe}", |
97
|
|
|
|
|
|
|
]; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub _detect_encoding |
101
|
|
|
|
|
|
|
{ |
102
|
|
|
|
|
|
|
my $filename = shift; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# 1) We assume the content has been identified as HTML, |
105
|
|
|
|
|
|
|
# and the Content-Type header already checked. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# Read in a max 4k chunk from the content; |
108
|
|
|
|
|
|
|
my $chunk; |
109
|
|
|
|
|
|
|
{ |
110
|
|
|
|
|
|
|
open my $fh, "<", $filename |
111
|
|
|
|
|
|
|
or Carp::confess "Failed to read file '$filename': $!"; |
112
|
|
|
|
|
|
|
read $fh, $chunk, 4096; # read up to 4k |
113
|
|
|
|
|
|
|
close $fh; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# 2) Look for META head tags |
117
|
|
|
|
|
|
|
{ |
118
|
|
|
|
|
|
|
my $head_parser = HTML::HeadParser->new; |
119
|
|
|
|
|
|
|
$head_parser->parse($chunk); |
120
|
|
|
|
|
|
|
$head_parser->eof; |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
my $content_type = $head_parser->header('Content-Type'); |
123
|
|
|
|
|
|
|
return unless $content_type; |
124
|
|
|
|
|
|
|
my ($words) = HTTP::Headers::Util::split_header_words($content_type); |
125
|
|
|
|
|
|
|
my %param = @$words; |
126
|
|
|
|
|
|
|
return $param{charset}; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# 3) If there's a UTF BOM set, look for it |
130
|
|
|
|
|
|
|
my $count = 0; |
131
|
|
|
|
|
|
|
while (my ($enc, $bom) = $boms->[$count++, $count++]) |
132
|
|
|
|
|
|
|
{ |
133
|
|
|
|
|
|
|
return $enc |
134
|
|
|
|
|
|
|
if $bom eq substr($chunk, 0, length $bom); |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# 4) If it looks like an XML document, look for XML declaration |
138
|
|
|
|
|
|
|
if ($chunk =~ m!^<\?xml\s+version="1.0"\s+encoding="([\w\-]+)"\?>!) { |
139
|
|
|
|
|
|
|
return $1; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# 5) If there's Encode::Detect module installed, try it |
143
|
|
|
|
|
|
|
if ( eval "use Encode::Detect::Detector" ) { |
144
|
|
|
|
|
|
|
my $charset = Encode::Detect::Detector::detect($chunk); |
145
|
|
|
|
|
|
|
return $charset if $charset; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
return; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# Constructor |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub new |
156
|
|
|
|
|
|
|
{ |
157
|
|
|
|
|
|
|
my $class = shift; |
158
|
|
|
|
|
|
|
croak "You must supply a matched set of key => value paramters" |
159
|
|
|
|
|
|
|
if @_ % 2; |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
my %options = @_; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
unless (defined $options{ua}) |
164
|
|
|
|
|
|
|
{ |
165
|
|
|
|
|
|
|
# the default user agent should follow redirects |
166
|
|
|
|
|
|
|
my $ua = LWP::UserAgent->new( |
167
|
|
|
|
|
|
|
requests_redirectable => [qw(GET POST HEAD)] |
168
|
|
|
|
|
|
|
); |
169
|
|
|
|
|
|
|
$options{ua} = $ua; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
my $self = bless \%options, $class; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
return $self; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub ua { @_>1 ? shift->{ua} = shift : shift->{ua} } |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
our $DEFAULT_INSTANCE; # lazily assigned within download |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub download |
182
|
|
|
|
|
|
|
{ |
183
|
|
|
|
|
|
|
my $self = shift; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# An URI or HTTP::Request for the page we want |
186
|
|
|
|
|
|
|
my $request = shift; |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# Where to save things. A directory - the main file will be called |
189
|
|
|
|
|
|
|
# 'index.html' |
190
|
|
|
|
|
|
|
my $content_dir = shift; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# A specialised UserAgent to use |
193
|
|
|
|
|
|
|
my $ua = shift; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
if (!blessed($self) |
196
|
|
|
|
|
|
|
|| !$self->isa(__PACKAGE__)) |
197
|
|
|
|
|
|
|
{ # we're a function, readjust the parameters accordingly |
198
|
|
|
|
|
|
|
($self, $request, $content_dir, $ua) |
199
|
|
|
|
|
|
|
= ( ($DEFAULT_INSTANCE ||= __PACKAGE__->new), $self, $request, $content_dir, shift); |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# If no user agent supplied, use the instance's |
204
|
|
|
|
|
|
|
$ua ||= $self->{ua}; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
croak "please supply an URL or HTTP::Request to download" |
207
|
|
|
|
|
|
|
unless $request; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
$request = HTTP::Request->new(GET => $request) |
210
|
|
|
|
|
|
|
if (blessed $request && $request->isa('URI')) |
211
|
|
|
|
|
|
|
|| ref \$request eq 'SCALAR'; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
croak "first argument must be an URL or HTTP::Request instance" |
214
|
|
|
|
|
|
|
unless blessed $request and $request->isa('HTTP::Request'); |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
croak "please supply a directory to copy into" |
217
|
|
|
|
|
|
|
unless $content_dir; |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
carp "warning, path '$content_dir' already exists, we may overwrite content" |
220
|
|
|
|
|
|
|
if -e $content_dir; |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# All seems in order, now proceed.... |
223
|
|
|
|
|
|
|
mkpath $content_dir; |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# First get the main document |
227
|
|
|
|
|
|
|
my $file = File::Spec->catdir($content_dir, "index.html"); |
228
|
|
|
|
|
|
|
my $response = $ua->request($request, $file); |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
unless ($response and $response->is_success) |
231
|
|
|
|
|
|
|
{ |
232
|
|
|
|
|
|
|
croak "HTTP request failed: ". $response->status_line; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# If it's not HTML, we can't understand it, so just leave it |
236
|
|
|
|
|
|
|
# unchanged. |
237
|
|
|
|
|
|
|
return unless $response->content_type =~ /html$/; |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# Otherwise, "localise" it.... |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# This will parse the HTML so we can get the links |
243
|
|
|
|
|
|
|
my $parser = HTML::TreeBuilder::XPath->new; |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Get the encoding, if we can |
246
|
|
|
|
|
|
|
my $encoding = $response->encoding || _detect_encoding($file); |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# HTML::Tidy does a better job of interpreting bad html than |
249
|
|
|
|
|
|
|
# HTML::TreeBuilder alone, so we pass it through that first. If |
250
|
|
|
|
|
|
|
# we don't, the resulting HTML obtained after HTML::TreeBuilder |
251
|
|
|
|
|
|
|
# has parsed it can be broken. |
252
|
|
|
|
|
|
|
{ |
253
|
|
|
|
|
|
|
my $tidy = HTML::Tidy->new(\%TIDY_OPTIONS); |
254
|
|
|
|
|
|
|
$tidy->ignore( text => qr/./ ); |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
my $content = _slurp($file, $encoding); |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
{ |
260
|
|
|
|
|
|
|
no warnings 'redefine'; |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# HTML::Tidy insists on calling this function.... silence |
263
|
|
|
|
|
|
|
# it, locally |
264
|
|
|
|
|
|
|
local *Carp::carp = sub {}; |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
$content = $tidy->clean($content); |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
$parser->parse($content); |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
my %seen; # We store URLs we've already processed in here |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# This will both download an URL's target and rewrite the URL to |
275
|
|
|
|
|
|
|
# point to the downloaded copy - here we refer to that process as |
276
|
|
|
|
|
|
|
# "localising" an url. |
277
|
|
|
|
|
|
|
my $localise_url = sub |
278
|
|
|
|
|
|
|
{ |
279
|
|
|
|
|
|
|
my $url = shift || croak "no url parameter supplied"; |
280
|
|
|
|
|
|
|
$url = URI->new_abs(decode_entities($url), $response->base) |
281
|
|
|
|
|
|
|
unless blessed $url; |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
my $local_url = $seen{$url}; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
unless ($local_url) |
286
|
|
|
|
|
|
|
{ |
287
|
|
|
|
|
|
|
# FIXME check for inline URL images? (i.e. data:// urls) |
288
|
|
|
|
|
|
|
my ($ext) = $url->path =~ m![.]([^./]+)$!; |
289
|
|
|
|
|
|
|
my $index = keys(%seen)+1; |
290
|
|
|
|
|
|
|
my $filename = $index; |
291
|
|
|
|
|
|
|
$filename .= ".$ext" |
292
|
|
|
|
|
|
|
if defined $ext; |
293
|
|
|
|
|
|
|
my $file = File::Spec->catfile($content_dir, $filename); |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
# clean up things like '/../foo' which will cause an error |
297
|
|
|
|
|
|
|
# if passed to $ua->get |
298
|
|
|
|
|
|
|
my $url_path = File::Spec::Unix->canonpath($url->path); |
299
|
|
|
|
|
|
|
$url->path($url_path); |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
$local_url = $seen{$url} = $filename; |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# print "downloading $url -> $file\n"; DEBUG |
304
|
|
|
|
|
|
|
my $response2 = $ua->get($url, ':content_file' => $file); |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
carp "failed to download $url: ". $response2->status_line |
307
|
|
|
|
|
|
|
unless $response2->is_success |
308
|
|
|
|
|
|
|
&& -f $file; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
return $local_url; |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
}; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# This will localise URLs in tag attributes |
316
|
|
|
|
|
|
|
my $process_attr = sub |
317
|
|
|
|
|
|
|
{ |
318
|
|
|
|
|
|
|
my ($attr) = @_; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
my $url = $attr->getValue; |
321
|
|
|
|
|
|
|
return unless $url ne ""; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
324
|
|
|
|
|
|
|
# warn "url $url -> $local_url"; # DEBUG |
325
|
|
|
|
|
|
|
# rewrite the attribute |
326
|
|
|
|
|
|
|
$attr->getParentNode->attr($attr->getName, $local_url); |
327
|
|
|
|
|
|
|
}; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# This will localise a stylesheet link |
330
|
|
|
|
|
|
|
my $localise_style_url = sub |
331
|
|
|
|
|
|
|
{ |
332
|
|
|
|
|
|
|
# note, CSS defines URLs to be relative to the stylesheet. |
333
|
|
|
|
|
|
|
my $base = shift || croak "you must supply a base url"; |
334
|
|
|
|
|
|
|
my $url = URI->new_abs(shift, $base); |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
$local_url = encode_entities($local_url); |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# warn "localising $url-> $local_url\n"; # DEBUG |
341
|
|
|
|
|
|
|
return "url($local_url)"; |
342
|
|
|
|
|
|
|
}; |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
my $process_stylesheet; # defined later |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# This will localise a stylesheet @import link |
347
|
|
|
|
|
|
|
my $localise_import = sub |
348
|
|
|
|
|
|
|
{ |
349
|
|
|
|
|
|
|
my $base = shift; |
350
|
|
|
|
|
|
|
my $url = shift; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
my $local_url = $localise_url->($url); |
353
|
|
|
|
|
|
|
my $stylesheet_file = File::Spec->catdir($content_dir, $local_url); |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
my $content = _slurp $stylesheet_file; |
356
|
|
|
|
|
|
|
$process_stylesheet->($base, $content); |
357
|
|
|
|
|
|
|
_spit $stylesheet_file, $content; |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# Note, we don't convert the url, since that will be done later |
360
|
|
|
|
|
|
|
return "\@import url($url)"; |
361
|
|
|
|
|
|
|
}; |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# This function will localise an entire stylesheet's links. It |
364
|
|
|
|
|
|
|
# returns the number of things downloaded. |
365
|
|
|
|
|
|
|
$process_stylesheet = sub |
366
|
|
|
|
|
|
|
{ |
367
|
|
|
|
|
|
|
my $base = shift || croak "you must supply a base url"; |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# First, convert all '@import' statements to the '@import url()' form, |
370
|
|
|
|
|
|
|
# then localise all url() references. Return true if either has been applied. |
371
|
|
|
|
|
|
|
my @stylesheets = $_[0] =~ s/$IMPORT_RE/$localise_import->($base, $+)/ige; |
372
|
|
|
|
|
|
|
my @urls = $_[0] =~ s/$URL_RE/$localise_style_url->($base, $+)/ige; |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
return @stylesheets + @urls; |
375
|
|
|
|
|
|
|
}; |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# This localises a |