line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CSS::DOM::Parser; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = '0.15'; |
4
|
|
|
|
|
|
|
|
5
|
22
|
|
|
22
|
|
137
|
use strict; use warnings; no warnings qw 'utf8 parenthesis'; |
|
22
|
|
|
22
|
|
44
|
|
|
22
|
|
|
22
|
|
882
|
|
|
22
|
|
|
|
|
133
|
|
|
22
|
|
|
|
|
50
|
|
|
22
|
|
|
|
|
737
|
|
|
22
|
|
|
|
|
114
|
|
|
22
|
|
|
|
|
47
|
|
|
22
|
|
|
|
|
1068
|
|
6
|
22
|
|
|
22
|
|
159
|
use re 'taint'; |
|
22
|
|
|
|
|
44
|
|
|
22
|
|
|
|
|
1199
|
|
7
|
|
|
|
|
|
|
|
8
|
22
|
|
|
22
|
|
117
|
use Carp 1.01 qw 'shortmess croak'; |
|
22
|
|
|
|
|
764
|
|
|
22
|
|
|
|
|
1846
|
|
9
|
22
|
|
|
22
|
|
6550
|
use CSS::DOM; |
|
22
|
|
|
|
|
51
|
|
|
22
|
|
|
|
|
592
|
|
10
|
22
|
|
|
22
|
|
19110
|
use CSS::DOM::Rule::Style; |
|
22
|
|
|
|
|
71
|
|
|
22
|
|
|
|
|
1142
|
|
11
|
22
|
|
|
22
|
|
14547
|
use CSS::DOM::Style; |
|
22
|
|
|
|
|
60
|
|
|
22
|
|
|
|
|
764
|
|
12
|
22
|
|
|
22
|
|
147
|
use CSS::DOM::Util 'unescape'; |
|
22
|
|
|
|
|
45
|
|
|
22
|
|
|
|
|
13608
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our @CARP_NOT = qw "CSS::DOM CSS::DOM::Rule::Media"; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Tokeniser regexps |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my $token_re; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# This one has to be outside the scope, because we need it in tokenise. |
22
|
|
|
|
|
|
|
my $_optspace = qr/[ \t\r\n\f]*/; |
23
|
|
|
|
|
|
|
{ |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Vars beginning with _ here are not token regexps, but are used to |
26
|
|
|
|
|
|
|
# build them. |
27
|
|
|
|
|
|
|
my $_escape =qr/\\(?:[0-9a-f]{1,6}(?:\r\n|[ \n\r\t\f])?|[^\n\r\f0-9a-f])/i; |
28
|
|
|
|
|
|
|
my $_id_start = qr/[_a-zA-Z]|[^\0-\177]|$_escape/; |
29
|
|
|
|
|
|
|
my $_id_cont = qr/[_a-zA-Z0-9-]|[^\0-\177]|$_escape/; |
30
|
|
|
|
|
|
|
my $_nl = qr/\r\n?|[\n\f]/; |
31
|
|
|
|
|
|
|
my $_invalid_qq = qr/"[^\n\r\f\\"]*(?:(?:\\$_nl|$_escape)[^\n\r\f\\"]*)*/; |
32
|
|
|
|
|
|
|
my $_invalid_q = qr/'[^\n\r\f\\']*(?:(?:\\$_nl|$_escape)[^\n\r\f\\']*)*/; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $ident = qr/-?$_id_start$_id_cont*/; |
35
|
|
|
|
|
|
|
my $at = qr/\@$ident/; |
36
|
|
|
|
|
|
|
my $str = qr/$_invalid_qq(?:"|\z)|$_invalid_q(?:'|\z)/; |
37
|
|
|
|
|
|
|
my $invalid = qr/$_invalid_qq|$_invalid_q/; |
38
|
|
|
|
|
|
|
my $hash = qr/#$_id_cont+/; |
39
|
|
|
|
|
|
|
my $num = qr/(?=\.?[0-9])[0-9]*(?:\.[0-9]*)?/; |
40
|
|
|
|
|
|
|
my $percent = qr/$num%/; |
41
|
|
|
|
|
|
|
my $dim = qr/$num$ident/; |
42
|
|
|
|
|
|
|
my $url = qr/url\($_optspace(?: |
43
|
|
|
|
|
|
|
$str |
44
|
|
|
|
|
|
|
| |
45
|
|
|
|
|
|
|
[^\0- "'()\\\x7f]*(?:$_escape[^\0- "'()\\\x7f]*)* |
46
|
|
|
|
|
|
|
)$_optspace(?:\)|\z)/x; |
47
|
|
|
|
|
|
|
my $uni_range = qr/U\+[0-9A-F?]{1,6}(?:-[0-9a-f]{1,6})?/i; |
48
|
|
|
|
|
|
|
my $space = qr/(?:[ \t\r\n\f]+|\/\*.*?(?:\*\/|\z))[ \t\r\n\f]* |
49
|
|
|
|
|
|
|
(?:\/\*.*?(?:\*\/|\z)[ \t\r\n\f]*)*/xs; |
50
|
|
|
|
|
|
|
my $function = qr/$ident\(/; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Literal tokens are as follows: |
53
|
|
|
|
|
|
|
# <!-- --> ; { } ( ) [ ] ~= |= , : |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# The order of some tokens is important. $url, $uni_range and $function |
56
|
|
|
|
|
|
|
# have to come before $ident. $url has to come before $function. $percent |
57
|
|
|
|
|
|
|
# and $dim have to come before $num. |
58
|
|
|
|
|
|
|
$token_re = qr/\G(?: |
59
|
|
|
|
|
|
|
($url)|($uni_range)|($function)|($ident)|($at)|($str)|($invalid)| |
60
|
|
|
|
|
|
|
($hash)|($percent)|($dim)|($num)|(<!--|-->)|(;)|({)|(})|(\()|(\)) |
61
|
|
|
|
|
|
|
|(\[)|(])|($space)|(~=)|(\|=)|(,)|(:)|(.) |
62
|
|
|
|
|
|
|
)/xs; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
} # end of tokeniser regexps |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# tokenise returns a string of token types in addition to the array of |
67
|
|
|
|
|
|
|
# tokens so that we can apply grammar rules using regexps. The types are |
68
|
|
|
|
|
|
|
# as follows: |
69
|
|
|
|
|
|
|
# u url |
70
|
|
|
|
|
|
|
# U unicode range |
71
|
|
|
|
|
|
|
# f function |
72
|
|
|
|
|
|
|
# i identifier |
73
|
|
|
|
|
|
|
# @ at keyword |
74
|
|
|
|
|
|
|
# ' string |
75
|
|
|
|
|
|
|
# " invalid string (unterminated) |
76
|
|
|
|
|
|
|
# # hash |
77
|
|
|
|
|
|
|
# % percentage |
78
|
|
|
|
|
|
|
# D dimension |
79
|
|
|
|
|
|
|
# 1 number (not 0, because we want it true) |
80
|
|
|
|
|
|
|
# < html comment delimiter |
81
|
|
|
|
|
|
|
# s space/comments |
82
|
|
|
|
|
|
|
# ~ ~= |
83
|
|
|
|
|
|
|
# | |= |
84
|
|
|
|
|
|
|
# d delimiter (miscellaneous character) |
85
|
|
|
|
|
|
|
# The characters ;{}()[],: represent themselves. The comma and colon are |
86
|
|
|
|
|
|
|
# actually delimiters according to the CSS 2.1 spec, but itâs more conveni- |
87
|
|
|
|
|
|
|
# ent to have them as their own tokens. |
88
|
|
|
|
|
|
|
# ~~~ It might actually make the code cleaner if we make them all their own |
89
|
|
|
|
|
|
|
# tokens, in which case we can provide a $delim_re for matching against a |
90
|
|
|
|
|
|
|
# token type string. |
91
|
|
|
|
|
|
|
|
92
|
1991
|
50
|
|
1991
|
0
|
5857
|
sub tokenise { warn caller unless defined $_[0];for (''.shift) { |
|
1991
|
|
|
|
|
7073
|
|
93
|
1991
|
|
|
|
|
4473
|
my($tokens,@tokens)=''; |
94
|
1991
|
|
|
|
|
268078
|
while(/$token_re/gc){ |
95
|
6967
|
|
|
|
|
195981
|
my $which = (grep defined $+[$_], 1..$#+)[0]; |
96
|
22
|
|
|
22
|
|
143
|
no strict 'refs'; |
|
22
|
|
|
|
|
47
|
|
|
22
|
|
|
|
|
927
|
|
97
|
6967
|
|
|
|
|
38170
|
push @tokens, $$which; |
98
|
22
|
|
|
22
|
|
239
|
no warnings qw]qw]; |
|
22
|
|
|
|
|
52
|
|
|
22
|
|
|
|
|
12348
|
|
99
|
6967
|
|
|
|
|
15982
|
$tokens .= |
100
|
|
|
|
|
|
|
qw/u U f i @ ' " # % D 1 < ; { } ( ) [ ] s ~ | , : d/ |
101
|
|
|
|
|
|
|
[$which-1]; |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# We need to close unterminated tokens for the sake of |
104
|
|
|
|
|
|
|
# serialisation. If we donât, then too many other parts of |
105
|
|
|
|
|
|
|
# the code base have to deal with it. |
106
|
6967
|
100
|
|
|
|
104361
|
if($tokens =~ /'\z/) { |
|
|
100
|
|
|
|
|
|
107
|
127
|
100
|
66
|
|
|
1953
|
$tokens[-1] =~ /^(')[^'\\]*(?:\\.[^'\\]*)*\z |
108
|
|
|
|
|
|
|
| |
109
|
|
|
|
|
|
|
^(")[^"\\]*(?:\\.[^"\\]*)*\z/xs |
110
|
|
|
|
|
|
|
and $tokens[-1] .= $1 || $2; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
elsif($tokens =~ /u\z/) { |
113
|
75
|
|
|
|
|
913
|
(my $copy = $tokens[-1]) =~ s/^url\($_optspace(?: |
114
|
|
|
|
|
|
|
(')[^'\\]*(?:\\.[^'\\]*)* |
115
|
|
|
|
|
|
|
| |
116
|
|
|
|
|
|
|
(")[^"\\]*(?:\\.[^"\\]*)* |
117
|
|
|
|
|
|
|
| |
118
|
|
|
|
|
|
|
[^)\\]*(?:\\.[^)\\]*)* |
119
|
|
|
|
|
|
|
)//sox; |
120
|
75
|
|
66
|
|
|
380
|
my $str_delim = $1||$2; |
121
|
75
|
100
|
100
|
|
|
362
|
$str_delim and $copy!~s/^['"]$_optspace//o |
122
|
|
|
|
|
|
|
and $tokens[-1] .= $str_delim; |
123
|
75
|
100
|
|
|
|
910
|
$copy or $tokens[-1] .= ')'; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
# This canât ever happen: |
127
|
1991
|
50
|
66
|
|
|
14499
|
pos and pos() < length |
128
|
|
|
|
|
|
|
and die "CSS::DOM::Parser internal error (please report this):" |
129
|
|
|
|
|
|
|
." Can't tokenise " .substr $_,pos; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# close bracketed constructs: again, we do this here so that other |
132
|
|
|
|
|
|
|
# pieces of code scattered all over the place (including the reg- |
133
|
|
|
|
|
|
|
# exps below, which would need things like â(?:\)|\z)â) |
134
|
|
|
|
|
|
|
# donât have to. |
135
|
1991
|
|
|
|
|
12735
|
my $brack_count = (()=$tokens=~/[(f]/g)-(()=$tokens=~/\)/g) |
136
|
|
|
|
|
|
|
+ (()=$tokens=~/\[/g)-(()=$tokens=~/]/g) |
137
|
|
|
|
|
|
|
+ (()=$tokens=~/{/g)-(()=$tokens=~/}/g); |
138
|
1991
|
|
|
|
|
4810
|
my $tokens_copy = reverse $tokens; |
139
|
1991
|
|
|
|
|
7457
|
for(1..$brack_count) { |
140
|
74
|
|
|
|
|
320
|
$tokens_copy =~ s/.*?([[{(f])//; |
141
|
74
|
100
|
|
|
|
1297
|
push @tokens, $1 eq'['?']':$1 eq'{'?'}':')'; |
|
|
100
|
|
|
|
|
|
142
|
74
|
|
|
|
|
217
|
$tokens .= $tokens[-1]; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
1991
|
|
|
|
|
11488
|
return $tokens,\@tokens, ; |
146
|
|
|
|
|
|
|
}} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Each statement is either an @ rule or a ruleset (style rule) |
149
|
|
|
|
|
|
|
# @ rule syntax is |
150
|
|
|
|
|
|
|
# @ s? any* followed by block or ; |
151
|
|
|
|
|
|
|
# A block is { s? (any|block|@ s?|; s?)* } s? |
152
|
|
|
|
|
|
|
# ruleset syntax is |
153
|
|
|
|
|
|
|
# any* { s? [d,:]? ident s? : s? (any|block|@ s?)+ |
154
|
|
|
|
|
|
|
# (; s? [d,:]? ident s? : s? (any|block|@ s?)+)* } s? |
155
|
|
|
|
|
|
|
# "any" means |
156
|
|
|
|
|
|
|
# ( [i1%D'd,:u#U~|] | f s? any* \) | \(s? any \) | \[ s? any \] ) s? |
157
|
|
|
|
|
|
|
# Thatâs the âfuture-compatibleâ CSS syntax. Below, we sift out the valid |
158
|
|
|
|
|
|
|
# CSS 2.1 rules to put them in the right classes. Everything else goes in |
159
|
|
|
|
|
|
|
# âUnknownâ. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Methods beginning with _parse truncate the arguments (a string of token |
162
|
|
|
|
|
|
|
# types and an array ref of tokens) and return an object. Whatâs left of |
163
|
|
|
|
|
|
|
# the args is whatever couldnât be parsed. If the args were parsed in their |
164
|
|
|
|
|
|
|
# entirety, they end up blank. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
our $any_re; our $block_re; |
167
|
22
|
|
|
22
|
|
142
|
no warnings 'regexp'; |
|
22
|
|
|
|
|
53
|
|
|
22
|
|
|
|
|
145062
|
|
168
|
|
|
|
|
|
|
# Although we include invalid strings (") in the $any_re, they are not |
169
|
|
|
|
|
|
|
# actually valid, but cause the enclosing property declaration or rule to |
170
|
|
|
|
|
|
|
# be ignored. |
171
|
|
|
|
|
|
|
$any_re = |
172
|
|
|
|
|
|
|
qr/(?: |
173
|
|
|
|
|
|
|
[i1%D'"d,:u#U~|] |
174
|
|
|
|
|
|
|
| |
175
|
|
|
|
|
|
|
[f(]s?(??{$any_re})*\) |
176
|
|
|
|
|
|
|
| |
177
|
|
|
|
|
|
|
\[s?(??{$any_re})*] |
178
|
|
|
|
|
|
|
)s?/x; |
179
|
|
|
|
|
|
|
$block_re = |
180
|
|
|
|
|
|
|
qr/{s?(?:(??{$any_re})|(??{$block_re})|[\@;]s?)*}s?/; |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub tokenise_value { # This is for ::Style to use. It dies if there are |
183
|
|
|
|
|
|
|
# tokens left over. |
184
|
1054
|
|
|
1054
|
0
|
3440
|
my ($types, $tokens) = tokenise($_[0]); |
185
|
1054
|
100
|
|
|
|
125957
|
$types =~ /^s?(?:$any_re|$block_re|\@s?)*\z/ or die |
186
|
|
|
|
|
|
|
"Invalid property value: $_[0]"; |
187
|
1053
|
|
|
|
|
11204
|
return $types, $tokens; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub parse { # Donât shift $_[0] off @_. Weâd end up copying it if we did |
191
|
|
|
|
|
|
|
# that--something we ought to avoid, in case itâs huge. |
192
|
74
|
|
|
74
|
0
|
189
|
my $pos = pos $_[0]; |
193
|
74
|
|
|
|
|
406
|
my(%args) = @_[1..$#_]; |
194
|
74
|
|
|
|
|
148
|
my $src; |
195
|
74
|
100
|
|
|
|
394
|
if( $args{qw[encoding_hint decode][exists $args{decode}]} ) { |
196
|
22
|
|
|
|
|
74
|
$src = _decode(@_); |
197
|
22
|
50
|
|
|
|
29061
|
defined $src or shift, return new CSS::DOM @_; |
198
|
|
|
|
|
|
|
} |
199
|
74
|
100
|
|
|
|
408
|
my($types,$tokens,) = tokenise defined $src ? $src : $_[0]; |
200
|
74
|
|
|
|
|
664
|
my $sheet = new CSS::DOM @_[1..$#_]; |
201
|
74
|
|
|
|
|
342
|
my $stmts = $sheet->cssRules; |
202
|
74
|
|
|
|
|
252
|
eval { for($types) { |
|
74
|
|
|
|
|
199
|
|
203
|
74
|
|
|
|
|
224
|
while($_) { |
204
|
103
|
100
|
|
|
|
533
|
s/^([s<]+)// |
205
|
|
|
|
|
|
|
and splice @$tokens, 0, length $1; |
206
|
103
|
|
|
|
|
209
|
my $tokcount = @$tokens; |
207
|
103
|
100
|
|
|
|
674
|
if(/^@/) { |
208
|
44
|
|
|
|
|
167
|
push @$stmts, |
209
|
|
|
|
|
|
|
_parse_at_rule($_,$tokens,$sheet); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
else { |
212
|
59
|
|
|
|
|
207
|
push @$stmts, _parse_ruleset( |
213
|
|
|
|
|
|
|
$_,$tokens,$sheet |
214
|
|
|
|
|
|
|
); |
215
|
|
|
|
|
|
|
} |
216
|
96
|
100
|
|
|
|
553
|
if($tokcount == @$tokens) { |
217
|
10
|
100
|
|
|
|
54
|
$types and _expected("rule",$tokens) |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
}}; |
221
|
74
|
|
|
|
|
251
|
pos $_[0] = $pos; |
222
|
74
|
|
|
|
|
810
|
return $sheet; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub parse_statement { |
226
|
177
|
|
|
177
|
0
|
419
|
my $pos = pos $_[0]; |
227
|
177
|
|
|
|
|
496
|
my($types,$tokens,) = tokenise $_[0]; |
228
|
177
|
|
|
|
|
323
|
my $stmt; |
229
|
177
|
|
|
|
|
334
|
eval{ for($types) { |
|
177
|
|
|
|
|
382
|
|
230
|
177
|
100
|
|
|
|
567
|
s/^s// |
231
|
|
|
|
|
|
|
and shift @$tokens; |
232
|
177
|
100
|
|
|
|
613
|
if(/^@/) { |
233
|
146
|
|
|
|
|
495
|
$stmt = _parse_at_rule($_,$tokens,$_[1]); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
else { |
236
|
|
|
|
|
|
|
#use DDS; Dump [$_,$tokens]; |
237
|
31
|
100
|
|
|
|
123
|
$stmt = _parse_ruleset( |
238
|
|
|
|
|
|
|
$_,$tokens,$_[1] |
239
|
|
|
|
|
|
|
) or last; |
240
|
|
|
|
|
|
|
# use DDS; Dump $stmt; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
}}; |
243
|
177
|
|
|
|
|
537
|
pos $_[0] = $pos; |
244
|
177
|
100
|
|
|
|
2398
|
$@ = length $types ? shortmess "Invalid CSS statement" |
|
|
50
|
|
|
|
|
|
245
|
|
|
|
|
|
|
: '' |
246
|
|
|
|
|
|
|
unless $@; |
247
|
177
|
|
|
|
|
1088
|
return $stmt; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub parse_style_declaration { |
251
|
398
|
|
|
398
|
0
|
786
|
my $pos = pos $_[0]; |
252
|
|
|
|
|
|
|
#use DDS; Dump tokenise $_[0]; pos $_[0] = $pos; |
253
|
398
|
|
|
|
|
1403
|
my @tokens = tokenise $_[0]; |
254
|
398
|
100
|
|
|
|
1683
|
$tokens[0] =~ s/^s// and shift @{$tokens[1]}; |
|
2
|
|
|
|
|
5
|
|
255
|
398
|
100
|
66
|
|
|
1680
|
$@ = ( |
256
|
|
|
|
|
|
|
my $style = _parse_style_declaration( |
257
|
|
|
|
|
|
|
@tokens,undef,@_[1..$#_] |
258
|
|
|
|
|
|
|
) and!$tokens[0] |
259
|
|
|
|
|
|
|
) ? '' : shortmess 'Invalid style declaration'; |
260
|
398
|
|
|
|
|
1101
|
pos $_[0] = $pos; |
261
|
398
|
|
|
|
|
2430
|
return $style; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# This one will die if it fails to match a rule. We only call it when we |
265
|
|
|
|
|
|
|
# are certain that we could only have an @ rule. |
266
|
|
|
|
|
|
|
# This accepts as an optional third arg the parent rule or stylesheet. |
267
|
190
|
|
|
190
|
|
449
|
sub _parse_at_rule { for (shift) { for my $tokens (shift) { |
|
190
|
|
|
|
|
411
|
|
268
|
190
|
|
|
|
|
848
|
my $unesc_at = lc unescape(my $at = shift @$tokens); |
269
|
190
|
|
|
|
|
309
|
my $type; |
270
|
190
|
|
|
|
|
1080
|
s/^@//; |
271
|
190
|
100
|
100
|
|
|
2937
|
if($unesc_at eq '@media' |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
272
|
|
|
|
|
|
|
&& s/^(s?is?(?:,s?is?)*\{)//) { |
273
|
|
|
|
|
|
|
# Thereâs a good chance |
274
|
|
|
|
|
|
|
# this is a @media rule, |
275
|
|
|
|
|
|
|
# but if what follows this |
276
|
|
|
|
|
|
|
# regexp match turns out |
277
|
|
|
|
|
|
|
# not to be a valid set of |
278
|
|
|
|
|
|
|
# rulesets, we have an |
279
|
|
|
|
|
|
|
# unknown rule. |
280
|
66
|
|
|
|
|
170
|
my $header = $1; |
281
|
66
|
|
|
|
|
317
|
my @header = splice @$tokens, |
282
|
|
|
|
|
|
|
0, |
283
|
|
|
|
|
|
|
length $1; |
284
|
|
|
|
|
|
|
# set aside all body tokens in case this turns out to be |
285
|
|
|
|
|
|
|
# an unknown rule |
286
|
66
|
|
|
|
|
135
|
my ($body,@body); |
287
|
66
|
50
|
|
|
|
2655
|
"{$_" =~ /^$block_re/ |
288
|
|
|
|
|
|
|
? ($body = substr($_,0,$+[0]-1), |
289
|
|
|
|
|
|
|
@body = @$tokens[0..$+[0]-2]) |
290
|
|
|
|
|
|
|
: croak "Invalid block in \@media rule"; |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
#use DDS; Dump $body, \@body; |
293
|
|
|
|
|
|
|
# We need to record the number of tokens we have now, so |
294
|
|
|
|
|
|
|
# that, if we revert to âunknownâ status, we can remove the |
295
|
|
|
|
|
|
|
# right number of tokens. |
296
|
66
|
|
|
|
|
183
|
my $tokens_to_begin_with = length; |
297
|
66
|
100
|
|
|
|
314
|
s/^s// and shift @$tokens; |
298
|
66
|
|
|
|
|
112
|
my @rulesets; |
299
|
66
|
|
|
|
|
191
|
while($_) { |
300
|
94
|
|
100
|
|
|
320
|
push @rulesets, _parse_ruleset ($_, $tokens)||last; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
66
|
100
|
|
|
|
549
|
if(s/^}s?//) { |
304
|
49
|
|
|
|
|
158
|
splice @$tokens, 0, $+[0]; |
305
|
49
|
|
|
|
|
9427
|
require CSS::DOM::Rule::Media; |
306
|
49
|
|
66
|
|
|
406
|
my $rule = new CSS::DOM::Rule::Media $_[0]||(); |
307
|
49
|
|
|
|
|
83
|
@{$rule->cssRules} = @rulesets; |
|
49
|
|
|
|
|
221
|
|
308
|
|
|
|
|
|
|
$_->_set_parentRule($rule), |
309
|
|
|
|
|
|
|
$_[0] &&$_->_set_parentStyleSheet($_[0]) |
310
|
49
|
|
33
|
|
|
228
|
for @rulesets; |
311
|
49
|
|
|
|
|
192
|
my $media = $rule->media; |
312
|
49
|
|
|
|
|
278
|
while($header =~ /i/g) { |
313
|
82
|
|
|
|
|
406
|
push @$media, unescape($header[$-[0]]); |
314
|
|
|
|
|
|
|
} |
315
|
49
|
|
|
|
|
448
|
return $rule; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
else { |
318
|
|
|
|
|
|
|
# ignore rules w/invalid strings |
319
|
17
|
50
|
|
|
|
72
|
$body =~ /"/ and return; |
320
|
|
|
|
|
|
|
|
321
|
17
|
|
|
|
|
33
|
my $length = $tokens_to_begin_with-length $body; |
322
|
17
|
100
|
|
|
|
51
|
$_ = $length ? substr $_, -$length : ''; |
323
|
17
|
|
|
|
|
77
|
@$tokens = @$tokens[-$length..-1]; |
324
|
|
|
|
|
|
|
|
325
|
17
|
100
|
|
|
|
75
|
$body =~ s/s\z// and pop @body; |
326
|
17
|
|
|
|
|
108
|
require CSS::DOM::Rule; |
327
|
17
|
|
33
|
|
|
118
|
(my $rule = new CSS::DOM::Rule $_[0]||()) |
328
|
|
|
|
|
|
|
->_set_tokens( |
329
|
|
|
|
|
|
|
"\@$header$body", |
330
|
|
|
|
|
|
|
[$at,@header,@body] |
331
|
|
|
|
|
|
|
); |
332
|
17
|
|
|
|
|
169
|
return $rule; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
elsif($unesc_at eq '@page' && s/^((?:s?:i)?)(s?{s?)// |
336
|
|
|
|
|
|
|
||$unesc_at eq '@font-face' && s/^()(s?{s?)// ) { |
337
|
22
|
|
|
|
|
81
|
my $selector = "\@$1"; |
338
|
22
|
|
|
|
|
104
|
my @selector = ('@page', splice @$tokens, 0, $+[1]); |
339
|
22
|
|
|
|
|
100
|
my @block_start = |
340
|
|
|
|
|
|
|
splice @$tokens, 0, length(my $block_start = $2); |
341
|
|
|
|
|
|
|
|
342
|
22
|
|
|
|
|
50
|
my $class = qw[FontFace Page][$unesc_at eq '@page']; |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
# Unfortunately, these two lines may turn out to |
345
|
|
|
|
|
|
|
# be a waste. |
346
|
22
|
|
|
|
|
16343
|
require "CSS/DOM/Rule/$class.pm"; |
347
|
22
|
|
66
|
|
|
238
|
my $style = ( |
348
|
|
|
|
|
|
|
my $rule = "CSS::DOM::Rule::$class"->new( |
349
|
|
|
|
|
|
|
$_[0]||() |
350
|
|
|
|
|
|
|
) |
351
|
|
|
|
|
|
|
) -> style; |
352
|
|
|
|
|
|
|
|
353
|
22
|
|
|
|
|
86
|
$style = _parse_style_declaration($_,$tokens,$style); |
354
|
22
|
100
|
|
|
|
73
|
if($style) { |
355
|
20
|
50
|
|
|
|
152
|
s/^}s?// and splice @$tokens, 0, $+[0]; # remove } |
356
|
20
|
100
|
|
|
|
122
|
$rule->selectorText(join '', @selector) |
357
|
|
|
|
|
|
|
if $class eq 'Page'; |
358
|
20
|
|
|
|
|
124
|
return $rule; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
else { |
361
|
2
|
50
|
|
|
|
377
|
"{$_" =~ /^$block_re/ |
362
|
|
|
|
|
|
|
or croak "Invalid block in \@page rule"; |
363
|
0
|
|
|
|
|
0
|
$selector .= $block_start .substr($_,0,$+[0]-1,''), |
364
|
|
|
|
|
|
|
push @selector, @block_start , |
365
|
|
|
|
|
|
|
splice @$tokens, 0, $+[0]-1; |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# ignore rules w/invalid strings |
368
|
0
|
0
|
|
|
|
0
|
$selector =~ /"/ and return; |
369
|
|
|
|
|
|
|
|
370
|
0
|
0
|
|
|
|
0
|
$selector =~ s/s\z// and pop @selector; |
371
|
|
|
|
|
|
|
|
372
|
0
|
|
|
|
|
0
|
require CSS'DOM'Rule; |
373
|
0
|
|
0
|
|
|
0
|
(my $rule = new CSS::DOM::Rule $_[0]||()) |
374
|
|
|
|
|
|
|
->_set_tokens( |
375
|
|
|
|
|
|
|
$selector,\@selector |
376
|
|
|
|
|
|
|
# not exactly a selector any more |
377
|
|
|
|
|
|
|
); |
378
|
0
|
|
|
|
|
0
|
return $rule; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
elsif($unesc_at eq '@import' |
382
|
|
|
|
|
|
|
&& s/^s?([u'])s?(is?(?:,s?is?)*)?(?:;s?|\z)//) { |
383
|
43
|
|
|
|
|
198
|
my($url_type,$media_token_types) = ($1,$2); |
384
|
43
|
|
|
|
|
144
|
my $url = $$tokens[$-[1]]; |
385
|
43
|
100
|
|
|
|
188
|
my @media_tokens = $2?@$tokens[$-[2]..$+[2]]:(); |
386
|
43
|
|
|
|
|
149
|
splice @$tokens, 0, $+[0]; |
387
|
43
|
|
|
|
|
6146
|
require CSS::DOM::Rule::Import; |
388
|
43
|
|
66
|
|
|
403
|
my $rule = new CSS::DOM::Rule::Import $_[0]||(); |
389
|
43
|
|
|
|
|
181
|
$rule->_set_url_token($url_type,$url); |
390
|
43
|
100
|
|
|
|
348
|
@media_tokens or return $rule; |
391
|
5
|
|
|
|
|
29
|
my $media = $rule->media; |
392
|
5
|
|
|
|
|
41
|
while($media_token_types =~ /i/g) { |
393
|
9
|
|
|
|
|
42
|
push @$media, unescape($media_tokens[$-[0]]); |
394
|
|
|
|
|
|
|
} |
395
|
5
|
|
|
|
|
39
|
return $rule; |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
elsif($at eq '@charset' # NOT $unesc_at! |
398
|
|
|
|
|
|
|
&& @$tokens >= 3 # @charset rule syntax |
399
|
|
|
|
|
|
|
&& $tokens->[0] eq ' ' # is stricter than the |
400
|
|
|
|
|
|
|
&& $tokens->[1] =~ /^"/ # tokenisation rules. |
401
|
|
|
|
|
|
|
&& s/^s';s?//) { |
402
|
22
|
|
|
|
|
51
|
my $esc_enc = $tokens->[1]; |
403
|
22
|
|
|
|
|
118
|
splice @$tokens, 0, $+[0]; |
404
|
22
|
|
|
|
|
3398
|
require CSS::DOM::Rule::Charset; |
405
|
22
|
|
66
|
|
|
212
|
my $rule = new CSS::DOM::Rule::Charset $_[0]||(); |
406
|
22
|
|
|
|
|
95
|
$rule->encoding(unescape(substr $esc_enc, 1,-1)); |
407
|
22
|
|
|
|
|
292
|
return $rule; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
else { # unwist |
410
|
|
|
|
|
|
|
#warn $_; |
411
|
37
|
100
|
|
|
|
168
|
s/^(s?(??{$any_re})*(?:(??{$block_re})|(?:;s?|\z)))// |
412
|
|
|
|
|
|
|
or croak "Invalid $at rule"; |
413
|
32
|
|
|
|
|
291
|
my ($types,@tokens) = ("\@$1",$at,splice @$tokens,0,$+[0]); |
414
|
32
|
100
|
|
|
|
157
|
$types =~ /"/ and return; # ignore rules w/invalid strings |
415
|
31
|
100
|
|
|
|
123
|
$types =~ s/s\z// and pop @tokens; |
416
|
31
|
|
|
|
|
192
|
require CSS'DOM'Rule; |
417
|
31
|
|
66
|
|
|
208
|
(my $rule = new CSS::DOM::Rule $_[0]||()) |
418
|
|
|
|
|
|
|
->_set_tokens( |
419
|
|
|
|
|
|
|
$types, \@tokens |
420
|
|
|
|
|
|
|
); |
421
|
31
|
|
|
|
|
175
|
return $rule; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
}}} |
424
|
|
|
|
|
|
|
|
425
|
184
|
|
|
184
|
|
424
|
sub _parse_ruleset { for (shift) { |
426
|
|
|
|
|
|
|
# Just return if there isnât a ruleset |
427
|
184
|
100
|
|
|
|
27264
|
s/(^($any_re*)\{s?(?:$any_re|$block_re|[\@;]s?)*}s?)//x |
428
|
|
|
|
|
|
|
or return; |
429
|
1
|
|
|
|
|
8
|
index $2,'"' =>== -1 or |
430
|
106
|
100
|
|
|
|
932
|
splice (@{+shift}, 0, $+[0]), return; |
431
|
|
|
|
|
|
|
|
432
|
105
|
|
|
|
|
955
|
for(my $x = $1) { |
433
|
105
|
|
|
|
|
175
|
my $tokens = [splice @{+shift}, 0, $+[0]]; |
|
105
|
|
|
|
|
771
|
|
434
|
|
|
|
|
|
|
|
435
|
105
|
|
66
|
|
|
2311
|
(my $ruleset = new CSS::DOM::Rule::Style $_[0]||()) |
436
|
|
|
|
|
|
|
->_set_selector_tokens(_parse_selector($_,$tokens)); |
437
|
|
|
|
|
|
|
|
438
|
105
|
50
|
|
|
|
839
|
s/^{s?// and splice @$tokens, 0, $+[0]; # remove { |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
#use DDS; Dump$_,$tokens; |
441
|
105
|
|
|
|
|
570
|
_parse_style_declaration($_,$tokens,$ruleset->style); |
442
|
|
|
|
|
|
|
|
443
|
105
|
50
|
|
|
|
812
|
s/^}s?// and splice @$tokens, 0, $+[0]; # remove } |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
|
446
|
105
|
|
|
|
|
700
|
return $ruleset |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
}}} |
449
|
|
|
|
|
|
|
|
450
|
105
|
|
|
105
|
|
277
|
sub _parse_selector { for (shift) { for my $tokens (shift) { |
|
105
|
|
|
|
|
220
|
|
451
|
105
|
|
|
|
|
254
|
my($selector,@selector) = ''; |
452
|
105
|
100
|
|
|
|
5914
|
if(s/^($any_re+)//) { |
453
|
78
|
|
|
|
|
194
|
$selector = $1; |
454
|
78
|
|
|
|
|
310
|
push @selector, splice @$tokens, 0, length $1; |
455
|
|
|
|
|
|
|
} |
456
|
105
|
100
|
|
|
|
666
|
$selector =~ s/s\z// and pop @selector; |
457
|
105
|
|
|
|
|
960
|
return $selector, \@selector; |
458
|
|
|
|
|
|
|
}}} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# This one takes optional extra args: |
461
|
|
|
|
|
|
|
# 2) the style decl object to add properties to |
462
|
|
|
|
|
|
|
# 3..) extra args to pass to the style objâs constructor if 2 is undef |
463
|
525
|
|
|
525
|
|
1604
|
sub _parse_style_declaration { for (shift) { for my $tokens (shift) { |
|
525
|
|
|
|
|
1037
|
|
464
|
|
|
|
|
|
|
# return if there isnât one |
465
|
525
|
100
|
|
|
|
55744
|
/^(?:$any_re|$block_re|[\@;]s?)*(?:}s?|\z)/x |
466
|
|
|
|
|
|
|
or return; |
467
|
|
|
|
|
|
|
|
468
|
523
|
|
66
|
|
|
5606
|
my $style = shift||new CSS::DOM::Style @_; |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
{ |
471
|
523
|
100
|
|
|
|
804
|
if(s/^is?:s?((?:$any_re|$block_re|\@s?)+)//) { |
|
687
|
100
|
|
|
|
61610
|
|
472
|
633
|
|
|
|
|
2773
|
my ($prop) = splice @$tokens, 0, $-[1]; |
473
|
633
|
|
|
|
|
2073
|
my $types = $1; |
474
|
633
|
|
|
|
|
1985
|
my @tokens = splice @$tokens, 0, length $1; |
475
|
633
|
100
|
|
|
|
2102
|
unless($types =~ /"/) { # ignore invalid strings |
476
|
631
|
100
|
|
|
|
1813
|
$types =~ s/s\z// and pop @tokens;; |
477
|
631
|
|
|
|
|
2236
|
$style->_set_property_tokens( |
478
|
|
|
|
|
|
|
unescape($prop),$types,\@tokens |
479
|
|
|
|
|
|
|
); |
480
|
|
|
|
|
|
|
} |
481
|
633
|
100
|
|
|
|
3830
|
s/^;s?// and splice(@$tokens, 0, $+[0]), redo; |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
elsif(s/^;s?//) { |
484
|
21
|
|
|
|
|
53
|
splice @$tokens, 0, $+[0]; redo; |
|
21
|
|
|
|
|
65
|
|
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
else { |
487
|
|
|
|
|
|
|
# Ignorable declaration |
488
|
33
|
|
|
|
|
2575
|
s/^(?:$any_re|$block_re|\@s?)*//; |
489
|
33
|
|
|
|
|
188
|
splice @$tokens, 0, $+[0]; |
490
|
33
|
100
|
|
|
|
201
|
s/^;s?// and splice(@$tokens, 0, $+[0]), redo; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
# else last |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
|
495
|
523
|
|
|
|
|
4675
|
return $style; |
496
|
|
|
|
|
|
|
}}} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
sub _expected { |
499
|
9
|
|
|
9
|
|
24
|
my $tokens = pop; |
500
|
9
|
100
|
|
|
|
2325
|
croak |
|
|
100
|
|
|
|
|
|
501
|
|
|
|
|
|
|
"Syntax error: expected $_[0] but found '" |
502
|
|
|
|
|
|
|
.join('',@$tokens[ |
503
|
|
|
|
|
|
|
0..(10<$#$tokens?10 : $#$tokens) |
504
|
|
|
|
|
|
|
]) . ($#$tokens > 10 ? '...' : '') . "'"; |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
22
|
|
|
22
|
|
31
|
sub _decode { my $at; for(''.shift) { |
|
22
|
|
|
|
|
75
|
|
508
|
|
|
|
|
|
|
# ~~~ Some of this is repetitive and could probably be compressed. |
509
|
22
|
|
|
|
|
839569
|
require Encode; |
510
|
22
|
50
|
|
|
|
16177
|
if(/^(\xef\xbb\xbf(\@charset "(.*?)";))/s) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
511
|
0
|
|
|
|
|
0
|
my $enc = $3; |
512
|
0
|
|
|
|
|
0
|
my $dec = eval{Encode::decode($3, $1, 9)}; |
|
0
|
|
|
|
|
0
|
|
513
|
0
|
0
|
|
|
|
0
|
if(defined $dec) { |
514
|
0
|
0
|
|
|
|
0
|
$dec =~ /^(\x{feff}?)$2\z/ |
|
|
0
|
|
|
|
|
|
515
|
|
|
|
|
|
|
and return Encode::decode($enc, |
516
|
|
|
|
|
|
|
$1 ? substr $_, 3 : $_); |
517
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xef\\xbb\\xbf" |
518
|
|
|
|
|
|
|
:"\"$enc\" is encoded in ASCII but is not" |
519
|
|
|
|
|
|
|
." ASCII-based"; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
elsif(/^\xef\xbb\xbf/) { |
523
|
2
|
|
|
|
|
14
|
return Encode::decode_utf8(substr $_,3); |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
elsif(/^(\@charset "(.*?)";)/s) { |
526
|
0
|
|
|
|
|
0
|
my $dec = eval{Encode::decode($2, $1, 9)}; |
|
0
|
|
|
|
|
0
|
|
527
|
0
|
0
|
|
|
|
0
|
if(defined $dec) { |
528
|
0
|
0
|
|
|
|
0
|
$dec eq $1 |
529
|
|
|
|
|
|
|
and return Encode::decode($2, $_); |
530
|
0
|
|
|
|
|
0
|
$@ = "\"$2\" is encoded in ASCII but is not " |
531
|
|
|
|
|
|
|
."ASCII-based"; |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
elsif( |
535
|
|
|
|
|
|
|
/^(\xfe\xff(\0\@\0c\0h\0a\0r\0s\0e\0t\0 \0"((?:\0.)*?)\0"\0;))/s |
536
|
|
|
|
|
|
|
) { |
537
|
0
|
|
|
|
|
0
|
my $enc = Encode::decode('utf16be', $3); |
538
|
0
|
|
|
|
|
0
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
0
|
|
|
|
|
0
|
|
539
|
0
|
0
|
|
|
|
0
|
if(defined $dec) { |
540
|
0
|
0
|
|
|
|
0
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
0
|
|
|
|
|
|
541
|
|
|
|
|
|
|
and return Encode::decode($enc, |
542
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
543
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
544
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UCS-2 but is not" |
545
|
|
|
|
|
|
|
." UCS-2-based"; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
elsif( |
549
|
|
|
|
|
|
|
/^(\0\@\0c\0h\0a\0r\0s\0e\0t\0 \0"((?:\0.)*?)\0"\0;)/s |
550
|
|
|
|
|
|
|
) { |
551
|
1
|
|
|
|
|
7
|
my $origenc = my $enc = Encode::decode('utf16be', $2); |
552
|
1
|
|
|
|
|
657
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
4
|
|
553
|
1
|
|
|
|
|
5
|
defined $dec or $dec |
554
|
1
|
50
|
|
|
|
40
|
= eval{Encode::decode($enc.='-be', $1, 9)}; |
555
|
1
|
50
|
|
|
|
609
|
if(defined $dec) { |
556
|
1
|
50
|
|
|
|
9
|
$dec eq "\@charset \"$origenc\";" |
557
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
558
|
0
|
|
|
|
|
0
|
$@ ="\"$origenc\" is encoded in UCS-2 but is not " |
559
|
|
|
|
|
|
|
."UCS-2-based"; |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
elsif( |
563
|
|
|
|
|
|
|
/^(\xff\xfe(\@\0c\0h\0a\0r\0s\0e\0t\0 \0"\0((?:.\0)*?)"\0;\0))/s |
564
|
|
|
|
|
|
|
) { |
565
|
1
|
|
|
|
|
35
|
my $enc = Encode::decode('utf16le', $3); |
566
|
1
|
|
|
|
|
689
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
6
|
|
567
|
1
|
50
|
|
|
|
50
|
if(defined $dec) { |
568
|
1
|
50
|
|
|
|
45
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
50
|
|
|
|
|
|
569
|
|
|
|
|
|
|
and return Encode::decode($enc, |
570
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
571
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
572
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UCS-2-LE but is not" |
573
|
|
|
|
|
|
|
." UCS-2-LE-based"; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
elsif( |
577
|
|
|
|
|
|
|
/^(\@\0c\0h\0a\0r\0s\0e\0t\0 \0"\0((?:.\0)*?)"\0;\0)/s |
578
|
|
|
|
|
|
|
) { |
579
|
1
|
|
|
|
|
5
|
my $origenc = my $enc = Encode::decode('utf16le', $2); |
580
|
1
|
|
|
|
|
46
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
8
|
|
581
|
1
|
|
|
|
|
5
|
defined $dec or $dec |
582
|
1
|
50
|
|
|
|
51
|
= eval{Encode::decode($enc.='-le', $1, 9)}; |
583
|
1
|
50
|
|
|
|
3251
|
if(defined $dec) { |
584
|
1
|
50
|
|
|
|
9
|
$dec eq "\@charset \"$origenc\";" |
585
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
586
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in UCS-2-LE but is not " |
587
|
|
|
|
|
|
|
."UCS-2-LE-based"; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
elsif( |
591
|
|
|
|
|
|
|
/^(\0\0\xfe\xff(\0{3}\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
592
|
|
|
|
|
|
|
\0{3}\ \0{3}"((?:\0{3}.)*?)\0{3}"\0{3};))/sx |
593
|
|
|
|
|
|
|
) { |
594
|
1
|
|
|
|
|
6
|
my $enc = Encode::decode('utf32be', $3); |
595
|
1
|
|
|
|
|
685
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
4
|
|
596
|
1
|
50
|
|
|
|
628
|
if(defined $dec) { |
597
|
1
|
50
|
|
|
|
43
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
50
|
|
|
|
|
|
598
|
|
|
|
|
|
|
and return Encode::decode($enc, |
599
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
600
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
601
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UTF-32-BE but is not" |
602
|
|
|
|
|
|
|
." UTF-32-BE-based"; |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
elsif( |
606
|
|
|
|
|
|
|
/^(\0{3}\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
607
|
|
|
|
|
|
|
\0{3}\ \0{3}"((?:\0{3}.)*?)\0{3}"\0{3};)/sx |
608
|
|
|
|
|
|
|
) { |
609
|
1
|
|
|
|
|
5
|
my $origenc = my $enc = Encode::decode('utf32be', $2); |
610
|
1
|
|
|
|
|
50
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
4
|
|
611
|
1
|
|
|
|
|
6
|
defined $dec or $dec |
612
|
1
|
50
|
|
|
|
51
|
= eval{Encode::decode($enc.='-be', $1, 9)}; |
613
|
1
|
50
|
|
|
|
923
|
if(defined $dec) { |
614
|
1
|
50
|
|
|
|
9
|
$dec eq "\@charset \"$origenc\";" |
615
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
616
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in UTF-32-BE but is not " |
617
|
|
|
|
|
|
|
."UTF-32-BE-based"; |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
elsif( |
621
|
|
|
|
|
|
|
/^(\xff\xfe\0\0(\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
622
|
|
|
|
|
|
|
\0{3}\ \0{3}"\0{3}((?:.\0{3})*?)"\0{3};\0{3}))/sx |
623
|
|
|
|
|
|
|
) { |
624
|
1
|
|
|
|
|
5
|
my $enc = Encode::decode('utf32le', $3); |
625
|
1
|
|
|
|
|
689
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
4
|
|
626
|
1
|
50
|
|
|
|
46
|
if(defined $dec) { |
627
|
1
|
50
|
|
|
|
37
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
50
|
|
|
|
|
|
628
|
|
|
|
|
|
|
and return Encode::decode($enc, |
629
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
630
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
631
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UTF-32-LE but is not" |
632
|
|
|
|
|
|
|
." UTF-32-LE-based"; |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
elsif( |
636
|
|
|
|
|
|
|
/^(\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
637
|
|
|
|
|
|
|
\0{3}\ \0{3}"\0{3}((?:.\0{3})*?)"\0{3};\0{3})/sx |
638
|
|
|
|
|
|
|
) { |
639
|
1
|
|
|
|
|
5
|
my $origenc = my $enc = Encode::decode('utf32le', $2); |
640
|
1
|
|
|
|
|
44
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
4
|
|
641
|
1
|
|
|
|
|
7
|
defined $dec or $dec |
642
|
1
|
50
|
|
|
|
45
|
= eval{Encode::decode($enc.='-le', $1, 9)}; |
643
|
1
|
50
|
|
|
|
722
|
if(defined $dec) { |
644
|
1
|
50
|
|
|
|
8
|
$dec eq "\@charset \"$origenc\";" |
645
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
646
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in UTF-32-LE but is not " |
647
|
|
|
|
|
|
|
."UTF-32-LE-based"; |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
elsif(/^(?:\0\0\xfe\xff|\xff\xfe\0\0)/) { |
651
|
2
|
|
|
|
|
8
|
return Encode::decode('utf32', $_); |
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
elsif(/^(?:\xfe\xff|\xff\xfe)/) { |
654
|
3
|
|
|
|
|
12
|
return Encode::decode('utf16', $_); |
655
|
|
|
|
|
|
|
} |
656
|
|
|
|
|
|
|
elsif( |
657
|
|
|
|
|
|
|
/^(\|\x83\x88\x81\x99\xa2\x85\xa3\@\x7f(.*?)\x7f\^)/s |
658
|
|
|
|
|
|
|
) { |
659
|
2
|
|
|
|
|
9
|
my $enc = Encode::decode('cp37', $2); |
660
|
2
|
|
|
|
|
5281
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
2
|
|
|
|
|
7
|
|
661
|
2
|
50
|
|
|
|
67
|
if(defined $dec) { |
662
|
2
|
50
|
|
|
|
15
|
$dec eq "\@charset \"$enc\";" |
663
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
664
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in EBCDIC but is not " |
665
|
|
|
|
|
|
|
."EBCDIC-based"; |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
} |
668
|
|
|
|
|
|
|
elsif( |
669
|
|
|
|
|
|
|
/^(\xae\x83\x88\x81\x99\xa2\x85\xa3\@\xfc(.*?)\xfc\^)/s |
670
|
|
|
|
|
|
|
) { |
671
|
1
|
|
|
|
|
4
|
my $enc = Encode::decode('cp1026', $2); |
672
|
1
|
|
|
|
|
28
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
5
|
|
673
|
1
|
50
|
|
|
|
28
|
if(defined $dec) { |
674
|
1
|
50
|
|
|
|
10
|
$dec eq "\@charset \"$enc\";" |
675
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
676
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in IBM1026 but is not " |
677
|
|
|
|
|
|
|
."IBM1026-based"; |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
elsif( |
681
|
|
|
|
|
|
|
/^(\0charset "(.*?)";)/s |
682
|
|
|
|
|
|
|
) { |
683
|
1
|
|
|
|
|
4
|
my $enc = Encode::decode('gsm0338', $2); |
684
|
1
|
|
|
|
|
6513
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
1
|
|
|
|
|
6
|
|
685
|
1
|
50
|
|
|
|
143
|
if(defined $dec) { |
686
|
1
|
50
|
|
|
|
10
|
$dec eq "\@charset \"$enc\";" |
687
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
688
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in GSM 0338 but is not " |
689
|
|
|
|
|
|
|
."GSM 0338-based"; |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
else { |
693
|
4
|
|
|
|
|
16
|
my %args = @_; |
694
|
4
|
|
100
|
|
|
34
|
return Encode::decode($args{encoding_hint}||'utf8', $_); |
695
|
|
|
|
|
|
|
} |
696
|
0
|
|
|
|
|
|
return; |
697
|
|
|
|
|
|
|
}} |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
**__END__** |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head1 NAME |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
CSS::DOM::Parser - Parser for CSS::DOM |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=head1 VERSION |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
Version 0.15 |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
=head1 DESCRIPTION |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
This is a private module (at least for now). Don't use it directly. |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=head1 SEE ALSO |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
L<CSS::DOM> |