| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package CSS::DOM::Parser; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = '0.15'; |
|
4
|
|
|
|
|
|
|
|
|
5
|
22
|
|
|
22
|
|
137
|
use strict; use warnings; no warnings qw 'utf8 parenthesis'; |
|
|
22
|
|
|
22
|
|
44
|
|
|
|
22
|
|
|
22
|
|
882
|
|
|
|
22
|
|
|
|
|
133
|
|
|
|
22
|
|
|
|
|
50
|
|
|
|
22
|
|
|
|
|
737
|
|
|
|
22
|
|
|
|
|
114
|
|
|
|
22
|
|
|
|
|
47
|
|
|
|
22
|
|
|
|
|
1068
|
|
|
6
|
22
|
|
|
22
|
|
159
|
use re 'taint'; |
|
|
22
|
|
|
|
|
44
|
|
|
|
22
|
|
|
|
|
1199
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
22
|
|
|
22
|
|
117
|
use Carp 1.01 qw 'shortmess croak'; |
|
|
22
|
|
|
|
|
764
|
|
|
|
22
|
|
|
|
|
1846
|
|
|
9
|
22
|
|
|
22
|
|
6550
|
use CSS::DOM; |
|
|
22
|
|
|
|
|
51
|
|
|
|
22
|
|
|
|
|
592
|
|
|
10
|
22
|
|
|
22
|
|
19110
|
use CSS::DOM::Rule::Style; |
|
|
22
|
|
|
|
|
71
|
|
|
|
22
|
|
|
|
|
1142
|
|
|
11
|
22
|
|
|
22
|
|
14547
|
use CSS::DOM::Style; |
|
|
22
|
|
|
|
|
60
|
|
|
|
22
|
|
|
|
|
764
|
|
|
12
|
22
|
|
|
22
|
|
147
|
use CSS::DOM::Util 'unescape'; |
|
|
22
|
|
|
|
|
45
|
|
|
|
22
|
|
|
|
|
13608
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our @CARP_NOT = qw "CSS::DOM CSS::DOM::Rule::Media"; |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Tokeniser regexps |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my $token_re; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# This one has to be outside the scope, because we need it in tokenise. |
|
22
|
|
|
|
|
|
|
my $_optspace = qr/[ \t\r\n\f]*/; |
|
23
|
|
|
|
|
|
|
{ |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Vars beginning with _ here are not token regexps, but are used to |
|
26
|
|
|
|
|
|
|
# build them. |
|
27
|
|
|
|
|
|
|
my $_escape =qr/\\(?:[0-9a-f]{1,6}(?:\r\n|[ \n\r\t\f])?|[^\n\r\f0-9a-f])/i; |
|
28
|
|
|
|
|
|
|
my $_id_start = qr/[_a-zA-Z]|[^\0-\177]|$_escape/; |
|
29
|
|
|
|
|
|
|
my $_id_cont = qr/[_a-zA-Z0-9-]|[^\0-\177]|$_escape/; |
|
30
|
|
|
|
|
|
|
my $_nl = qr/\r\n?|[\n\f]/; |
|
31
|
|
|
|
|
|
|
my $_invalid_qq = qr/"[^\n\r\f\\"]*(?:(?:\\$_nl|$_escape)[^\n\r\f\\"]*)*/; |
|
32
|
|
|
|
|
|
|
my $_invalid_q = qr/'[^\n\r\f\\']*(?:(?:\\$_nl|$_escape)[^\n\r\f\\']*)*/; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $ident = qr/-?$_id_start$_id_cont*/; |
|
35
|
|
|
|
|
|
|
my $at = qr/\@$ident/; |
|
36
|
|
|
|
|
|
|
my $str = qr/$_invalid_qq(?:"|\z)|$_invalid_q(?:'|\z)/; |
|
37
|
|
|
|
|
|
|
my $invalid = qr/$_invalid_qq|$_invalid_q/; |
|
38
|
|
|
|
|
|
|
my $hash = qr/#$_id_cont+/; |
|
39
|
|
|
|
|
|
|
my $num = qr/(?=\.?[0-9])[0-9]*(?:\.[0-9]*)?/; |
|
40
|
|
|
|
|
|
|
my $percent = qr/$num%/; |
|
41
|
|
|
|
|
|
|
my $dim = qr/$num$ident/; |
|
42
|
|
|
|
|
|
|
my $url = qr/url\($_optspace(?: |
|
43
|
|
|
|
|
|
|
$str |
|
44
|
|
|
|
|
|
|
| |
|
45
|
|
|
|
|
|
|
[^\0- "'()\\\x7f]*(?:$_escape[^\0- "'()\\\x7f]*)* |
|
46
|
|
|
|
|
|
|
)$_optspace(?:\)|\z)/x; |
|
47
|
|
|
|
|
|
|
my $uni_range = qr/U\+[0-9A-F?]{1,6}(?:-[0-9a-f]{1,6})?/i; |
|
48
|
|
|
|
|
|
|
my $space = qr/(?:[ \t\r\n\f]+|\/\*.*?(?:\*\/|\z))[ \t\r\n\f]* |
|
49
|
|
|
|
|
|
|
(?:\/\*.*?(?:\*\/|\z)[ \t\r\n\f]*)*/xs; |
|
50
|
|
|
|
|
|
|
my $function = qr/$ident\(/; |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Literal tokens are as follows: |
|
53
|
|
|
|
|
|
|
# <!-- --> ; { } ( ) [ ] ~= |= , : |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# The order of some tokens is important. $url, $uni_range and $function |
|
56
|
|
|
|
|
|
|
# have to come before $ident. $url has to come before $function. $percent |
|
57
|
|
|
|
|
|
|
# and $dim have to come before $num. |
|
58
|
|
|
|
|
|
|
$token_re = qr/\G(?: |
|
59
|
|
|
|
|
|
|
($url)|($uni_range)|($function)|($ident)|($at)|($str)|($invalid)| |
|
60
|
|
|
|
|
|
|
($hash)|($percent)|($dim)|($num)|(<!--|-->)|(;)|({)|(})|(\()|(\)) |
|
61
|
|
|
|
|
|
|
|(\[)|(])|($space)|(~=)|(\|=)|(,)|(:)|(.) |
|
62
|
|
|
|
|
|
|
)/xs; |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
} # end of tokeniser regexps |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# tokenise returns a string of token types in addition to the array of |
|
67
|
|
|
|
|
|
|
# tokens so that we can apply grammar rules using regexps. The types are |
|
68
|
|
|
|
|
|
|
# as follows: |
|
69
|
|
|
|
|
|
|
# u url |
|
70
|
|
|
|
|
|
|
# U unicode range |
|
71
|
|
|
|
|
|
|
# f function |
|
72
|
|
|
|
|
|
|
# i identifier |
|
73
|
|
|
|
|
|
|
# @ at keyword |
|
74
|
|
|
|
|
|
|
# ' string |
|
75
|
|
|
|
|
|
|
# " invalid string (unterminated) |
|
76
|
|
|
|
|
|
|
# # hash |
|
77
|
|
|
|
|
|
|
# % percentage |
|
78
|
|
|
|
|
|
|
# D dimension |
|
79
|
|
|
|
|
|
|
# 1 number (not 0, because we want it true) |
|
80
|
|
|
|
|
|
|
# < html comment delimiter |
|
81
|
|
|
|
|
|
|
# s space/comments |
|
82
|
|
|
|
|
|
|
# ~ ~= |
|
83
|
|
|
|
|
|
|
# | |= |
|
84
|
|
|
|
|
|
|
# d delimiter (miscellaneous character) |
|
85
|
|
|
|
|
|
|
# The characters ;{}()[],: represent themselves. The comma and colon are |
|
86
|
|
|
|
|
|
|
# actually delimiters according to the CSS 2.1 spec, but itâs more conveni- |
|
87
|
|
|
|
|
|
|
# ent to have them as their own tokens. |
|
88
|
|
|
|
|
|
|
# ~~~ It might actually make the code cleaner if we make them all their own |
|
89
|
|
|
|
|
|
|
# tokens, in which case we can provide a $delim_re for matching against a |
|
90
|
|
|
|
|
|
|
# token type string. |
|
91
|
|
|
|
|
|
|
|
|
92
|
1991
|
50
|
|
1991
|
0
|
5857
|
sub tokenise { warn caller unless defined $_[0];for (''.shift) { |
|
|
1991
|
|
|
|
|
7073
|
|
|
93
|
1991
|
|
|
|
|
4473
|
my($tokens,@tokens)=''; |
|
94
|
1991
|
|
|
|
|
268078
|
while(/$token_re/gc){ |
|
95
|
6967
|
|
|
|
|
195981
|
my $which = (grep defined $+[$_], 1..$#+)[0]; |
|
96
|
22
|
|
|
22
|
|
143
|
no strict 'refs'; |
|
|
22
|
|
|
|
|
47
|
|
|
|
22
|
|
|
|
|
927
|
|
|
97
|
6967
|
|
|
|
|
38170
|
push @tokens, $$which; |
|
98
|
22
|
|
|
22
|
|
239
|
no warnings qw]qw]; |
|
|
22
|
|
|
|
|
52
|
|
|
|
22
|
|
|
|
|
12348
|
|
|
99
|
6967
|
|
|
|
|
15982
|
$tokens .= |
|
100
|
|
|
|
|
|
|
qw/u U f i @ ' " # % D 1 < ; { } ( ) [ ] s ~ | , : d/ |
|
101
|
|
|
|
|
|
|
[$which-1]; |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# We need to close unterminated tokens for the sake of |
|
104
|
|
|
|
|
|
|
# serialisation. If we donât, then too many other parts of |
|
105
|
|
|
|
|
|
|
# the code base have to deal with it. |
|
106
|
6967
|
100
|
|
|
|
104361
|
if($tokens =~ /'\z/) { |
|
|
|
100
|
|
|
|
|
|
|
107
|
127
|
100
|
66
|
|
|
1953
|
$tokens[-1] =~ /^(')[^'\\]*(?:\\.[^'\\]*)*\z |
|
108
|
|
|
|
|
|
|
| |
|
109
|
|
|
|
|
|
|
^(")[^"\\]*(?:\\.[^"\\]*)*\z/xs |
|
110
|
|
|
|
|
|
|
and $tokens[-1] .= $1 || $2; |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
elsif($tokens =~ /u\z/) { |
|
113
|
75
|
|
|
|
|
913
|
(my $copy = $tokens[-1]) =~ s/^url\($_optspace(?: |
|
114
|
|
|
|
|
|
|
(')[^'\\]*(?:\\.[^'\\]*)* |
|
115
|
|
|
|
|
|
|
| |
|
116
|
|
|
|
|
|
|
(")[^"\\]*(?:\\.[^"\\]*)* |
|
117
|
|
|
|
|
|
|
| |
|
118
|
|
|
|
|
|
|
[^)\\]*(?:\\.[^)\\]*)* |
|
119
|
|
|
|
|
|
|
)//sox; |
|
120
|
75
|
|
66
|
|
|
380
|
my $str_delim = $1||$2; |
|
121
|
75
|
100
|
100
|
|
|
362
|
$str_delim and $copy!~s/^['"]$_optspace//o |
|
122
|
|
|
|
|
|
|
and $tokens[-1] .= $str_delim; |
|
123
|
75
|
100
|
|
|
|
910
|
$copy or $tokens[-1] .= ')'; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
# This canât ever happen: |
|
127
|
1991
|
50
|
66
|
|
|
14499
|
pos and pos() < length |
|
128
|
|
|
|
|
|
|
and die "CSS::DOM::Parser internal error (please report this):" |
|
129
|
|
|
|
|
|
|
." Can't tokenise " .substr $_,pos; |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# close bracketed constructs: again, we do this here so that other |
|
132
|
|
|
|
|
|
|
# pieces of code scattered all over the place (including the reg- |
|
133
|
|
|
|
|
|
|
# exps below, which would need things like â(?:\)|\z)â) |
|
134
|
|
|
|
|
|
|
# donât have to. |
|
135
|
1991
|
|
|
|
|
12735
|
my $brack_count = (()=$tokens=~/[(f]/g)-(()=$tokens=~/\)/g) |
|
136
|
|
|
|
|
|
|
+ (()=$tokens=~/\[/g)-(()=$tokens=~/]/g) |
|
137
|
|
|
|
|
|
|
+ (()=$tokens=~/{/g)-(()=$tokens=~/}/g); |
|
138
|
1991
|
|
|
|
|
4810
|
my $tokens_copy = reverse $tokens; |
|
139
|
1991
|
|
|
|
|
7457
|
for(1..$brack_count) { |
|
140
|
74
|
|
|
|
|
320
|
$tokens_copy =~ s/.*?([[{(f])//; |
|
141
|
74
|
100
|
|
|
|
1297
|
push @tokens, $1 eq'['?']':$1 eq'{'?'}':')'; |
|
|
|
100
|
|
|
|
|
|
|
142
|
74
|
|
|
|
|
217
|
$tokens .= $tokens[-1]; |
|
143
|
|
|
|
|
|
|
} |
|
144
|
|
|
|
|
|
|
|
|
145
|
1991
|
|
|
|
|
11488
|
return $tokens,\@tokens, ; |
|
146
|
|
|
|
|
|
|
}} |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Each statement is either an @ rule or a ruleset (style rule) |
|
149
|
|
|
|
|
|
|
# @ rule syntax is |
|
150
|
|
|
|
|
|
|
# @ s? any* followed by block or ; |
|
151
|
|
|
|
|
|
|
# A block is { s? (any|block|@ s?|; s?)* } s? |
|
152
|
|
|
|
|
|
|
# ruleset syntax is |
|
153
|
|
|
|
|
|
|
# any* { s? [d,:]? ident s? : s? (any|block|@ s?)+ |
|
154
|
|
|
|
|
|
|
# (; s? [d,:]? ident s? : s? (any|block|@ s?)+)* } s? |
|
155
|
|
|
|
|
|
|
# "any" means |
|
156
|
|
|
|
|
|
|
# ( [i1%D'd,:u#U~|] | f s? any* \) | \(s? any \) | \[ s? any \] ) s? |
|
157
|
|
|
|
|
|
|
# Thatâs the âfuture-compatibleâ CSS syntax. Below, we sift out the valid |
|
158
|
|
|
|
|
|
|
# CSS 2.1 rules to put them in the right classes. Everything else goes in |
|
159
|
|
|
|
|
|
|
# âUnknownâ. |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Methods beginning with _parse truncate the arguments (a string of token |
|
162
|
|
|
|
|
|
|
# types and an array ref of tokens) and return an object. Whatâs left of |
|
163
|
|
|
|
|
|
|
# the args is whatever couldnât be parsed. If the args were parsed in their |
|
164
|
|
|
|
|
|
|
# entirety, they end up blank. |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
our $any_re; our $block_re; |
|
167
|
22
|
|
|
22
|
|
142
|
no warnings 'regexp'; |
|
|
22
|
|
|
|
|
53
|
|
|
|
22
|
|
|
|
|
145062
|
|
|
168
|
|
|
|
|
|
|
# Although we include invalid strings (") in the $any_re, they are not |
|
169
|
|
|
|
|
|
|
# actually valid, but cause the enclosing property declaration or rule to |
|
170
|
|
|
|
|
|
|
# be ignored. |
|
171
|
|
|
|
|
|
|
$any_re = |
|
172
|
|
|
|
|
|
|
qr/(?: |
|
173
|
|
|
|
|
|
|
[i1%D'"d,:u#U~|] |
|
174
|
|
|
|
|
|
|
| |
|
175
|
|
|
|
|
|
|
[f(]s?(??{$any_re})*\) |
|
176
|
|
|
|
|
|
|
| |
|
177
|
|
|
|
|
|
|
\[s?(??{$any_re})*] |
|
178
|
|
|
|
|
|
|
)s?/x; |
|
179
|
|
|
|
|
|
|
$block_re = |
|
180
|
|
|
|
|
|
|
qr/{s?(?:(??{$any_re})|(??{$block_re})|[\@;]s?)*}s?/; |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub tokenise_value { # This is for ::Style to use. It dies if there are |
|
183
|
|
|
|
|
|
|
# tokens left over. |
|
184
|
1054
|
|
|
1054
|
0
|
3440
|
my ($types, $tokens) = tokenise($_[0]); |
|
185
|
1054
|
100
|
|
|
|
125957
|
$types =~ /^s?(?:$any_re|$block_re|\@s?)*\z/ or die |
|
186
|
|
|
|
|
|
|
"Invalid property value: $_[0]"; |
|
187
|
1053
|
|
|
|
|
11204
|
return $types, $tokens; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub parse { # Donât shift $_[0] off @_. Weâd end up copying it if we did |
|
191
|
|
|
|
|
|
|
# that--something we ought to avoid, in case itâs huge. |
|
192
|
74
|
|
|
74
|
0
|
189
|
my $pos = pos $_[0]; |
|
193
|
74
|
|
|
|
|
406
|
my(%args) = @_[1..$#_]; |
|
194
|
74
|
|
|
|
|
148
|
my $src; |
|
195
|
74
|
100
|
|
|
|
394
|
if( $args{qw[encoding_hint decode][exists $args{decode}]} ) { |
|
196
|
22
|
|
|
|
|
74
|
$src = _decode(@_); |
|
197
|
22
|
50
|
|
|
|
29061
|
defined $src or shift, return new CSS::DOM @_; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
74
|
100
|
|
|
|
408
|
my($types,$tokens,) = tokenise defined $src ? $src : $_[0]; |
|
200
|
74
|
|
|
|
|
664
|
my $sheet = new CSS::DOM @_[1..$#_]; |
|
201
|
74
|
|
|
|
|
342
|
my $stmts = $sheet->cssRules; |
|
202
|
74
|
|
|
|
|
252
|
eval { for($types) { |
|
|
74
|
|
|
|
|
199
|
|
|
203
|
74
|
|
|
|
|
224
|
while($_) { |
|
204
|
103
|
100
|
|
|
|
533
|
s/^([s<]+)// |
|
205
|
|
|
|
|
|
|
and splice @$tokens, 0, length $1; |
|
206
|
103
|
|
|
|
|
209
|
my $tokcount = @$tokens; |
|
207
|
103
|
100
|
|
|
|
674
|
if(/^@/) { |
|
208
|
44
|
|
|
|
|
167
|
push @$stmts, |
|
209
|
|
|
|
|
|
|
_parse_at_rule($_,$tokens,$sheet); |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
else { |
|
212
|
59
|
|
|
|
|
207
|
push @$stmts, _parse_ruleset( |
|
213
|
|
|
|
|
|
|
$_,$tokens,$sheet |
|
214
|
|
|
|
|
|
|
); |
|
215
|
|
|
|
|
|
|
} |
|
216
|
96
|
100
|
|
|
|
553
|
if($tokcount == @$tokens) { |
|
217
|
10
|
100
|
|
|
|
54
|
$types and _expected("rule",$tokens) |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
}}; |
|
221
|
74
|
|
|
|
|
251
|
pos $_[0] = $pos; |
|
222
|
74
|
|
|
|
|
810
|
return $sheet; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub parse_statement { |
|
226
|
177
|
|
|
177
|
0
|
419
|
my $pos = pos $_[0]; |
|
227
|
177
|
|
|
|
|
496
|
my($types,$tokens,) = tokenise $_[0]; |
|
228
|
177
|
|
|
|
|
323
|
my $stmt; |
|
229
|
177
|
|
|
|
|
334
|
eval{ for($types) { |
|
|
177
|
|
|
|
|
382
|
|
|
230
|
177
|
100
|
|
|
|
567
|
s/^s// |
|
231
|
|
|
|
|
|
|
and shift @$tokens; |
|
232
|
177
|
100
|
|
|
|
613
|
if(/^@/) { |
|
233
|
146
|
|
|
|
|
495
|
$stmt = _parse_at_rule($_,$tokens,$_[1]); |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
else { |
|
236
|
|
|
|
|
|
|
#use DDS; Dump [$_,$tokens]; |
|
237
|
31
|
100
|
|
|
|
123
|
$stmt = _parse_ruleset( |
|
238
|
|
|
|
|
|
|
$_,$tokens,$_[1] |
|
239
|
|
|
|
|
|
|
) or last; |
|
240
|
|
|
|
|
|
|
# use DDS; Dump $stmt; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
}}; |
|
243
|
177
|
|
|
|
|
537
|
pos $_[0] = $pos; |
|
244
|
177
|
100
|
|
|
|
2398
|
$@ = length $types ? shortmess "Invalid CSS statement" |
|
|
|
50
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
: '' |
|
246
|
|
|
|
|
|
|
unless $@; |
|
247
|
177
|
|
|
|
|
1088
|
return $stmt; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub parse_style_declaration { |
|
251
|
398
|
|
|
398
|
0
|
786
|
my $pos = pos $_[0]; |
|
252
|
|
|
|
|
|
|
#use DDS; Dump tokenise $_[0]; pos $_[0] = $pos; |
|
253
|
398
|
|
|
|
|
1403
|
my @tokens = tokenise $_[0]; |
|
254
|
398
|
100
|
|
|
|
1683
|
$tokens[0] =~ s/^s// and shift @{$tokens[1]}; |
|
|
2
|
|
|
|
|
5
|
|
|
255
|
398
|
100
|
66
|
|
|
1680
|
$@ = ( |
|
256
|
|
|
|
|
|
|
my $style = _parse_style_declaration( |
|
257
|
|
|
|
|
|
|
@tokens,undef,@_[1..$#_] |
|
258
|
|
|
|
|
|
|
) and!$tokens[0] |
|
259
|
|
|
|
|
|
|
) ? '' : shortmess 'Invalid style declaration'; |
|
260
|
398
|
|
|
|
|
1101
|
pos $_[0] = $pos; |
|
261
|
398
|
|
|
|
|
2430
|
return $style; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# This one will die if it fails to match a rule. We only call it when we |
|
265
|
|
|
|
|
|
|
# are certain that we could only have an @ rule. |
|
266
|
|
|
|
|
|
|
# This accepts as an optional third arg the parent rule or stylesheet. |
|
267
|
190
|
|
|
190
|
|
449
|
sub _parse_at_rule { for (shift) { for my $tokens (shift) { |
|
|
190
|
|
|
|
|
411
|
|
|
268
|
190
|
|
|
|
|
848
|
my $unesc_at = lc unescape(my $at = shift @$tokens); |
|
269
|
190
|
|
|
|
|
309
|
my $type; |
|
270
|
190
|
|
|
|
|
1080
|
s/^@//; |
|
271
|
190
|
100
|
100
|
|
|
2937
|
if($unesc_at eq '@media' |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
272
|
|
|
|
|
|
|
&& s/^(s?is?(?:,s?is?)*\{)//) { |
|
273
|
|
|
|
|
|
|
# Thereâs a good chance |
|
274
|
|
|
|
|
|
|
# this is a @media rule, |
|
275
|
|
|
|
|
|
|
# but if what follows this |
|
276
|
|
|
|
|
|
|
# regexp match turns out |
|
277
|
|
|
|
|
|
|
# not to be a valid set of |
|
278
|
|
|
|
|
|
|
# rulesets, we have an |
|
279
|
|
|
|
|
|
|
# unknown rule. |
|
280
|
66
|
|
|
|
|
170
|
my $header = $1; |
|
281
|
66
|
|
|
|
|
317
|
my @header = splice @$tokens, |
|
282
|
|
|
|
|
|
|
0, |
|
283
|
|
|
|
|
|
|
length $1; |
|
284
|
|
|
|
|
|
|
# set aside all body tokens in case this turns out to be |
|
285
|
|
|
|
|
|
|
# an unknown rule |
|
286
|
66
|
|
|
|
|
135
|
my ($body,@body); |
|
287
|
66
|
50
|
|
|
|
2655
|
"{$_" =~ /^$block_re/ |
|
288
|
|
|
|
|
|
|
? ($body = substr($_,0,$+[0]-1), |
|
289
|
|
|
|
|
|
|
@body = @$tokens[0..$+[0]-2]) |
|
290
|
|
|
|
|
|
|
: croak "Invalid block in \@media rule"; |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
#use DDS; Dump $body, \@body; |
|
293
|
|
|
|
|
|
|
# We need to record the number of tokens we have now, so |
|
294
|
|
|
|
|
|
|
# that, if we revert to âunknownâ status, we can remove the |
|
295
|
|
|
|
|
|
|
# right number of tokens. |
|
296
|
66
|
|
|
|
|
183
|
my $tokens_to_begin_with = length; |
|
297
|
66
|
100
|
|
|
|
314
|
s/^s// and shift @$tokens; |
|
298
|
66
|
|
|
|
|
112
|
my @rulesets; |
|
299
|
66
|
|
|
|
|
191
|
while($_) { |
|
300
|
94
|
|
100
|
|
|
320
|
push @rulesets, _parse_ruleset ($_, $tokens)||last; |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
66
|
100
|
|
|
|
549
|
if(s/^}s?//) { |
|
304
|
49
|
|
|
|
|
158
|
splice @$tokens, 0, $+[0]; |
|
305
|
49
|
|
|
|
|
9427
|
require CSS::DOM::Rule::Media; |
|
306
|
49
|
|
66
|
|
|
406
|
my $rule = new CSS::DOM::Rule::Media $_[0]||(); |
|
307
|
49
|
|
|
|
|
83
|
@{$rule->cssRules} = @rulesets; |
|
|
49
|
|
|
|
|
221
|
|
|
308
|
|
|
|
|
|
|
$_->_set_parentRule($rule), |
|
309
|
|
|
|
|
|
|
$_[0] &&$_->_set_parentStyleSheet($_[0]) |
|
310
|
49
|
|
33
|
|
|
228
|
for @rulesets; |
|
311
|
49
|
|
|
|
|
192
|
my $media = $rule->media; |
|
312
|
49
|
|
|
|
|
278
|
while($header =~ /i/g) { |
|
313
|
82
|
|
|
|
|
406
|
push @$media, unescape($header[$-[0]]); |
|
314
|
|
|
|
|
|
|
} |
|
315
|
49
|
|
|
|
|
448
|
return $rule; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
else { |
|
318
|
|
|
|
|
|
|
# ignore rules w/invalid strings |
|
319
|
17
|
50
|
|
|
|
72
|
$body =~ /"/ and return; |
|
320
|
|
|
|
|
|
|
|
|
321
|
17
|
|
|
|
|
33
|
my $length = $tokens_to_begin_with-length $body; |
|
322
|
17
|
100
|
|
|
|
51
|
$_ = $length ? substr $_, -$length : ''; |
|
323
|
17
|
|
|
|
|
77
|
@$tokens = @$tokens[-$length..-1]; |
|
324
|
|
|
|
|
|
|
|
|
325
|
17
|
100
|
|
|
|
75
|
$body =~ s/s\z// and pop @body; |
|
326
|
17
|
|
|
|
|
108
|
require CSS::DOM::Rule; |
|
327
|
17
|
|
33
|
|
|
118
|
(my $rule = new CSS::DOM::Rule $_[0]||()) |
|
328
|
|
|
|
|
|
|
->_set_tokens( |
|
329
|
|
|
|
|
|
|
"\@$header$body", |
|
330
|
|
|
|
|
|
|
[$at,@header,@body] |
|
331
|
|
|
|
|
|
|
); |
|
332
|
17
|
|
|
|
|
169
|
return $rule; |
|
333
|
|
|
|
|
|
|
} |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
elsif($unesc_at eq '@page' && s/^((?:s?:i)?)(s?{s?)// |
|
336
|
|
|
|
|
|
|
||$unesc_at eq '@font-face' && s/^()(s?{s?)// ) { |
|
337
|
22
|
|
|
|
|
81
|
my $selector = "\@$1"; |
|
338
|
22
|
|
|
|
|
104
|
my @selector = ('@page', splice @$tokens, 0, $+[1]); |
|
339
|
22
|
|
|
|
|
100
|
my @block_start = |
|
340
|
|
|
|
|
|
|
splice @$tokens, 0, length(my $block_start = $2); |
|
341
|
|
|
|
|
|
|
|
|
342
|
22
|
|
|
|
|
50
|
my $class = qw[FontFace Page][$unesc_at eq '@page']; |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
# Unfortunately, these two lines may turn out to |
|
345
|
|
|
|
|
|
|
# be a waste. |
|
346
|
22
|
|
|
|
|
16343
|
require "CSS/DOM/Rule/$class.pm"; |
|
347
|
22
|
|
66
|
|
|
238
|
my $style = ( |
|
348
|
|
|
|
|
|
|
my $rule = "CSS::DOM::Rule::$class"->new( |
|
349
|
|
|
|
|
|
|
$_[0]||() |
|
350
|
|
|
|
|
|
|
) |
|
351
|
|
|
|
|
|
|
) -> style; |
|
352
|
|
|
|
|
|
|
|
|
353
|
22
|
|
|
|
|
86
|
$style = _parse_style_declaration($_,$tokens,$style); |
|
354
|
22
|
100
|
|
|
|
73
|
if($style) { |
|
355
|
20
|
50
|
|
|
|
152
|
s/^}s?// and splice @$tokens, 0, $+[0]; # remove } |
|
356
|
20
|
100
|
|
|
|
122
|
$rule->selectorText(join '', @selector) |
|
357
|
|
|
|
|
|
|
if $class eq 'Page'; |
|
358
|
20
|
|
|
|
|
124
|
return $rule; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
else { |
|
361
|
2
|
50
|
|
|
|
377
|
"{$_" =~ /^$block_re/ |
|
362
|
|
|
|
|
|
|
or croak "Invalid block in \@page rule"; |
|
363
|
0
|
|
|
|
|
0
|
$selector .= $block_start .substr($_,0,$+[0]-1,''), |
|
364
|
|
|
|
|
|
|
push @selector, @block_start , |
|
365
|
|
|
|
|
|
|
splice @$tokens, 0, $+[0]-1; |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# ignore rules w/invalid strings |
|
368
|
0
|
0
|
|
|
|
0
|
$selector =~ /"/ and return; |
|
369
|
|
|
|
|
|
|
|
|
370
|
0
|
0
|
|
|
|
0
|
$selector =~ s/s\z// and pop @selector; |
|
371
|
|
|
|
|
|
|
|
|
372
|
0
|
|
|
|
|
0
|
require CSS'DOM'Rule; |
|
373
|
0
|
|
0
|
|
|
0
|
(my $rule = new CSS::DOM::Rule $_[0]||()) |
|
374
|
|
|
|
|
|
|
->_set_tokens( |
|
375
|
|
|
|
|
|
|
$selector,\@selector |
|
376
|
|
|
|
|
|
|
# not exactly a selector any more |
|
377
|
|
|
|
|
|
|
); |
|
378
|
0
|
|
|
|
|
0
|
return $rule; |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
} |
|
381
|
|
|
|
|
|
|
elsif($unesc_at eq '@import' |
|
382
|
|
|
|
|
|
|
&& s/^s?([u'])s?(is?(?:,s?is?)*)?(?:;s?|\z)//) { |
|
383
|
43
|
|
|
|
|
198
|
my($url_type,$media_token_types) = ($1,$2); |
|
384
|
43
|
|
|
|
|
144
|
my $url = $$tokens[$-[1]]; |
|
385
|
43
|
100
|
|
|
|
188
|
my @media_tokens = $2?@$tokens[$-[2]..$+[2]]:(); |
|
386
|
43
|
|
|
|
|
149
|
splice @$tokens, 0, $+[0]; |
|
387
|
43
|
|
|
|
|
6146
|
require CSS::DOM::Rule::Import; |
|
388
|
43
|
|
66
|
|
|
403
|
my $rule = new CSS::DOM::Rule::Import $_[0]||(); |
|
389
|
43
|
|
|
|
|
181
|
$rule->_set_url_token($url_type,$url); |
|
390
|
43
|
100
|
|
|
|
348
|
@media_tokens or return $rule; |
|
391
|
5
|
|
|
|
|
29
|
my $media = $rule->media; |
|
392
|
5
|
|
|
|
|
41
|
while($media_token_types =~ /i/g) { |
|
393
|
9
|
|
|
|
|
42
|
push @$media, unescape($media_tokens[$-[0]]); |
|
394
|
|
|
|
|
|
|
} |
|
395
|
5
|
|
|
|
|
39
|
return $rule; |
|
396
|
|
|
|
|
|
|
} |
|
397
|
|
|
|
|
|
|
elsif($at eq '@charset' # NOT $unesc_at! |
|
398
|
|
|
|
|
|
|
&& @$tokens >= 3 # @charset rule syntax |
|
399
|
|
|
|
|
|
|
&& $tokens->[0] eq ' ' # is stricter than the |
|
400
|
|
|
|
|
|
|
&& $tokens->[1] =~ /^"/ # tokenisation rules. |
|
401
|
|
|
|
|
|
|
&& s/^s';s?//) { |
|
402
|
22
|
|
|
|
|
51
|
my $esc_enc = $tokens->[1]; |
|
403
|
22
|
|
|
|
|
118
|
splice @$tokens, 0, $+[0]; |
|
404
|
22
|
|
|
|
|
3398
|
require CSS::DOM::Rule::Charset; |
|
405
|
22
|
|
66
|
|
|
212
|
my $rule = new CSS::DOM::Rule::Charset $_[0]||(); |
|
406
|
22
|
|
|
|
|
95
|
$rule->encoding(unescape(substr $esc_enc, 1,-1)); |
|
407
|
22
|
|
|
|
|
292
|
return $rule; |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
else { # unwist |
|
410
|
|
|
|
|
|
|
#warn $_; |
|
411
|
37
|
100
|
|
|
|
168
|
s/^(s?(??{$any_re})*(?:(??{$block_re})|(?:;s?|\z)))// |
|
412
|
|
|
|
|
|
|
or croak "Invalid $at rule"; |
|
413
|
32
|
|
|
|
|
291
|
my ($types,@tokens) = ("\@$1",$at,splice @$tokens,0,$+[0]); |
|
414
|
32
|
100
|
|
|
|
157
|
$types =~ /"/ and return; # ignore rules w/invalid strings |
|
415
|
31
|
100
|
|
|
|
123
|
$types =~ s/s\z// and pop @tokens; |
|
416
|
31
|
|
|
|
|
192
|
require CSS'DOM'Rule; |
|
417
|
31
|
|
66
|
|
|
208
|
(my $rule = new CSS::DOM::Rule $_[0]||()) |
|
418
|
|
|
|
|
|
|
->_set_tokens( |
|
419
|
|
|
|
|
|
|
$types, \@tokens |
|
420
|
|
|
|
|
|
|
); |
|
421
|
31
|
|
|
|
|
175
|
return $rule; |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
}}} |
|
424
|
|
|
|
|
|
|
|
|
425
|
184
|
|
|
184
|
|
424
|
sub _parse_ruleset { for (shift) { |
|
426
|
|
|
|
|
|
|
# Just return if there isnât a ruleset |
|
427
|
184
|
100
|
|
|
|
27264
|
s/(^($any_re*)\{s?(?:$any_re|$block_re|[\@;]s?)*}s?)//x |
|
428
|
|
|
|
|
|
|
or return; |
|
429
|
1
|
|
|
|
|
8
|
index $2,'"' =>== -1 or |
|
430
|
106
|
100
|
|
|
|
932
|
splice (@{+shift}, 0, $+[0]), return; |
|
431
|
|
|
|
|
|
|
|
|
432
|
105
|
|
|
|
|
955
|
for(my $x = $1) { |
|
433
|
105
|
|
|
|
|
175
|
my $tokens = [splice @{+shift}, 0, $+[0]]; |
|
|
105
|
|
|
|
|
771
|
|
|
434
|
|
|
|
|
|
|
|
|
435
|
105
|
|
66
|
|
|
2311
|
(my $ruleset = new CSS::DOM::Rule::Style $_[0]||()) |
|
436
|
|
|
|
|
|
|
->_set_selector_tokens(_parse_selector($_,$tokens)); |
|
437
|
|
|
|
|
|
|
|
|
438
|
105
|
50
|
|
|
|
839
|
s/^{s?// and splice @$tokens, 0, $+[0]; # remove { |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
#use DDS; Dump$_,$tokens; |
|
441
|
105
|
|
|
|
|
570
|
_parse_style_declaration($_,$tokens,$ruleset->style); |
|
442
|
|
|
|
|
|
|
|
|
443
|
105
|
50
|
|
|
|
812
|
s/^}s?// and splice @$tokens, 0, $+[0]; # remove } |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
|
|
446
|
105
|
|
|
|
|
700
|
return $ruleset |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
}}} |
|
449
|
|
|
|
|
|
|
|
|
450
|
105
|
|
|
105
|
|
277
|
sub _parse_selector { for (shift) { for my $tokens (shift) { |
|
|
105
|
|
|
|
|
220
|
|
|
451
|
105
|
|
|
|
|
254
|
my($selector,@selector) = ''; |
|
452
|
105
|
100
|
|
|
|
5914
|
if(s/^($any_re+)//) { |
|
453
|
78
|
|
|
|
|
194
|
$selector = $1; |
|
454
|
78
|
|
|
|
|
310
|
push @selector, splice @$tokens, 0, length $1; |
|
455
|
|
|
|
|
|
|
} |
|
456
|
105
|
100
|
|
|
|
666
|
$selector =~ s/s\z// and pop @selector; |
|
457
|
105
|
|
|
|
|
960
|
return $selector, \@selector; |
|
458
|
|
|
|
|
|
|
}}} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# This one takes optional extra args: |
|
461
|
|
|
|
|
|
|
# 2) the style decl object to add properties to |
|
462
|
|
|
|
|
|
|
# 3..) extra args to pass to the style objâs constructor if 2 is undef |
|
463
|
525
|
|
|
525
|
|
1604
|
sub _parse_style_declaration { for (shift) { for my $tokens (shift) { |
|
|
525
|
|
|
|
|
1037
|
|
|
464
|
|
|
|
|
|
|
# return if there isnât one |
|
465
|
525
|
100
|
|
|
|
55744
|
/^(?:$any_re|$block_re|[\@;]s?)*(?:}s?|\z)/x |
|
466
|
|
|
|
|
|
|
or return; |
|
467
|
|
|
|
|
|
|
|
|
468
|
523
|
|
66
|
|
|
5606
|
my $style = shift||new CSS::DOM::Style @_; |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
{ |
|
471
|
523
|
100
|
|
|
|
804
|
if(s/^is?:s?((?:$any_re|$block_re|\@s?)+)//) { |
|
|
687
|
100
|
|
|
|
61610
|
|
|
472
|
633
|
|
|
|
|
2773
|
my ($prop) = splice @$tokens, 0, $-[1]; |
|
473
|
633
|
|
|
|
|
2073
|
my $types = $1; |
|
474
|
633
|
|
|
|
|
1985
|
my @tokens = splice @$tokens, 0, length $1; |
|
475
|
633
|
100
|
|
|
|
2102
|
unless($types =~ /"/) { # ignore invalid strings |
|
476
|
631
|
100
|
|
|
|
1813
|
$types =~ s/s\z// and pop @tokens;; |
|
477
|
631
|
|
|
|
|
2236
|
$style->_set_property_tokens( |
|
478
|
|
|
|
|
|
|
unescape($prop),$types,\@tokens |
|
479
|
|
|
|
|
|
|
); |
|
480
|
|
|
|
|
|
|
} |
|
481
|
633
|
100
|
|
|
|
3830
|
s/^;s?// and splice(@$tokens, 0, $+[0]), redo; |
|
482
|
|
|
|
|
|
|
} |
|
483
|
|
|
|
|
|
|
elsif(s/^;s?//) { |
|
484
|
21
|
|
|
|
|
53
|
splice @$tokens, 0, $+[0]; redo; |
|
|
21
|
|
|
|
|
65
|
|
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
else { |
|
487
|
|
|
|
|
|
|
# Ignorable declaration |
|
488
|
33
|
|
|
|
|
2575
|
s/^(?:$any_re|$block_re|\@s?)*//; |
|
489
|
33
|
|
|
|
|
188
|
splice @$tokens, 0, $+[0]; |
|
490
|
33
|
100
|
|
|
|
201
|
s/^;s?// and splice(@$tokens, 0, $+[0]), redo; |
|
491
|
|
|
|
|
|
|
} |
|
492
|
|
|
|
|
|
|
# else last |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
|
|
495
|
523
|
|
|
|
|
4675
|
return $style; |
|
496
|
|
|
|
|
|
|
}}} |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
sub _expected { |
|
499
|
9
|
|
|
9
|
|
24
|
my $tokens = pop; |
|
500
|
9
|
100
|
|
|
|
2325
|
croak |
|
|
|
100
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
"Syntax error: expected $_[0] but found '" |
|
502
|
|
|
|
|
|
|
.join('',@$tokens[ |
|
503
|
|
|
|
|
|
|
0..(10<$#$tokens?10 : $#$tokens) |
|
504
|
|
|
|
|
|
|
]) . ($#$tokens > 10 ? '...' : '') . "'"; |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
|
|
507
|
22
|
|
|
22
|
|
31
|
sub _decode { my $at; for(''.shift) { |
|
|
22
|
|
|
|
|
75
|
|
|
508
|
|
|
|
|
|
|
# ~~~ Some of this is repetitive and could probably be compressed. |
|
509
|
22
|
|
|
|
|
839569
|
require Encode; |
|
510
|
22
|
50
|
|
|
|
16177
|
if(/^(\xef\xbb\xbf(\@charset "(.*?)";))/s) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
511
|
0
|
|
|
|
|
0
|
my $enc = $3; |
|
512
|
0
|
|
|
|
|
0
|
my $dec = eval{Encode::decode($3, $1, 9)}; |
|
|
0
|
|
|
|
|
0
|
|
|
513
|
0
|
0
|
|
|
|
0
|
if(defined $dec) { |
|
514
|
0
|
0
|
|
|
|
0
|
$dec =~ /^(\x{feff}?)$2\z/ |
|
|
|
0
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
and return Encode::decode($enc, |
|
516
|
|
|
|
|
|
|
$1 ? substr $_, 3 : $_); |
|
517
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xef\\xbb\\xbf" |
|
518
|
|
|
|
|
|
|
:"\"$enc\" is encoded in ASCII but is not" |
|
519
|
|
|
|
|
|
|
." ASCII-based"; |
|
520
|
|
|
|
|
|
|
} |
|
521
|
|
|
|
|
|
|
} |
|
522
|
|
|
|
|
|
|
elsif(/^\xef\xbb\xbf/) { |
|
523
|
2
|
|
|
|
|
14
|
return Encode::decode_utf8(substr $_,3); |
|
524
|
|
|
|
|
|
|
} |
|
525
|
|
|
|
|
|
|
elsif(/^(\@charset "(.*?)";)/s) { |
|
526
|
0
|
|
|
|
|
0
|
my $dec = eval{Encode::decode($2, $1, 9)}; |
|
|
0
|
|
|
|
|
0
|
|
|
527
|
0
|
0
|
|
|
|
0
|
if(defined $dec) { |
|
528
|
0
|
0
|
|
|
|
0
|
$dec eq $1 |
|
529
|
|
|
|
|
|
|
and return Encode::decode($2, $_); |
|
530
|
0
|
|
|
|
|
0
|
$@ = "\"$2\" is encoded in ASCII but is not " |
|
531
|
|
|
|
|
|
|
."ASCII-based"; |
|
532
|
|
|
|
|
|
|
} |
|
533
|
|
|
|
|
|
|
} |
|
534
|
|
|
|
|
|
|
elsif( |
|
535
|
|
|
|
|
|
|
/^(\xfe\xff(\0\@\0c\0h\0a\0r\0s\0e\0t\0 \0"((?:\0.)*?)\0"\0;))/s |
|
536
|
|
|
|
|
|
|
) { |
|
537
|
0
|
|
|
|
|
0
|
my $enc = Encode::decode('utf16be', $3); |
|
538
|
0
|
|
|
|
|
0
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
0
|
|
|
|
|
0
|
|
|
539
|
0
|
0
|
|
|
|
0
|
if(defined $dec) { |
|
540
|
0
|
0
|
|
|
|
0
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
|
0
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
and return Encode::decode($enc, |
|
542
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
|
543
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
|
544
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UCS-2 but is not" |
|
545
|
|
|
|
|
|
|
." UCS-2-based"; |
|
546
|
|
|
|
|
|
|
} |
|
547
|
|
|
|
|
|
|
} |
|
548
|
|
|
|
|
|
|
elsif( |
|
549
|
|
|
|
|
|
|
/^(\0\@\0c\0h\0a\0r\0s\0e\0t\0 \0"((?:\0.)*?)\0"\0;)/s |
|
550
|
|
|
|
|
|
|
) { |
|
551
|
1
|
|
|
|
|
7
|
my $origenc = my $enc = Encode::decode('utf16be', $2); |
|
552
|
1
|
|
|
|
|
657
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
4
|
|
|
553
|
1
|
|
|
|
|
5
|
defined $dec or $dec |
|
554
|
1
|
50
|
|
|
|
40
|
= eval{Encode::decode($enc.='-be', $1, 9)}; |
|
555
|
1
|
50
|
|
|
|
609
|
if(defined $dec) { |
|
556
|
1
|
50
|
|
|
|
9
|
$dec eq "\@charset \"$origenc\";" |
|
557
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
|
558
|
0
|
|
|
|
|
0
|
$@ ="\"$origenc\" is encoded in UCS-2 but is not " |
|
559
|
|
|
|
|
|
|
."UCS-2-based"; |
|
560
|
|
|
|
|
|
|
} |
|
561
|
|
|
|
|
|
|
} |
|
562
|
|
|
|
|
|
|
elsif( |
|
563
|
|
|
|
|
|
|
/^(\xff\xfe(\@\0c\0h\0a\0r\0s\0e\0t\0 \0"\0((?:.\0)*?)"\0;\0))/s |
|
564
|
|
|
|
|
|
|
) { |
|
565
|
1
|
|
|
|
|
35
|
my $enc = Encode::decode('utf16le', $3); |
|
566
|
1
|
|
|
|
|
689
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
6
|
|
|
567
|
1
|
50
|
|
|
|
50
|
if(defined $dec) { |
|
568
|
1
|
50
|
|
|
|
45
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
|
50
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
and return Encode::decode($enc, |
|
570
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
|
571
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
|
572
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UCS-2-LE but is not" |
|
573
|
|
|
|
|
|
|
." UCS-2-LE-based"; |
|
574
|
|
|
|
|
|
|
} |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
elsif( |
|
577
|
|
|
|
|
|
|
/^(\@\0c\0h\0a\0r\0s\0e\0t\0 \0"\0((?:.\0)*?)"\0;\0)/s |
|
578
|
|
|
|
|
|
|
) { |
|
579
|
1
|
|
|
|
|
5
|
my $origenc = my $enc = Encode::decode('utf16le', $2); |
|
580
|
1
|
|
|
|
|
46
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
8
|
|
|
581
|
1
|
|
|
|
|
5
|
defined $dec or $dec |
|
582
|
1
|
50
|
|
|
|
51
|
= eval{Encode::decode($enc.='-le', $1, 9)}; |
|
583
|
1
|
50
|
|
|
|
3251
|
if(defined $dec) { |
|
584
|
1
|
50
|
|
|
|
9
|
$dec eq "\@charset \"$origenc\";" |
|
585
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
|
586
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in UCS-2-LE but is not " |
|
587
|
|
|
|
|
|
|
."UCS-2-LE-based"; |
|
588
|
|
|
|
|
|
|
} |
|
589
|
|
|
|
|
|
|
} |
|
590
|
|
|
|
|
|
|
elsif( |
|
591
|
|
|
|
|
|
|
/^(\0\0\xfe\xff(\0{3}\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
|
592
|
|
|
|
|
|
|
\0{3}\ \0{3}"((?:\0{3}.)*?)\0{3}"\0{3};))/sx |
|
593
|
|
|
|
|
|
|
) { |
|
594
|
1
|
|
|
|
|
6
|
my $enc = Encode::decode('utf32be', $3); |
|
595
|
1
|
|
|
|
|
685
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
4
|
|
|
596
|
1
|
50
|
|
|
|
628
|
if(defined $dec) { |
|
597
|
1
|
50
|
|
|
|
43
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
|
50
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
and return Encode::decode($enc, |
|
599
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
|
600
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
|
601
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UTF-32-BE but is not" |
|
602
|
|
|
|
|
|
|
." UTF-32-BE-based"; |
|
603
|
|
|
|
|
|
|
} |
|
604
|
|
|
|
|
|
|
} |
|
605
|
|
|
|
|
|
|
elsif( |
|
606
|
|
|
|
|
|
|
/^(\0{3}\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
|
607
|
|
|
|
|
|
|
\0{3}\ \0{3}"((?:\0{3}.)*?)\0{3}"\0{3};)/sx |
|
608
|
|
|
|
|
|
|
) { |
|
609
|
1
|
|
|
|
|
5
|
my $origenc = my $enc = Encode::decode('utf32be', $2); |
|
610
|
1
|
|
|
|
|
50
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
4
|
|
|
611
|
1
|
|
|
|
|
6
|
defined $dec or $dec |
|
612
|
1
|
50
|
|
|
|
51
|
= eval{Encode::decode($enc.='-be', $1, 9)}; |
|
613
|
1
|
50
|
|
|
|
923
|
if(defined $dec) { |
|
614
|
1
|
50
|
|
|
|
9
|
$dec eq "\@charset \"$origenc\";" |
|
615
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
|
616
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in UTF-32-BE but is not " |
|
617
|
|
|
|
|
|
|
."UTF-32-BE-based"; |
|
618
|
|
|
|
|
|
|
} |
|
619
|
|
|
|
|
|
|
} |
|
620
|
|
|
|
|
|
|
elsif( |
|
621
|
|
|
|
|
|
|
/^(\xff\xfe\0\0(\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
|
622
|
|
|
|
|
|
|
\0{3}\ \0{3}"\0{3}((?:.\0{3})*?)"\0{3};\0{3}))/sx |
|
623
|
|
|
|
|
|
|
) { |
|
624
|
1
|
|
|
|
|
5
|
my $enc = Encode::decode('utf32le', $3); |
|
625
|
1
|
|
|
|
|
689
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
4
|
|
|
626
|
1
|
50
|
|
|
|
46
|
if(defined $dec) { |
|
627
|
1
|
50
|
|
|
|
37
|
$dec =~ /^(\x{feff}?)\@charset "$enc";\z/ |
|
|
|
50
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
and return Encode::decode($enc, |
|
629
|
|
|
|
|
|
|
$1 ? substr $_, 2 : $_); |
|
630
|
0
|
0
|
|
|
|
0
|
$@ = $1?"Invalid BOM for $enc: \\xfe\xff" |
|
631
|
|
|
|
|
|
|
:"\"$enc\" is encoded in UTF-32-LE but is not" |
|
632
|
|
|
|
|
|
|
." UTF-32-LE-based"; |
|
633
|
|
|
|
|
|
|
} |
|
634
|
|
|
|
|
|
|
} |
|
635
|
|
|
|
|
|
|
elsif( |
|
636
|
|
|
|
|
|
|
/^(\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t |
|
637
|
|
|
|
|
|
|
\0{3}\ \0{3}"\0{3}((?:.\0{3})*?)"\0{3};\0{3})/sx |
|
638
|
|
|
|
|
|
|
) { |
|
639
|
1
|
|
|
|
|
5
|
my $origenc = my $enc = Encode::decode('utf32le', $2); |
|
640
|
1
|
|
|
|
|
44
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
4
|
|
|
641
|
1
|
|
|
|
|
7
|
defined $dec or $dec |
|
642
|
1
|
50
|
|
|
|
45
|
= eval{Encode::decode($enc.='-le', $1, 9)}; |
|
643
|
1
|
50
|
|
|
|
722
|
if(defined $dec) { |
|
644
|
1
|
50
|
|
|
|
8
|
$dec eq "\@charset \"$origenc\";" |
|
645
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
|
646
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in UTF-32-LE but is not " |
|
647
|
|
|
|
|
|
|
."UTF-32-LE-based"; |
|
648
|
|
|
|
|
|
|
} |
|
649
|
|
|
|
|
|
|
} |
|
650
|
|
|
|
|
|
|
elsif(/^(?:\0\0\xfe\xff|\xff\xfe\0\0)/) { |
|
651
|
2
|
|
|
|
|
8
|
return Encode::decode('utf32', $_); |
|
652
|
|
|
|
|
|
|
} |
|
653
|
|
|
|
|
|
|
elsif(/^(?:\xfe\xff|\xff\xfe)/) { |
|
654
|
3
|
|
|
|
|
12
|
return Encode::decode('utf16', $_); |
|
655
|
|
|
|
|
|
|
} |
|
656
|
|
|
|
|
|
|
elsif( |
|
657
|
|
|
|
|
|
|
/^(\|\x83\x88\x81\x99\xa2\x85\xa3\@\x7f(.*?)\x7f\^)/s |
|
658
|
|
|
|
|
|
|
) { |
|
659
|
2
|
|
|
|
|
9
|
my $enc = Encode::decode('cp37', $2); |
|
660
|
2
|
|
|
|
|
5281
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
2
|
|
|
|
|
7
|
|
|
661
|
2
|
50
|
|
|
|
67
|
if(defined $dec) { |
|
662
|
2
|
50
|
|
|
|
15
|
$dec eq "\@charset \"$enc\";" |
|
663
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
|
664
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in EBCDIC but is not " |
|
665
|
|
|
|
|
|
|
."EBCDIC-based"; |
|
666
|
|
|
|
|
|
|
} |
|
667
|
|
|
|
|
|
|
} |
|
668
|
|
|
|
|
|
|
elsif( |
|
669
|
|
|
|
|
|
|
/^(\xae\x83\x88\x81\x99\xa2\x85\xa3\@\xfc(.*?)\xfc\^)/s |
|
670
|
|
|
|
|
|
|
) { |
|
671
|
1
|
|
|
|
|
4
|
my $enc = Encode::decode('cp1026', $2); |
|
672
|
1
|
|
|
|
|
28
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
5
|
|
|
673
|
1
|
50
|
|
|
|
28
|
if(defined $dec) { |
|
674
|
1
|
50
|
|
|
|
10
|
$dec eq "\@charset \"$enc\";" |
|
675
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
|
676
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in IBM1026 but is not " |
|
677
|
|
|
|
|
|
|
."IBM1026-based"; |
|
678
|
|
|
|
|
|
|
} |
|
679
|
|
|
|
|
|
|
} |
|
680
|
|
|
|
|
|
|
elsif( |
|
681
|
|
|
|
|
|
|
/^(\0charset "(.*?)";)/s |
|
682
|
|
|
|
|
|
|
) { |
|
683
|
1
|
|
|
|
|
4
|
my $enc = Encode::decode('gsm0338', $2); |
|
684
|
1
|
|
|
|
|
6513
|
my $dec = eval{Encode::decode($enc, $1, 9)}; |
|
|
1
|
|
|
|
|
6
|
|
|
685
|
1
|
50
|
|
|
|
143
|
if(defined $dec) { |
|
686
|
1
|
50
|
|
|
|
10
|
$dec eq "\@charset \"$enc\";" |
|
687
|
|
|
|
|
|
|
and return Encode::decode($enc, $_); |
|
688
|
0
|
|
|
|
|
0
|
$@ ="\"$enc\" is encoded in GSM 0338 but is not " |
|
689
|
|
|
|
|
|
|
."GSM 0338-based"; |
|
690
|
|
|
|
|
|
|
} |
|
691
|
|
|
|
|
|
|
} |
|
692
|
|
|
|
|
|
|
else { |
|
693
|
4
|
|
|
|
|
16
|
my %args = @_; |
|
694
|
4
|
|
100
|
|
|
34
|
return Encode::decode($args{encoding_hint}||'utf8', $_); |
|
695
|
|
|
|
|
|
|
} |
|
696
|
0
|
|
|
|
|
|
return; |
|
697
|
|
|
|
|
|
|
}} |
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
**__END__** |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head1 NAME |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
CSS::DOM::Parser - Parser for CSS::DOM |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=head1 VERSION |
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
Version 0.15 |
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
This is a private module (at least for now). Don't use it directly. |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
L<CSS::DOM> |