| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package XML::Parser::Lite::Tree::XPath::Tokener; |
|
2
|
|
|
|
|
|
|
|
|
3
|
31
|
|
|
31
|
|
104797
|
use XML::Parser::Lite::Tree::XPath::Token; |
|
|
31
|
|
|
|
|
93
|
|
|
|
31
|
|
|
|
|
117565
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#
# Constructor. Returns an empty hash-based object blessed into $class.
# The working state (tokens, input, error, rx) is filled in by parse().
#
sub new {
    my ($class) = @_;

    my $self = bless {}, $class;

    return $self;
}
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
#
# Tokenize an XPath expression.
#
#   $input - the expression text (undef is treated as an empty string)
#
# Resets $self->{tokens} and $self->{error}, repeatedly calls step() until
# the input is used up or an error is recorded, then runs special_rules()
# over the collected tokens. Any error message left in $self->{error} is
# emitted with warn(). Always returns 1; callers must inspect
# $self->{error} to detect failure.
#
sub parse {
    my ($self, $input) = @_;

    $self->{tokens} = [];
    $self->{input}  = defined $input ? $input : '';
    $self->{error}  = 0;
    $self->{rx}     = XML::Parser::Lite::Tree::XPath::Tokener::Rx::fetch();

    $self->trim();

    # Loop on the remaining *length*, not on truthiness: the string "0" is
    # false in Perl, so a plain boolean test would stop early and silently
    # drop a trailing "0" token (e.g. in "1+0").
    while (length $self->{input}){

        $self->step();
        last if $self->{error};
    }

    # the regex fragments are only needed while stepping
    $self->{rx} = 0;

    warn $self->{error} if $self->{error};

    $self->special_rules();

    warn $self->{error} if $self->{error};

    return 1;
}
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
#
# Consume one lexical item from the front of $self->{input}.
#
# Leading whitespace is trimmed, then the token classes are tried in a fixed
# order (Symbol, '*', prefix:*, QName, NodeType, Operator, Literal, Number,
# VariableReference). The first class that matches pushes its token(s) via
# push_token() and removes the matched text via consume(). If nothing
# matches, a message is stored in $self->{error}.
#
# Names are always emitted as provisional 'NCName' tokens here; deciding
# whether they are function names, axis names, operators etc. is left to a
# later pass over the token list.
#
sub step {
    my ($self) = @_;

    $self->trim();

    # Only whitespace was left (e.g. trailing blanks in the expression):
    # there is nothing to tokenize and that is not an error.
    return unless length $self->{input};

    my $rx = $self->{rx};

    #
    # Symbols
    #

    if ($self->{input} =~ m!^(\(|\)|\[|\]|\.\.|\.|\@|,|::)!){

        my $symbol = $1;
        $self->push_token('Symbol', $symbol);
        $self->consume(length $symbol);
        return;
    }

    #
    # NameTest
    #

    if ($self->{input} =~ m!^(\*)!){

        $self->push_token('Star', '*');
        $self->consume(1);
        return;
    }

    if ($self->{input} =~ m!^($rx->{NCName})\:\*!){

        my $prefix = $1;
        $self->push_token('NCName', $prefix);
        $self->push_token('NameTestPostfix', ':*');

        $self->consume(2 + length $prefix);
        return;
    }

    # QName test
    #
    # The separator is written as \x3a (a colon). Inside a regex literal a
    # doubled backslash would instead match a literal backslash followed by
    # the characters "x3a", so a prefixed name could never match.

    if ($self->{input} =~ m!^((($rx->{NCName})\x3a)?($rx->{NCName}))!){

        my ($whole, $prefix, $local) = ($1, $3, $4);

        if (defined $prefix){
            $self->push_token('NCName', $prefix);
            $self->push_token('QNameSep', ':');
        }
        $self->push_token('NCName', $local);
        $self->consume(length $whole);
        return;
    }

    #
    # NodeType
    #
    # (every NodeType keyword is also matched by the QName test above, so
    # this branch is kept only as a fallback)
    #

    if ($self->{input} =~ m!^(comment|text|processing-instruction|node)!){

        my $node_type = $1;
        $self->push_token('NodeType', $node_type);
        $self->consume(length $node_type);
        return;
    }

    #
    # Operator
    #

    if ($self->{input} =~ m!^(and|or|mod|div|//|/|\||\+|-|=|\!=|<=|<|>=|>)!){

        my $operator = $1;
        $self->push_token('Operator', $operator);
        $self->consume(length $operator);
        return;
    }

    #
    # FunctionName (no need to test - it's a QName - it'll be found later on via special rules)
    #

    #
    # AxisName (no test - it's a NCName)
    #

    #
    # Literal
    #
    # The content is captured directly by the alternation. Re-matching the
    # quoted text with '.' would fail for literals containing a newline and
    # leave a stale capture behind.
    #

    if ($self->{input} =~ m!^(?:'([^']*)'|"([^"]*)")!){

        my $content = defined $1 ? $1 : $2;

        $self->push_token('Literal', $content);
        $self->consume(2 + length $content);    # content plus both quotes
        return;
    }

    #
    # Number
    #

    if ($self->{input} =~ m!^($rx->{Number})!){

        my $number = $1;
        $self->push_token('Number', $number);
        $self->consume(length $number);
        return;
    }

    #
    # VariableReference
    #

    if ($self->{input} =~ m!^\$($rx->{QName})!){

        my $name = $1;
        $self->push_token('VariableReference', $name);
        $self->consume(1 + length $name);       # name plus the leading '$'
        return;
    }

    $self->{error} = "couldn't toke at >>>$self->{input}<<<";
    return;
}
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
#
# Append a new token to $self->{tokens}.
#
#   $type    - stored in the token's {type} slot
#   $content - stored in the token's {content} slot; the slot is left
#              unset when $content is undef
#
sub push_token {
    my ($self, $type, $content) = @_;

    my $token = XML::Parser::Lite::Tree::XPath::Token->new();
    $token->{type} = $type;
    $token->{content} = $content if defined $content;

    push @{$self->{tokens}}, $token;
}
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
#
# Drop the first $count characters from $self->{input}.
#
sub consume {
    my ($self, $count) = @_;

    $self->{input} = substr $self->{input}, $count;
}
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
#
# Strip leading whitespace (space, tab, CR, LF) from $self->{input}.
# Trailing whitespace is left untouched.
#
sub trim {
    my ($self) = @_;

    $self->{input} =~ s!^[\x20\x09\x0D\x0A]+!!;
}
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#
# Post-process the raw token list in $self->{tokens}, applying the token
# disambiguation rules quoted in the comments below.
#
# Steps:
#   1. link the tokens into a temporary prev/next chain
#   2. rules 1-3: reclassify Star / NCName tokens from their neighbours
#   3. rule 4: everything still Star / NCName becomes (part of) a NameTest
#   4. remove the temporary chain
#   5. merge the temporary NameTestBase / QNamePre sequences into single
#      NameTest tokens
#
# On a rule violation a message is stored in $self->{error} and the method
# returns early (steps 3 and 5 are skipped). The prev/next chain is removed
# in that case too: the links are circular references, which Perl's
# reference counting would otherwise never release.
#
sub special_rules {
    my ($self) = @_;

    my $tokens = $self->{tokens};

    #
    # set up node chain
    #

    my $prev = undef;
    for my $token (@{$tokens}){

        $token->{prev} = $prev;
        $token->{next} = undef;
        $prev->{next} = $token if defined $prev;
        $prev = $token;
    }

    #
    # special rules
    #

    # Set when one of the rules below rejects a token. A local flag is used
    # because $self->{error} may already hold an earlier message.
    my $failed = 0;

    TOKEN:
    for my $token (@{$tokens}){

        #
        # rule 1
        #
        # If there is a preceding token and the preceding token is not one of @, ::, (, [, , or an Operator,
        # then a * must be recognized as a MultiplyOperator and an NCName must be recognized as an OperatorName.
        #
        # A QNameSep is excluded as well: the NCName after it is the local
        # part of a qualified name that was split into three temporary
        # tokens, not a free-standing name.
        #

        my $p = $token->{prev};

        if (defined $p
            && !(  $p->match('Symbol', '@')
                || $p->match('Symbol', '::')
                || $p->match('Symbol', '(')
                || $p->match('Symbol', '[')
                || $p->match('Symbol', ',')
                || $p->match('Operator')
                || $p->match('QNameSep'))){

            if ($token->{type} eq 'Star'){

                $token->{type} = 'Operator';

            }elsif ($token->{type} eq 'NCName'){

                unless ($self->is_OperatorName($token->{content})){

                    $self->{error} = "Found NCName '$token->{content}' when an OperatorName was required";
                    $failed = 1;
                    last TOKEN;
                }

                $token->{type} = 'Operator';
            }
        }

        my $n = $token->{next};

        #
        # rule 2
        #
        # If the character following an NCName (possibly after intervening ExprWhitespace) is (,
        # then the token must be recognized as a NodeType or a FunctionName.
        #

        if ($token->match('NCName') && defined $n && $n->match('Symbol', '(')){

            $token->{type} = $self->is_NodeType($token->{content})
                ? 'NodeType'
                : 'FunctionName';
        }

        #
        # rule 3
        #
        # If the two characters following an NCName (possibly after intervening ExprWhitespace) are ::,
        # then the token must be recognized as an AxisName.
        #

        if ($token->match('NCName') && defined $n && $n->match('Symbol', '::')){

            unless ($self->is_AxisName($token->{content})){

                $self->{error} = "Found NCName '$token->{content}' when an AxisName was required";
                $failed = 1;
                last TOKEN;
            }

            $token->{type} = 'AxisName';
        }
    }

    unless ($failed){

        for my $token (@{$tokens}){

            #
            # rule 4
            #
            # Otherwise, the token must not be recognized as a MultiplyOperator, an OperatorName,
            # a NodeType, a FunctionName, or an AxisName.
            #
            # (this means we need to clean up Star and NCName tokens)
            #

            if ($token->match('Star')){
                $token->{type} = 'NameTest';
            }

            next unless $token->match('NCName');

            my $n  = $token->{next};
            my $nn = defined $n ? $n->{next} : undef;

            if (defined $n && $n->match('NameTestPostfix')){

                # "prefix" of a "prefix:*" test
                $token->{type} = 'NameTestBase';

            }elsif (defined $n && $n->match('QNameSep')
                && defined $nn && $nn->match('NCName')){

                # "prefix" and "local" of a "prefix:local" test
                $token->{type} = 'QNamePre';
                $nn->{type} = 'QNamePost';

            }else{

                $token->{type} = 'NameTest';
            }
        }
    }

    #
    # remove the node chain
    # (it's a pain for debugging)
    #

    for my $token (@{$tokens}){

        delete $token->{prev};
        delete $token->{next};
    }

    return if $failed;

    #
    # squish temp token sequences together
    #

    my $old_tokens = $tokens;
    $self->{tokens} = [];

    while (my $token = shift @{$old_tokens}){

        if ($token->match('NameTestBase')){

            $token->{type} = 'NameTest';
            $token->{content} .= ':*';

            shift @{$old_tokens};               # drop the NameTestPostfix
        }

        if ($token->match('QNamePre')){

            shift @{$old_tokens};               # drop the QNameSep
            my $post = shift @{$old_tokens};    # the QNamePost

            $token->{type} = 'NameTest';
            $token->{content} .= ':'.$post->{content};
        }

        push @{$self->{tokens}}, $token;
    }

    #
    # TODO - need to check we don't have any temporary tokens still in the list
    # i.e. invalid sub-sequences. not sure what ones we could end up with
    #

    return;
}
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
#
# Returns 1 if $content is one of the operator names
# (and, or, mod, div), 0 otherwise.
#
sub is_OperatorName {
    my ($self, $content) = @_;

    return 1 if $content =~ m!^(and|or|mod|div)$!;
    return 0;
}
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
#
# Returns 1 if $content is one of the node type names
# (comment, text, processing-instruction, node), 0 otherwise.
#
sub is_NodeType {
    my ($self, $content) = @_;

    return 1 if $content =~ m!^(comment|text|processing-instruction|node)$!;
    return 0;
}
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
#
# Returns 1 if $content is one of the thirteen axis names listed in the
# pattern below, 0 otherwise.
#
sub is_AxisName {
    my ($self, $content) = @_;

    # /x lets the alternation be split over two lines
    return 1 if $content =~ m!^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|
        following|following-sibling|namespace|parent|preceding|preceding-sibling|self)$!x;
    return 0;
}
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
package XML::Parser::Lite::Tree::XPath::Tokener::Token; |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
#
# Constructor. Returns an empty hash-based token blessed into $class.
# The other methods of this package read the slots {type}, {content},
# {absolute} and {axis}; none of them is set here.
#
sub new {
    my ($class) = @_;

    my $self = bless {}, $class;

    return $self;
}
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
#
# Test this token against a type and, optionally, a content string.
#
#   $type    - must equal $self->{type}
#   $content - when defined, must also equal $self->{content};
#              when undef, only the type is compared
#
# Returns 1 on a match, 0 otherwise.
#
sub match {
    my ($self, $type, $content) = @_;

    return 0 unless $self->{type} eq $type;

    return 0 if (defined($content) && ($self->{content} ne $content));

    return 1;
}
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
#
# Returns a one-line description of the token for debugging: the type,
# followed by ':absolute' when {absolute} is true, ':' plus the content
# when {content} is defined, and the axis when {axis} is defined.
#
sub dump {
    my ($self) = @_;

    my $ret = $self->{type};
    $ret .= ':absolute' if $self->{absolute};
    $ret .= ':'.$self->{content} if defined $self->{content};
    $ret .= $self->{axis} if defined $self->{axis};

    return $ret;
}
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
package XML::Parser::Lite::Tree::XPath::Tokener::Rx; |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
#
# Build and return a reference to a hash of regex source fragments used by
# the tokener.
#
# CombiningChar, Extender, Digit, BaseChar, IdeoGraphic, Letter and
# NCNameChar hold the *inside* of a character class (ranges written with
# \x.. / \x{....} escapes); NCName, QName, Digits and Number hold complete
# patterns. All values are plain strings meant to be interpolated into a
# regex by the caller. A fresh hash is built on every call.
#
sub fetch {

    my %rx;

    $rx{CombiningChar}  = '\\x{300}-\\x{345}\\x{360}-\\x{361}\\x{483}-\\x{486}\\x{591}-\\x{5a1}\\x{5a3}-\\x{5b9}\\x{5bb}'
                .'-\\x{5bd}\\x{5bf}\\x{5c1}-\\x{5c2}\\x{5c4}\\x{64b}-\\x{652}\\x{670}\\x{6d6}-\\x{6dc}\\x{6dd}-\\'
                .'x{6df}\\x{6e0}-\\x{6e4}\\x{6e7}-\\x{6e8}\\x{6ea}-\\x{6ed}\\x{901}-\\x{903}\\x{93c}\\x{93e}-\\x'
                .'{94c}\\x{94d}\\x{951}-\\x{954}\\x{962}-\\x{963}\\x{981}-\\x{983}\\x{9bc}\\x{9be}\\x{9bf}\\x{9c'
                .'0}-\\x{9c4}\\x{9c7}-\\x{9c8}\\x{9cb}-\\x{9cd}\\x{9d7}\\x{9e2}-\\x{9e3}\\x{a02}\\x{a3c}\\x{a3e}'
                .'\\x{a3f}\\x{a40}-\\x{a42}\\x{a47}-\\x{a48}\\x{a4b}-\\x{a4d}\\x{a70}-\\x{a71}\\x{a81}-\\x{a83}\\'
                .'x{abc}\\x{abe}-\\x{ac5}\\x{ac7}-\\x{ac9}\\x{acb}-\\x{acd}\\x{b01}-\\x{b03}\\x{b3c}\\x{b3e}-\\x'
                .'{b43}\\x{b47}-\\x{b48}\\x{b4b}-\\x{b4d}\\x{b56}-\\x{b57}\\x{b82}-\\x{b83}\\x{bbe}-\\x{bc2}\\x{'
                .'bc6}-\\x{bc8}\\x{bca}-\\x{bcd}\\x{bd7}\\x{c01}-\\x{c03}\\x{c3e}-\\x{c44}\\x{c46}-\\x{c48}\\x{c'
                .'4a}-\\x{c4d}\\x{c55}-\\x{c56}\\x{c82}-\\x{c83}\\x{cbe}-\\x{cc4}\\x{cc6}-\\x{cc8}\\x{cca}-\\x{c'
                .'cd}\\x{cd5}-\\x{cd6}\\x{d02}-\\x{d03}\\x{d3e}-\\x{d43}\\x{d46}-\\x{d48}\\x{d4a}-\\x{d4d}\\x{d5'
                .'7}\\x{e31}\\x{e34}-\\x{e3a}\\x{e47}-\\x{e4e}\\x{eb1}\\x{eb4}-\\x{eb9}\\x{ebb}-\\x{ebc}\\x{ec8}'
                .'-\\x{ecd}\\x{f18}-\\x{f19}\\x{f35}\\x{f37}\\x{f39}\\x{f3e}\\x{f3f}\\x{f71}-\\x{f84}\\x{f86}-\\'
                .'x{f8b}\\x{f90}-\\x{f95}\\x{f97}\\x{f99}-\\x{fad}\\x{fb1}-\\x{fb7}\\x{fb9}\\x{20d0}-\\x{20dc}\\'
                .'x{20e1}\\x{302a}-\\x{302f}\\x{3099}\\x{309a}';

    $rx{Extender}       = '\\xb7\\x{2d0}\\x{2d1}\\x{387}\\x{640}\\x{e46}\\x{ec6}\\x{3005}\\x{3031}-\\x{3035}\\x{309d}-\\'
                .'x{309e}\\x{30fc}-\\x{30fe}';

    $rx{Digit}          = '\\x30-\\x39\\x{660}-\\x{669}\\x{6f0}-\\x{6f9}\\x{966}-\\x{96f}\\x{9e6}-\\x{9ef}\\x{a66}-\\x{a'
                .'6f}\\x{ae6}-\\x{aef}\\x{b66}-\\x{b6f}\\x{be7}-\\x{bef}\\x{c66}-\\x{c6f}\\x{ce6}-\\x{cef}\\x{d6'
                .'6}-\\x{d6f}\\x{e50}-\\x{e59}\\x{ed0}-\\x{ed9}\\x{f20}-\\x{f29}';

    $rx{BaseChar}       = '\\x41-\\x5a\\x61-\\x7a\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\xff\\x{100}-\\x{131}\\x{134}-\\x{13e}\\x{'
                .'141}-\\x{148}\\x{14a}-\\x{17e}\\x{180}-\\x{1c3}\\x{1cd}-\\x{1f0}\\x{1f4}-\\x{1f5}\\x{1fa}-\\x{'
                .'217}\\x{250}-\\x{2a8}\\x{2bb}-\\x{2c1}\\x{386}\\x{388}-\\x{38a}\\x{38c}\\x{38e}-\\x{3a1}\\x{3a'
                .'3}-\\x{3ce}\\x{3d0}-\\x{3d6}\\x{3da}\\x{3dc}\\x{3de}\\x{3e0}\\x{3e2}-\\x{3f3}\\x{401}-\\x{40c}'
                .'\\x{40e}-\\x{44f}\\x{451}-\\x{45c}\\x{45e}-\\x{481}\\x{490}-\\x{4c4}\\x{4c7}-\\x{4c8}\\x{4cb}-'
                .'\\x{4cc}\\x{4d0}-\\x{4eb}\\x{4ee}-\\x{4f5}\\x{4f8}-\\x{4f9}\\x{531}-\\x{556}\\x{559}\\x{561}-\\'
                .'x{586}\\x{5d0}-\\x{5ea}\\x{5f0}-\\x{5f2}\\x{621}-\\x{63a}\\x{641}-\\x{64a}\\x{671}-\\x{6b7}\\x'
                .'{6ba}-\\x{6be}\\x{6c0}-\\x{6ce}\\x{6d0}-\\x{6d3}\\x{6d5}\\x{6e5}-\\x{6e6}\\x{905}-\\x{939}\\x{'
                .'93d}\\x{958}-\\x{961}\\x{985}-\\x{98c}\\x{98f}-\\x{990}\\x{993}-\\x{9a8}\\x{9aa}-\\x{9b0}\\x{9'
                .'b2}\\x{9b6}-\\x{9b9}\\x{9dc}-\\x{9dd}\\x{9df}-\\x{9e1}\\x{9f0}-\\x{9f1}\\x{a05}-\\x{a0a}\\x{a0'
                .'f}-\\x{a10}\\x{a13}-\\x{a28}\\x{a2a}-\\x{a30}\\x{a32}-\\x{a33}\\x{a35}-\\x{a36}\\x{a38}-\\x{a3'
                .'9}\\x{a59}-\\x{a5c}\\x{a5e}\\x{a72}-\\x{a74}\\x{a85}-\\x{a8b}\\x{a8d}\\x{a8f}-\\x{a91}\\x{a93}'
                .'-\\x{aa8}\\x{aaa}-\\x{ab0}\\x{ab2}-\\x{ab3}\\x{ab5}-\\x{ab9}\\x{abd}\\x{ae0}\\x{b05}-\\x{b0c}\\'
                .'x{b0f}-\\x{b10}\\x{b13}-\\x{b28}\\x{b2a}-\\x{b30}\\x{b32}-\\x{b33}\\x{b36}-\\x{b39}\\x{b3d}\\x'
                .'{b5c}-\\x{b5d}\\x{b5f}-\\x{b61}\\x{b85}-\\x{b8a}\\x{b8e}-\\x{b90}\\x{b92}-\\x{b95}\\x{b99}-\\x'
                .'{b9a}\\x{b9c}\\x{b9e}-\\x{b9f}\\x{ba3}-\\x{ba4}\\x{ba8}-\\x{baa}\\x{bae}-\\x{bb5}\\x{bb7}-\\x{'
                .'bb9}\\x{c05}-\\x{c0c}\\x{c0e}-\\x{c10}\\x{c12}-\\x{c28}\\x{c2a}-\\x{c33}\\x{c35}-\\x{c39}\\x{c'
                .'60}-\\x{c61}\\x{c85}-\\x{c8c}\\x{c8e}-\\x{c90}\\x{c92}-\\x{ca8}\\x{caa}-\\x{cb3}\\x{cb5}-\\x{c'
                .'b9}\\x{cde}\\x{ce0}-\\x{ce1}\\x{d05}-\\x{d0c}\\x{d0e}-\\x{d10}\\x{d12}-\\x{d28}\\x{d2a}-\\x{d3'
                .'9}\\x{d60}-\\x{d61}\\x{e01}-\\x{e2e}\\x{e30}\\x{e32}-\\x{e33}\\x{e40}-\\x{e45}\\x{e81}-\\x{e82'
                .'}\\x{e84}\\x{e87}-\\x{e88}\\x{e8a}\\x{e8d}\\x{e94}-\\x{e97}\\x{e99}-\\x{e9f}\\x{ea1}-\\x{ea3}\\'
                .'x{ea5}\\x{ea7}\\x{eaa}-\\x{eab}\\x{ead}-\\x{eae}\\x{eb0}\\x{eb2}-\\x{eb3}\\x{ebd}\\x{ec0}-\\x{'
                .'ec4}\\x{f40}-\\x{f47}\\x{f49}-\\x{f69}\\x{10a0}-\\x{10c5}\\x{10d0}-\\x{10f6}\\x{1100}\\x{1102}'
                .'-\\x{1103}\\x{1105}-\\x{1107}\\x{1109}\\x{110b}-\\x{110c}\\x{110e}-\\x{1112}\\x{113c}\\x{113e}'
                .'\\x{1140}\\x{114c}\\x{114e}\\x{1150}\\x{1154}-\\x{1155}\\x{1159}\\x{115f}-\\x{1161}\\x{1163}\\'
                .'x{1165}\\x{1167}\\x{1169}\\x{116d}-\\x{116e}\\x{1172}-\\x{1173}\\x{1175}\\x{119e}\\x{11a8}\\x{'
                .'11ab}\\x{11ae}-\\x{11af}\\x{11b7}-\\x{11b8}\\x{11ba}\\x{11bc}-\\x{11c2}\\x{11eb}\\x{11f0}\\x{1'
                .'1f9}\\x{1e00}-\\x{1e9b}\\x{1ea0}-\\x{1ef9}\\x{1f00}-\\x{1f15}\\x{1f18}-\\x{1f1d}\\x{1f20}-\\x{'
                .'1f45}\\x{1f48}-\\x{1f4d}\\x{1f50}-\\x{1f57}\\x{1f59}\\x{1f5b}\\x{1f5d}\\x{1f5f}-\\x{1f7d}\\x{1'
                .'f80}-\\x{1fb4}\\x{1fb6}-\\x{1fbc}\\x{1fbe}\\x{1fc2}-\\x{1fc4}\\x{1fc6}-\\x{1fcc}\\x{1fd0}-\\x{'
                .'1fd3}\\x{1fd6}-\\x{1fdb}\\x{1fe0}-\\x{1fec}\\x{1ff2}-\\x{1ff4}\\x{1ff6}-\\x{1ffc}\\x{2126}\\x{'
                .'212a}-\\x{212b}\\x{212e}\\x{2180}-\\x{2182}\\x{3041}-\\x{3094}\\x{30a1}-\\x{30fa}\\x{3105}-\\x'
                .'{312c}\\x{ac00}-\\x{d7a3}';

    $rx{IdeoGraphic}    = '\\x{4e00}-\\x{9fa5}\\x{3007}\\x{3021}-\\x{3029}';

    # composite character classes
    $rx{Letter}         = $rx{BaseChar} . $rx{IdeoGraphic};

    $rx{NCNameChar}     = $rx{Letter} . $rx{Digit} . '\\x2e\\x2d\\x5f' . $rx{CombiningChar} . $rx{Extender};

    # a letter or underscore, followed by any number of name characters
    $rx{NCName}         = '['.$rx{Letter}.'\\x5f]['.$rx{NCNameChar}.']*';

    # optional "prefix:" followed by a local name (contains one capture group)
    $rx{QName}          = '('.$rx{NCName}.'\\x3a)?'.$rx{NCName};

    $rx{Digits}         = '[0-9]+';
    # top-level alternation: callers must wrap this in a group
    $rx{Number}         = '([0-9]+(\\.([0-9]+)?)?)|(\\.[0-9]+)';

    return \%rx;
}
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
1; |