| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# -*- mode: perl; coding: utf-8 -*- |
|
2
|
|
|
|
|
|
|
package YATT::LRXML::Parser; |
|
3
|
7
|
|
|
7
|
|
14281
|
use strict; |
|
|
7
|
|
|
|
|
18
|
|
|
|
7
|
|
|
|
|
260
|
|
|
4
|
7
|
|
|
7
|
|
48
|
use warnings qw(FATAL all NONFATAL misc); |
|
|
7
|
|
|
|
|
17
|
|
|
|
7
|
|
|
|
|
439
|
|
|
5
|
7
|
|
|
7
|
|
35
|
use base qw(YATT::Class::Configurable); |
|
|
7
|
|
|
|
|
16
|
|
|
|
7
|
|
|
|
|
2131
|
|
|
6
|
|
|
|
|
|
|
use YATT::Fields |
|
7
|
7
|
|
|
|
|
126
|
(qw(^tokens |
|
8
|
|
|
|
|
|
|
cf_tree |
|
9
|
|
|
|
|
|
|
metainfo |
|
10
|
|
|
|
|
|
|
nsdict |
|
11
|
|
|
|
|
|
|
nslist |
|
12
|
|
|
|
|
|
|
re_splitter |
|
13
|
|
|
|
|
|
|
re_ns |
|
14
|
|
|
|
|
|
|
re_attlist |
|
15
|
|
|
|
|
|
|
re_entity |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
re_arg_decls |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
elem_kids |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
cf_special_entities |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
cf_untaint |
|
24
|
|
|
|
|
|
|
cf_debug |
|
25
|
|
|
|
|
|
|
cf_registry |
|
26
|
|
|
|
|
|
|
) |
|
27
|
|
|
|
|
|
|
, [cf_html_tags => {input => 1, option => 0 |
|
28
|
|
|
|
|
|
|
, form => 0, textarea => 0, select => 0}] |
|
29
|
|
|
|
|
|
|
, [cf_tokens => qw(comment declarator pi tag entity)] |
|
30
|
7
|
|
|
7
|
|
1504
|
); |
|
|
7
|
|
|
|
|
15
|
|
|
31
|
|
|
|
|
|
|
|
|
32
|
7
|
|
|
7
|
|
40
|
use YATT::Util; |
|
|
7
|
|
|
|
|
14
|
|
|
|
7
|
|
|
|
|
1188
|
|
|
33
|
7
|
|
|
7
|
|
39
|
use YATT::Util::Taint; |
|
|
7
|
|
|
|
|
13
|
|
|
|
7
|
|
|
|
|
696
|
|
|
34
|
7
|
|
|
7
|
|
38
|
use YATT::Util::Symbol qw(fields_hash); |
|
|
7
|
|
|
|
|
16
|
|
|
|
7
|
|
|
|
|
326
|
|
|
35
|
7
|
|
|
7
|
|
781
|
use YATT::LRXML::Node; |
|
|
7
|
|
|
|
|
14
|
|
|
|
7
|
|
|
|
|
1753
|
|
|
36
|
|
|
|
|
|
|
|
|
37
|
7
|
|
|
7
|
|
2106
|
use YATT::LRXML (); |
|
|
7
|
|
|
|
|
19
|
|
|
|
7
|
|
|
|
|
139
|
|
|
38
|
7
|
|
|
7
|
|
2234
|
use YATT::LRXML::MetaInfo (); |
|
|
7
|
|
|
|
|
17
|
|
|
|
7
|
|
|
|
|
46080
|
|
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub MetaInfo () { 'YATT::LRXML::MetaInfo' } |
|
41
|
|
|
|
|
|
|
sub Scanner () { 'YATT::LRXML::Scanner' } |
|
42
|
|
|
|
|
|
|
sub Builder () { 'YATT::LRXML::Builder' } |
|
43
|
|
|
|
|
|
|
sub Cursor () { 'YATT::LRXML::NodeCursor' } |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub after_configure { |
|
46
|
489
|
|
|
489
|
0
|
808
|
my MY $self = shift; |
|
47
|
489
|
|
|
|
|
1645
|
$self->SUPER::after_configure; |
|
48
|
489
|
|
|
|
|
1988
|
$$self{re_ns} = $self->re_ns(0); |
|
49
|
489
|
|
|
|
|
1505
|
$$self{re_splitter} = $self->re_splitter(1, $$self{re_ns}); |
|
50
|
489
|
|
|
|
|
3080
|
$$self{re_attlist} = $self->re_attlist(2); |
|
51
|
489
|
|
|
|
|
2151
|
$$self{re_arg_decls} = $self->re_arg_decls(1); |
|
52
|
|
|
|
|
|
|
{ |
|
53
|
489
|
|
|
|
|
993
|
my %re_cached = map {$_ => 1} grep {/^re_/} keys %{fields_hash($self)}; |
|
|
489
|
|
|
|
|
915
|
|
|
|
2445
|
|
|
|
|
5126
|
|
|
|
8313
|
|
|
|
|
16966
|
|
|
|
489
|
|
|
|
|
1678
|
|
|
54
|
489
|
|
|
|
|
2111
|
my @token_pat = $self->re_tokens(2); |
|
55
|
489
|
|
|
|
|
1796
|
while (@token_pat) { |
|
56
|
2445
|
|
|
|
|
4841
|
my ($name, $pattern) = splice @token_pat, 0, 2; |
|
57
|
2445
|
|
|
|
|
3780
|
push @{$self->{elem_kids}}, [$name, qr{^$pattern}]; |
|
|
2445
|
|
|
|
|
128599
|
|
|
58
|
2445
|
100
|
|
|
|
13104
|
next unless $re_cached{"re_$name"}; |
|
59
|
489
|
|
|
|
|
3777
|
$self->{"re_$name"} = $pattern; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub configure_namespace { |
|
65
|
4
|
|
|
4
|
0
|
21
|
shift->metainfo->configure(namespace => shift); |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub configure_metainfo { |
|
69
|
489
|
|
|
489
|
0
|
818
|
(my MY $self) = shift; |
|
70
|
489
|
50
|
|
|
|
1803
|
if (@_ == 1) { |
|
|
|
100
|
|
|
|
|
|
|
71
|
0
|
|
|
|
|
0
|
$self->{metainfo} = shift; |
|
72
|
|
|
|
|
|
|
} elsif (not $self->{metainfo}) { |
|
73
|
|
|
|
|
|
|
# @_ == 0 || > 1 |
|
74
|
165
|
|
|
|
|
1074
|
$self->{metainfo} = MetaInfo->new(@_); |
|
75
|
|
|
|
|
|
|
} else { |
|
76
|
324
|
|
|
|
|
1195
|
$self->{metainfo}->configure(@_); |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
$self->{metainfo} |
|
79
|
489
|
|
|
|
|
1743
|
} |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub metainfo { |
|
82
|
739
|
|
|
739
|
0
|
1276
|
(my MY $self) = shift; |
|
83
|
739
|
|
66
|
|
|
3318
|
$self->{metainfo} ||= $self->configure_metainfo; |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub parse_handle { |
|
87
|
156
|
|
|
156
|
0
|
433
|
(my MY $self, my ($fh)) = splice @_, 0, 2; |
|
88
|
156
|
|
|
|
|
432
|
$self->configure_metainfo(@_); |
|
89
|
156
|
|
|
|
|
406
|
$self->after_configure; |
|
90
|
156
|
50
|
|
|
|
805
|
if (my $layer = $self->{metainfo}->cget('iolayer')) { |
|
91
|
0
|
|
|
|
|
0
|
binmode $fh, $layer; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
156
|
|
|
|
|
286
|
my $scan = $self->tokenize(do { |
|
94
|
156
|
|
|
|
|
693
|
local $/; |
|
95
|
156
|
|
|
|
|
4038
|
my $data = <$fh>; |
|
96
|
156
|
50
|
|
|
|
1116
|
$self->{cf_untaint} ? untaint_any($data) : $data; |
|
97
|
|
|
|
|
|
|
}); |
|
98
|
156
|
|
|
|
|
622
|
$self->organize($scan); |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub parse_string { |
|
102
|
12
|
|
|
12
|
0
|
109
|
my MY $self = shift; |
|
103
|
12
|
|
|
|
|
49
|
$self->configure_metainfo(splice @_, 1); |
|
104
|
12
|
|
|
|
|
40
|
$self->after_configure; |
|
105
|
12
|
|
|
|
|
54
|
my $scan = $self->tokenize($_[0]); |
|
106
|
12
|
|
|
|
|
54
|
$self->organize($scan); |
|
107
|
|
|
|
|
|
|
# $self->{cf_document}->set_tokens($self->{tokens}); |
|
108
|
|
|
|
|
|
|
# $self->{cf_document}->set_tree($tree); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
#======================================== |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub scanner { |
|
114
|
170
|
|
|
170
|
0
|
401
|
(my MY $self) = @_; |
|
115
|
|
|
|
|
|
|
$self->Scanner->new(array => $self->{tokens}, index => 0 |
|
116
|
|
|
|
|
|
|
, linenum => 1 |
|
117
|
170
|
|
|
|
|
1731
|
, metainfo => $self->{metainfo}); |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub tree { |
|
121
|
166
|
|
|
166
|
0
|
258
|
my MY $self = shift; |
|
122
|
|
|
|
|
|
|
my $cursor = $self->call_type(Cursor => new => $self->{cf_tree} |
|
123
|
166
|
|
|
|
|
863
|
, metainfo => $self->{metainfo}); |
|
124
|
|
|
|
|
|
|
#$cursor->configure(path => $self->Cursor->Path->new($self->{cf_tree})); |
|
125
|
166
|
|
|
|
|
975
|
$cursor; |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub new_root_builder { |
|
129
|
168
|
|
|
168
|
0
|
302
|
(my MY $self, my Scanner $scan) = @_; |
|
130
|
168
|
100
|
|
|
|
603
|
if (my $reg = $self->{cf_registry}) { |
|
131
|
156
|
|
|
|
|
795
|
$reg->new_root_builder($self, $scan); |
|
132
|
|
|
|
|
|
|
} else { |
|
133
|
|
|
|
|
|
|
require_and($self->Builder |
|
134
|
|
|
|
|
|
|
, new => $self->{cf_tree} = $self->create_node('root') |
|
135
|
|
|
|
|
|
|
, undef |
|
136
|
|
|
|
|
|
|
, startpos => 0 |
|
137
|
|
|
|
|
|
|
, startline => $scan->{cf_linenum} |
|
138
|
12
|
|
|
|
|
79
|
, linenum => $scan->{cf_linenum}); |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub organize { |
|
143
|
168
|
|
|
168
|
0
|
325
|
(my MY $self, my Scanner $scan) = @_; |
|
144
|
168
|
|
|
|
|
566
|
my $builder = $self->new_root_builder($scan); |
|
145
|
168
|
|
|
|
|
973
|
while ($scan->readable) { |
|
146
|
789
|
|
|
|
|
2539
|
my $text = $scan->read; |
|
147
|
789
|
100
|
|
|
|
3088
|
$builder->add($scan, $text) if $text ne ''; |
|
148
|
789
|
100
|
|
|
|
2047
|
last unless $scan->readable; |
|
149
|
635
|
|
|
|
|
2312
|
my ($toktype, @match) = $scan->expect($self->{elem_kids}); |
|
150
|
635
|
50
|
|
|
|
1612
|
unless (defined $toktype) { |
|
151
|
0
|
|
|
|
|
0
|
$self->build_scanned($builder, $scan |
|
152
|
|
|
|
|
|
|
, unknown => undef, $scan->read); |
|
153
|
0
|
|
|
|
|
0
|
next; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
635
|
100
|
|
|
|
3056
|
if (my $sub = $self->can("build_$toktype")) { |
|
157
|
|
|
|
|
|
|
# declarator も complex 扱いにした方が良いね。 |
|
158
|
633
|
|
|
|
|
1660
|
$builder = $sub->($self, $scan, $builder, \@match); |
|
159
|
|
|
|
|
|
|
} else { |
|
160
|
|
|
|
|
|
|
# easy case. |
|
161
|
2
|
|
|
|
|
6
|
my ($ns, $body) = @match; |
|
162
|
2
|
|
|
|
|
6
|
$self->build_scanned($builder, $scan |
|
163
|
|
|
|
|
|
|
, $toktype => $ns, $body); |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
} |
|
166
|
163
|
50
|
66
|
|
|
563
|
if ($builder->{cf_endtag} and $builder->{parent}) { |
|
167
|
|
|
|
|
|
|
die "Missing close tag '$builder->{cf_endtag}'" |
|
168
|
|
|
|
|
|
|
." at line $builder->{cf_startline}" |
|
169
|
2
|
|
|
|
|
19
|
.$scan->{cf_metainfo}->in_file." \n"; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
161
|
50
|
|
|
|
339
|
if (wantarray) { |
|
173
|
0
|
|
|
|
|
0
|
($self->tree, $self->{metainfo}); |
|
174
|
|
|
|
|
|
|
} else { |
|
175
|
161
|
|
|
|
|
576
|
$self->tree; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub build_scanned { |
|
180
|
193
|
|
|
193
|
0
|
429
|
(my MY $self, my Builder $builder, my Scanner $scan) = splice @_, 0, 3; |
|
181
|
193
|
|
|
|
|
621
|
my $node = $self->create_node(@_); |
|
182
|
193
|
|
|
|
|
738
|
node_set_nlines($node, $scan->{cf_last_nol}); |
|
183
|
193
|
|
|
|
|
607
|
$builder->add($scan, $node); |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub build_pi { |
|
187
|
19
|
|
|
19
|
0
|
37
|
(my MY $self, my Scanner $scan, my Builder $builder, my ($match)) = @_; |
|
188
|
19
|
|
|
|
|
83
|
$self->build_scanned($builder, $scan |
|
189
|
|
|
|
|
|
|
, pi => $match->[0] |
|
190
|
|
|
|
|
|
|
, $self->parse_entities($match->[1])); |
|
191
|
19
|
|
|
|
|
95
|
$builder; |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub build_entity { |
|
195
|
172
|
|
|
172
|
0
|
345
|
(my MY $self, my Scanner $scan, my Builder $builder, my ($match)) = @_; |
|
196
|
172
|
|
|
|
|
571
|
$self->build_scanned($builder, $scan |
|
197
|
|
|
|
|
|
|
, entity => $self->parse_entpath($match->[0])); |
|
198
|
172
|
|
|
|
|
829
|
$builder; |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub build_tag { |
|
202
|
255
|
|
|
255
|
0
|
962
|
(my MY $self, my Scanner $scan, my Builder $builder, my ($match)) = @_; |
|
203
|
255
|
|
|
|
|
727
|
my ($close, $html, $ns, $tagname, $attlist, $is_ee) = @$match; |
|
204
|
255
|
|
66
|
|
|
620
|
$tagname ||= $html; |
|
205
|
|
|
|
|
|
|
|
|
206
|
255
|
100
|
|
|
|
637
|
if ($close) { |
|
207
|
64
|
|
|
|
|
276
|
$builder->verify_close($tagname, $scan); |
|
208
|
|
|
|
|
|
|
# そうか、ここで attribute element からの脱出もせにゃならん。 |
|
209
|
|
|
|
|
|
|
# switched product 方式なら、parent は共通、かな? |
|
210
|
63
|
|
|
|
|
219
|
return $builder->parent; |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
191
|
|
|
|
|
347
|
my ($is_att, $nodetype, $qflag) = do { |
|
214
|
191
|
100
|
100
|
|
|
1137
|
if (defined $ns and $ns =~ s/^:(?=\w)//) { |
|
215
|
21
|
|
|
|
|
126
|
(1, attribute => YATT::LRXML::Node->quoted_by_element($is_ee)); |
|
216
|
|
|
|
|
|
|
} else { |
|
217
|
170
|
|
|
|
|
256
|
my $type = do { |
|
218
|
170
|
100
|
|
|
|
362
|
if (defined $html) { |
|
219
|
18
|
|
|
|
|
68
|
$is_ee = $self->{cf_html_tags}{lc($html)}; |
|
220
|
18
|
|
|
|
|
45
|
'html'; |
|
221
|
|
|
|
|
|
|
} else { |
|
222
|
152
|
|
|
|
|
342
|
'element' |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
}; |
|
225
|
170
|
100
|
|
|
|
759
|
(0, $type => $is_ee ? EMPTY_ELEMENT : 0); |
|
226
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
}; |
|
228
|
|
|
|
|
|
|
|
|
229
|
191
|
100
|
|
|
|
1481
|
my $element = $self->create_node([$nodetype, $qflag] |
|
230
|
|
|
|
|
|
|
, $html |
|
231
|
|
|
|
|
|
|
? $html |
|
232
|
|
|
|
|
|
|
: [$ns, split /[:\.]/, $tagname]); |
|
233
|
191
|
|
|
|
|
854
|
$self->parse_attlist($attlist, $element); |
|
234
|
|
|
|
|
|
|
|
|
235
|
191
|
100
|
|
|
|
632
|
unless ($is_ee) { |
|
|
|
100
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# ..., <:yatt:attr>... |
|
237
|
67
|
|
|
|
|
296
|
$builder->add($scan, $element)->open($element, endtag => $tagname); |
|
238
|
|
|
|
|
|
|
} elsif ($is_att) { |
|
239
|
|
|
|
|
|
|
# <:yatt:attr />... |
|
240
|
16
|
|
|
|
|
69
|
$builder->switch($element); |
|
241
|
|
|
|
|
|
|
} else { |
|
242
|
|
|
|
|
|
|
# |
|
243
|
108
|
|
|
|
|
473
|
node_set_nlines($element, $scan->{cf_last_nol}); |
|
244
|
108
|
|
|
|
|
365
|
$builder->add($scan, $element); |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
#======================================== |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub build_declarator { |
|
251
|
187
|
|
|
187
|
0
|
461
|
(my MY $self, my Scanner $scan, my Builder $builder, my ($match)) = @_; |
|
252
|
187
|
|
|
|
|
476
|
my ($ns, $tagname, $attlist) = @$match; |
|
253
|
|
|
|
|
|
|
|
|
254
|
187
|
|
|
|
|
876
|
my $element = $self->create_node(declarator => |
|
255
|
|
|
|
|
|
|
[$ns, $tagname]); |
|
256
|
187
|
|
|
|
|
714
|
push @$element, $self->parse_arg_decls(\$attlist); |
|
257
|
187
|
|
|
|
|
799
|
node_set_nlines($element, $scan->{cf_last_nol}); |
|
258
|
187
|
100
|
|
|
|
571
|
if (my $reg = $self->{cf_registry}) { |
|
259
|
183
|
|
|
|
|
944
|
$reg->new_decl_builder($builder, $scan, $element, $self); |
|
260
|
|
|
|
|
|
|
} else { |
|
261
|
4
|
|
|
|
|
16
|
$builder->add($scan, $element); |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub re_arg_decls { |
|
266
|
491
|
|
|
491
|
0
|
2557
|
(my MY $self, my ($capture)) = @_; |
|
267
|
491
|
50
|
|
|
|
1180
|
die "re_arg_decls(capture=0) is not yet implemented!" unless $capture; |
|
268
|
491
|
|
|
|
|
1207
|
my ($SQ, $DQ) = ($self->re_sqv(2), $self->re_dqv(2)); |
|
269
|
491
|
|
|
|
|
1309
|
my $BARE = qr{([^=\-\'\"\s<>/\[\]%]+ | /(?!>))}x; |
|
270
|
491
|
|
|
|
|
1374
|
my $ENT = qr{%([\w\:\.]+(?:[\w:\.\-=\[\]\{\}\(,\)]+)?);}x; |
|
271
|
491
|
|
|
|
|
5202
|
qr{^ \s* -- (.*?) -- # 1 |
|
272
|
|
|
|
|
|
|
|^ \s* $ENT # 2 |
|
273
|
|
|
|
|
|
|
|^ \s* (\]) # 3 |
|
274
|
|
|
|
|
|
|
|^ \s+ |
|
275
|
|
|
|
|
|
|
(?: (\w+)\s*=\s*)? # 4 |
|
276
|
|
|
|
|
|
|
(?: $SQ # 5 |
|
277
|
|
|
|
|
|
|
| $DQ # 6 |
|
278
|
|
|
|
|
|
|
| $BARE # 7 |
|
279
|
|
|
|
|
|
|
| (\[)(?:\s* (\w+(?:\:\w+)*)) # 8, 9 |
|
280
|
|
|
|
|
|
|
) |
|
281
|
|
|
|
|
|
|
}xs; |
|
282
|
|
|
|
|
|
|
# '[ word' を一括で取り出すのは、次に ^\s+ を残しておくため. |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub re_decl_entity { |
|
286
|
3
|
|
|
3
|
0
|
17
|
(my MY $self, my ($capture)) = @_; |
|
287
|
3
|
|
|
|
|
12
|
qr{%([\w\:\.]+(?:[\w:\.\-=\[\]\{\}\(,\)]+)?);}x; |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub parse_arg_decls { |
|
291
|
207
|
|
|
207
|
0
|
467
|
(my MY $self, my ($strref)) = @_; |
|
292
|
207
|
|
|
|
|
288
|
my @args; |
|
293
|
207
|
|
|
|
|
2164
|
while ($$strref =~ s{$$self{re_arg_decls}}{}x) { |
|
294
|
|
|
|
|
|
|
print STDERR "parse_arg_decls: ", join("|", map { |
|
295
|
0
|
0
|
|
|
|
0
|
defined $_ ? $_ : "(null)" |
|
296
|
|
|
|
|
|
|
} $& |
|
297
|
|
|
|
|
|
|
, $1 # comment |
|
298
|
|
|
|
|
|
|
, $2 # ENT |
|
299
|
|
|
|
|
|
|
, $3 # ] |
|
300
|
|
|
|
|
|
|
, $4 # name |
|
301
|
|
|
|
|
|
|
, $5 # '..' |
|
302
|
|
|
|
|
|
|
, $6 # ".." |
|
303
|
|
|
|
|
|
|
, $7 # bare |
|
304
|
|
|
|
|
|
|
, $8 # [ |
|
305
|
|
|
|
|
|
|
, $9 # leader |
|
306
|
429
|
50
|
|
|
|
1332
|
), "\n" if $self->{cf_debug}; |
|
307
|
429
|
100
|
|
|
|
2082
|
if (defined $1) { # comment |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
308
|
3
|
|
|
|
|
14
|
push @args, $self->create_node(decl_comment => undef, $1); |
|
309
|
|
|
|
|
|
|
} elsif (defined $2) { # ENT |
|
310
|
52
|
|
|
|
|
225
|
push @args |
|
311
|
|
|
|
|
|
|
, $self->create_node([entity => 1] => $self->parse_entpath($2)); |
|
312
|
|
|
|
|
|
|
} elsif (defined $3) { # ] |
|
313
|
20
|
|
|
|
|
45
|
last; |
|
314
|
|
|
|
|
|
|
} else { |
|
315
|
|
|
|
|
|
|
# $4 # name |
|
316
|
|
|
|
|
|
|
# $5 # '..' |
|
317
|
|
|
|
|
|
|
# $6 # ".." |
|
318
|
|
|
|
|
|
|
# $7 # bare |
|
319
|
|
|
|
|
|
|
# $8 # ] |
|
320
|
354
|
100
|
|
|
|
873
|
if (defined $8) { # [ |
|
321
|
|
|
|
|
|
|
# XXX: hard coded. |
|
322
|
20
|
|
|
|
|
98
|
push @args, my $nest = $self->create_node([attribute => 3], $4, $9); |
|
323
|
20
|
|
|
|
|
95
|
push @$nest, $self->parse_arg_decls($strref); |
|
324
|
|
|
|
|
|
|
} else { |
|
325
|
|
|
|
|
|
|
# XXX: dummy. |
|
326
|
334
|
|
|
|
|
1284
|
push @args, $self->create_attlist('', $4, '=', $5, $6, $7); |
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
} |
|
330
|
207
|
50
|
|
|
|
659
|
print STDERR "REST<$$strref>\n" if $self->{cf_debug}; |
|
331
|
207
|
|
|
|
|
702
|
@args; |
|
332
|
|
|
|
|
|
|
} |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
#======================================== |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
sub parse_attlist { |
|
337
|
191
|
|
|
191
|
0
|
259
|
my MY $self = shift; |
|
338
|
191
|
|
|
|
|
283
|
my $result = $_[1]; # Yes. this *is* intentional. |
|
339
|
|
|
|
|
|
|
# XXX: タグ内改行がここでカウントされなくなる。 |
|
340
|
191
|
100
|
66
|
|
|
2433
|
if (defined $_[0] and my @match = $_[0] =~ m{$$self{re_attlist}}g) { |
|
341
|
112
|
|
|
|
|
475
|
push @$result, $self->create_attlist(@match); |
|
342
|
|
|
|
|
|
|
} |
|
343
|
191
|
|
|
|
|
485
|
$result; |
|
344
|
|
|
|
|
|
|
} |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub parse_entities { |
|
347
|
264
|
|
|
264
|
0
|
431
|
my MY $self = shift; |
|
348
|
|
|
|
|
|
|
# XXX: 行番号情報を受け取れた方が、嬉しいのだが… |
|
349
|
264
|
50
|
|
|
|
616
|
return undef unless defined $_[0]; # make sure single scalar is returned. |
|
350
|
264
|
50
|
|
|
|
695
|
return '' if $_[0] eq ''; |
|
351
|
264
|
50
|
|
|
|
664
|
return $_[0] unless defined $$self{re_entity}; |
|
352
|
264
|
|
|
|
|
1767
|
my @tokens = split $$self{re_entity}, $_[0]; |
|
353
|
264
|
100
|
|
|
|
1401
|
return $tokens[0] if @tokens == 1; |
|
354
|
58
|
|
|
|
|
109
|
my @result; |
|
355
|
58
|
|
|
|
|
206
|
for (my $i = 0; $i < @tokens; $i += 2) { |
|
356
|
91
|
100
|
|
|
|
285
|
push @result, $tokens[$i] if $tokens[$i] ne ""; |
|
357
|
91
|
100
|
|
|
|
419
|
push @result |
|
358
|
|
|
|
|
|
|
, $self->create_node(entity => $self->parse_entpath($tokens[$i+1])) |
|
359
|
|
|
|
|
|
|
if $i+1 < @tokens; |
|
360
|
|
|
|
|
|
|
} |
|
361
|
58
|
100
|
|
|
|
137
|
if (wantarray) { |
|
|
|
100
|
|
|
|
|
|
|
362
|
52
|
|
|
|
|
278
|
@result; |
|
363
|
|
|
|
|
|
|
} elsif (@result > 1) { |
|
364
|
5
|
|
|
|
|
40
|
[TEXT_TYPE, undef, @result]; |
|
365
|
|
|
|
|
|
|
} else { |
|
366
|
1
|
|
|
|
|
8
|
$result[0]; |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
} |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
sub parse_entpath { |
|
371
|
291
|
|
|
291
|
0
|
594
|
(my MY $self, my ($entpath)) = @_; |
|
372
|
291
|
|
|
|
|
417
|
my @name; |
|
373
|
291
|
|
|
|
|
3521
|
push @name, $1 while $entpath =~ s{^[\.\:]?(\w+)(?=[\.\:]|$)}{}; |
|
374
|
|
|
|
|
|
|
# :func(), array[], hash{} is stored in node_body. |
|
375
|
|
|
|
|
|
|
# In &SA(); case, node_name is undef. |
|
376
|
291
|
100
|
|
|
|
1893
|
(@name ? \@name : undef |
|
|
|
100
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
, $entpath eq "" ? () : $entpath); |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
#======================================== |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
sub tokenize { |
|
383
|
168
|
|
|
168
|
0
|
334
|
my MY $self = shift; |
|
384
|
168
|
|
|
|
|
6649
|
$self->{tokens} = [split $$self{re_splitter}, $_[0]]; |
|
385
|
168
|
50
|
|
|
|
810
|
if (my MetaInfo $meta = $self->{metainfo}) { |
|
386
|
|
|
|
|
|
|
# $meta->{tokens} = $self->{tokens}; |
|
387
|
|
|
|
|
|
|
} |
|
388
|
168
|
|
|
|
|
640
|
$self->scanner; |
|
389
|
|
|
|
|
|
|
} |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub token_patterns { |
|
392
|
1981
|
|
|
1981
|
0
|
3383
|
my ($self, $token_types, $capture, $ns) = @_; |
|
393
|
1981
|
|
|
|
|
2964
|
my $wantarray = wantarray; |
|
394
|
1981
|
|
|
|
|
2331
|
my @result; |
|
395
|
1981
|
|
|
|
|
3554
|
foreach my $type (@$token_types) { |
|
396
|
6920
|
|
|
|
|
12010
|
my $meth = "re_$type"; |
|
397
|
6920
|
100
|
|
|
|
21870
|
push @result |
|
398
|
|
|
|
|
|
|
, $wantarray ? $type : () |
|
399
|
|
|
|
|
|
|
, $self->$meth($capture, $ns); |
|
400
|
|
|
|
|
|
|
} |
|
401
|
1981
|
100
|
|
|
|
6384
|
return @result if $wantarray; |
|
402
|
1492
|
|
|
|
|
3979
|
my $pattern = join "\n | ", @result; |
|
403
|
1492
|
|
|
|
|
200027
|
qr{$pattern}x; |
|
404
|
|
|
|
|
|
|
} |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
#---------------------------------------- |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
sub re_splitter { |
|
409
|
494
|
|
|
494
|
0
|
1955
|
(my MY $self, my ($capture, $ns)) = @_; |
|
410
|
494
|
|
|
|
|
1394
|
my $body = $self->re_tokens(0, $ns); |
|
411
|
494
|
100
|
|
|
|
80078
|
$capture ? qr{($body)} : $body; |
|
412
|
|
|
|
|
|
|
} |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
sub re_tokens { |
|
415
|
986
|
|
|
986
|
0
|
1870
|
(my MY $self, my ($capture, $ns)) = @_; |
|
416
|
986
|
|
|
|
|
2850
|
$self->token_patterns($self->{cf_tokens}, $capture, $ns); |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# |
|
420
|
|
|
|
|
|
|
# re_tag(2) returns [ /, specialtag, ns, tag, attlist, / ] |
|
421
|
|
|
|
|
|
|
# |
|
422
|
|
|
|
|
|
|
sub re_tag { |
|
423
|
995
|
|
|
995
|
0
|
2201
|
(my MY $self, my ($capture, $ns)) = @_; |
|
424
|
995
|
|
|
|
|
3974
|
my $namepat = $self->token_patterns([qw(tagname_html tagname_qualified)] |
|
425
|
|
|
|
|
|
|
, $capture, $ns); |
|
426
|
995
|
|
|
|
|
3454
|
my $attlist = $self->re_attlist; |
|
427
|
995
|
100
|
100
|
|
|
4506
|
if (defined $capture and $capture > 1) { |
|
428
|
493
|
|
|
|
|
44070
|
qr{<(/)? (?: $namepat) ($attlist*) \s*(/)?>}xs; |
|
429
|
|
|
|
|
|
|
} else { |
|
430
|
502
|
|
|
|
|
43164
|
my $re = qr{? $namepat $attlist* \s*/?>}xs; |
|
431
|
502
|
100
|
|
|
|
2834
|
$capture ? qr{($re)} : $re; |
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
#---------------------------------------- |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
sub re_name { |
|
438
|
3
|
|
|
3
|
0
|
16
|
my ($self, $capture) = @_; |
|
439
|
3
|
|
|
|
|
6
|
my $body = q{[\w\-\.]+}; |
|
440
|
3
|
100
|
|
|
|
54
|
$capture ? qr{($body)} : qr{$body}; |
|
441
|
|
|
|
|
|
|
} |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub re_ns { |
|
444
|
579
|
|
|
579
|
0
|
1086
|
my ($self, $capture, $nslist, $additional) = @_; |
|
445
|
579
|
50
|
|
|
|
1327
|
die "re_ns capture is not yet implemented" if $capture; |
|
446
|
579
|
|
33
|
|
|
1632
|
$nslist ||= $self->{nslist} = do { |
|
447
|
579
|
|
|
|
|
1414
|
my $meta = $self->metainfo; |
|
448
|
579
|
|
|
|
|
1833
|
$self->{nsdict} = $meta->nsdict; |
|
449
|
579
|
|
|
|
|
1966
|
$meta->cget('namespace'); |
|
450
|
|
|
|
|
|
|
}; |
|
451
|
579
|
50
|
|
|
|
1649
|
unless (@$nslist) { |
|
452
|
0
|
|
|
|
|
0
|
''; |
|
453
|
|
|
|
|
|
|
} else { |
|
454
|
579
|
50
|
|
|
|
1672
|
my $pattern = join "|", map {ref $_ ? @$_ : $_} @$nslist |
|
|
1053
|
50
|
|
|
|
3346
|
|
|
|
|
100
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
, !$additional ? () : ref $additional ? @$additional : $additional; |
|
456
|
579
|
|
|
|
|
2427
|
qq{(?:$pattern)}; |
|
457
|
|
|
|
|
|
|
} |
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
sub re_nsname { |
|
461
|
3505
|
|
|
3505
|
0
|
5375
|
my ($self, $capture) = @_; |
|
462
|
3505
|
|
|
|
|
4567
|
my $body = q{[\w\-\.:]+}; |
|
463
|
3505
|
100
|
|
|
|
14942
|
$capture ? qr{($body)} : qr{$body}; |
|
464
|
|
|
|
|
|
|
} |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
sub re_tagname_qualified { |
|
467
|
1989
|
|
|
1989
|
0
|
3013
|
my ($self, $capture, $ns) = @_; |
|
468
|
1989
|
100
|
|
|
|
4733
|
$ns = $$self{re_ns} unless defined $ns; |
|
469
|
1989
|
|
|
|
|
3715
|
my $name = $self->re_nsname; |
|
470
|
1989
|
100
|
100
|
|
|
8747
|
if (defined $capture and $capture > 1) { |
|
471
|
985
|
|
|
|
|
19269
|
qr{ ( :?$ns) : ($name) }xs; |
|
472
|
|
|
|
|
|
|
} else { |
|
473
|
1004
|
|
|
|
|
2430
|
my $re = qq{ :?$ns : $name }; |
|
474
|
1004
|
100
|
|
|
|
20364
|
$capture ? qr{($re)}xs : qr{$re}xs; |
|
475
|
|
|
|
|
|
|
} |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
sub re_tagname_html { |
|
479
|
998
|
|
|
998
|
0
|
1916
|
(my MY $self, my ($capture, $ns)) = @_; |
|
480
|
998
|
|
|
|
|
1533
|
my $body = join "|", keys %{$self->{cf_html_tags}}; |
|
|
998
|
|
|
|
|
3980
|
|
|
481
|
998
|
100
|
|
|
|
6830
|
$capture ? qr{($body)}i : qr{$body}i; |
|
482
|
|
|
|
|
|
|
} |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
#---------------------------------------- |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub re_attlist { |
|
487
|
1491
|
|
|
1491
|
0
|
4188
|
my ($self, $capture) = @_; |
|
488
|
1491
|
|
|
|
|
3182
|
my $name = $self->re_nsname; |
|
489
|
1491
|
|
|
|
|
3620
|
my $value = $self->re_attvalue($capture); |
|
490
|
1491
|
|
|
|
|
5466
|
my $sp = q{\s+}; |
|
491
|
1491
|
|
|
|
|
1971
|
my $eq = q{\s* = \s*}; |
|
492
|
1491
|
100
|
100
|
|
|
5847
|
if (defined $capture and $capture > 1) { |
|
493
|
494
|
|
|
|
|
5947
|
qr{($sp|\b) (?:($name) ($eq))? $value}xs; |
|
494
|
|
|
|
|
|
|
} else { |
|
495
|
997
|
|
|
|
|
8493
|
my $re = qr{(?:$sp|\b) (?:$name $eq)? $value}xs; |
|
496
|
997
|
100
|
|
|
|
5153
|
$capture ? qr{($re)} : $re; |
|
497
|
|
|
|
|
|
|
} |
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
sub re_attvalue { |
|
501
|
1494
|
|
|
1494
|
0
|
2590
|
my ($self, $capture) = @_; |
|
502
|
1494
|
|
|
|
|
3008
|
my ($SQ, $DQ, $NQ) = |
|
503
|
|
|
|
|
|
|
($self->re_sqv($capture), |
|
504
|
|
|
|
|
|
|
$self->re_dqv($capture), |
|
505
|
|
|
|
|
|
|
$self->re_bare($capture)); |
|
506
|
1494
|
|
|
|
|
65960
|
qr{$SQ | $DQ | $NQ}xs; |
|
507
|
|
|
|
|
|
|
} |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
sub re_sqv { |
|
510
|
1988
|
|
|
1988
|
0
|
2679
|
my ($self, $capture) = @_; |
|
511
|
1988
|
|
|
|
|
5002
|
my $body = qr{(?: [^\'\\]+ | \\.)*}x; |
|
512
|
1988
|
100
|
|
|
|
7143
|
$body = qr{($body)} if $capture; |
|
513
|
1988
|
|
|
|
|
29501
|
qr{\'$body\'}s; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
sub re_dqv { |
|
517
|
1988
|
|
|
1988
|
0
|
3241
|
my ($self, $capture) = @_; |
|
518
|
1988
|
|
|
|
|
4808
|
my $body = qr{(?: [^\"\\]+ | \\.)*}x; |
|
519
|
1988
|
100
|
|
|
|
6920
|
$body = qr{($body)} if $capture; |
|
520
|
1988
|
|
|
|
|
26608
|
qr{\"$body\"}s; |
|
521
|
|
|
|
|
|
|
} |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
sub re_bare; |
|
524
|
|
|
|
|
|
|
*re_bare = \&re_bare_torelant; |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
sub re_bare_strict { |
|
527
|
3
|
|
|
3
|
0
|
18
|
shift->re_nsname(@_); |
|
528
|
|
|
|
|
|
|
} |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
sub re_bare_torelant { |
|
531
|
1500
|
|
|
1500
|
0
|
2306
|
my ($self, $capture) = @_; |
|
532
|
1500
|
|
|
|
|
3598
|
my $body = qr{[^\'\"\s<>/]+ | /(?!>)}x; |
|
533
|
1500
|
100
|
|
|
|
8714
|
$capture ? qr{($body+)} : qr{$body+}; |
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
sub strip_bs { |
|
537
|
0
|
|
|
0
|
0
|
0
|
shift; |
|
538
|
0
|
|
|
|
|
0
|
$_[0] =~ s/\\(\.)/$1/g; |
|
539
|
0
|
|
|
|
|
0
|
$_[0]; |
|
540
|
|
|
|
|
|
|
} |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
#---------------------------------------- |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
sub re_declarator { |
|
545
|
991
|
|
|
991
|
0
|
1770
|
my ($self, $capture, $ns) = @_; |
|
546
|
991
|
|
|
|
|
2251
|
my $namepat = $self->re_tagname_qualified($capture, $ns); |
|
547
|
991
|
|
|
|
|
2096
|
my $arg_decls = q{[^>]}; |
|
548
|
|
|
|
|
|
|
# $self->re_arg_decls(0); |
|
549
|
|
|
|
|
|
|
# print "<<$arg_decls>>\n"; |
|
550
|
991
|
100
|
100
|
|
|
4505
|
if (defined $capture and $capture > 1) { |
|
551
|
491
|
|
|
|
|
17065
|
qr{}xs; |
|
552
|
|
|
|
|
|
|
} else { |
|
553
|
500
|
|
|
|
|
16266
|
my $re = qr{}xs; |
|
554
|
500
|
100
|
|
|
|
2632
|
$capture ? qr{($re)} : $re; |
|
555
|
|
|
|
|
|
|
} |
|
556
|
|
|
|
|
|
|
} |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
sub re_comment { |
|
559
|
991
|
|
|
991
|
0
|
1523
|
my ($self, $capture, $ns) = @_; |
|
560
|
991
|
|
|
|
|
2344
|
$ns = $self->re_prefix($capture, $ns, '#'); |
|
561
|
991
|
100
|
|
|
|
20282
|
$capture ? qr{}s : qr{}s; |
|
562
|
|
|
|
|
|
|
} |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub re_pi { |
|
565
|
991
|
|
|
991
|
0
|
1718
|
my ($self, $capture, $ns) = @_; |
|
566
|
991
|
|
|
|
|
2099
|
$ns = $self->re_prefix($capture, $ns); |
|
567
|
991
|
100
|
|
|
|
3704
|
my $body = $capture ? qr{(.*?)}s : qr{.*?}s; |
|
568
|
991
|
|
|
|
|
23577
|
qr{<\?\b$ns\b$body\?>}s; |
|
569
|
|
|
|
|
|
|
} |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
sub re_entity { |
|
572
|
991
|
|
|
991
|
0
|
2737
|
shift->re_entity_pathexpr(@_); |
|
573
|
|
|
|
|
|
|
} |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
# normal entity |
|
576
|
|
|
|
|
|
|
sub re_entity_strict { |
|
577
|
3
|
|
|
3
|
0
|
16
|
my ($self, $capture, $ns) = @_; |
|
578
|
3
|
50
|
|
|
|
11
|
$ns = defined $ns ? qq{$ns\:} : qr{\w+:}; |
|
579
|
3
|
|
|
|
|
8
|
my $body = $self->re_nsname; |
|
580
|
3
|
100
|
100
|
|
|
14
|
if (defined $capture and $capture > 1) { |
|
581
|
1
|
|
|
|
|
38
|
qr{&$ns($body);}xs; |
|
582
|
|
|
|
|
|
|
} else { |
|
583
|
2
|
|
|
|
|
32
|
my $re = qr{&$ns$body;}xs; |
|
584
|
2
|
100
|
|
|
|
38
|
$capture ? qr{($re)} : $re; |
|
585
|
|
|
|
|
|
|
} |
|
586
|
|
|
|
|
|
|
} |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# extended (subscripted) entity. |
|
589
|
|
|
|
|
|
|
sub re_entity_subscripted { |
|
590
|
6
|
|
|
6
|
0
|
20
|
my ($self, $capture, $ns) = @_; |
|
591
|
6
|
50
|
|
|
|
27
|
$ns = defined $ns ? qq{$ns\:} : qr{\w+:}; |
|
592
|
6
|
|
|
|
|
14
|
my $name = $self->re_nsname; |
|
593
|
6
|
|
|
|
|
14
|
my $sub = $self->re_subscript; |
|
594
|
6
|
|
|
|
|
17
|
my $body = qq{$name$sub*}; |
|
595
|
6
|
100
|
100
|
|
|
29
|
if (defined $capture and $capture > 1) { |
|
596
|
1
|
|
|
|
|
93
|
qr{&($ns)($body);}xs; |
|
597
|
|
|
|
|
|
|
} else { |
|
598
|
5
|
|
|
|
|
118
|
my $re = qr{&$ns$body;}xs; |
|
599
|
5
|
100
|
|
|
|
131
|
$capture ? qr{($re)} : $re; |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
} |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# This cannot handle matching paren, of course;-). |
|
604
|
|
|
|
|
|
|
sub re_subscript { |
|
605
|
10
|
|
|
10
|
0
|
32
|
my $name = shift->re_nsname; |
|
606
|
10
|
|
|
|
|
134
|
qr{[\[\(\{] |
|
607
|
|
|
|
|
|
|
[\w\.\-\+\$\[\]\{\}]*? |
|
608
|
|
|
|
|
|
|
[\}\)\]] |
|
609
|
|
|
|
|
|
|
|\. $name |
|
610
|
|
|
|
|
|
|
|\: [/\$\.\-\w]+ |
|
611
|
|
|
|
|
|
|
}xs; |
|
612
|
|
|
|
|
|
|
} |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# more extended |
|
615
|
|
|
|
|
|
|
sub re_entity_pathexpr { |
|
616
|
994
|
|
|
994
|
0
|
1825
|
my ($self, $capture, $ns) = @_; |
|
617
|
994
|
|
|
|
|
2258
|
$ns = $self->re_prefix(0, $self->entity_ns($ns), ''); |
|
618
|
994
|
|
|
|
|
2977
|
my $body = qr{[\w\$\-\+\*/%<>\.=\@\|!:\[\]\{\}\(,\)]*}; |
|
619
|
994
|
100
|
100
|
|
|
4585
|
if (defined $capture and $capture > 1) { |
|
620
|
492
|
|
|
|
|
17993
|
qr{&($ns\b$body);}xs; |
|
621
|
|
|
|
|
|
|
} else { |
|
622
|
502
|
|
|
|
|
18962
|
my $re = qr{&$ns\b$body;}xs; |
|
623
|
502
|
100
|
|
|
|
2743
|
$capture ? qr{($re)} : $re; |
|
624
|
|
|
|
|
|
|
} |
|
625
|
|
|
|
|
|
|
} |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
sub entity_ns { |
|
628
|
994
|
|
|
994
|
0
|
1571
|
my ($self, $ns) = @_; |
|
629
|
|
|
|
|
|
|
my $special = $self->{cf_special_entities} |
|
630
|
994
|
100
|
|
|
|
4389
|
or return $ns; |
|
631
|
|
|
|
|
|
|
# XXX: die "entity_ns \$ns ($ns) is not yet implemented" if defined $ns; |
|
632
|
90
|
|
|
|
|
222
|
$self->re_ns(0, undef, $special); |
|
633
|
|
|
|
|
|
|
} |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# |
|
636
|
|
|
|
|
|
|
sub re_prefix { |
|
637
|
2976
|
|
|
2976
|
0
|
6993
|
(my MY $self, my ($capture, $ns, $pre, $suf)) = @_; |
|
638
|
2976
|
100
|
|
|
|
8138
|
$ns = $$self{re_ns} unless defined $ns; |
|
639
|
2976
|
100
|
|
|
|
6409
|
$pre = '' unless defined $pre; |
|
640
|
2976
|
50
|
|
|
|
6132
|
$suf = '' unless defined $suf; |
|
641
|
2976
|
100
|
66
|
|
|
12153
|
if (defined $ns and $ns ne '') { |
|
642
|
2974
|
100
|
100
|
|
|
9269
|
$ns = "($ns)" if $capture && $capture > 1; |
|
643
|
2974
|
|
|
|
|
8961
|
qq{$pre$ns$suf}; |
|
644
|
|
|
|
|
|
|
} else { |
|
645
|
2
|
|
|
|
|
5
|
'' |
|
646
|
|
|
|
|
|
|
} |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
1; |