line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ====================================================================== |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Copyright (C) 2000-2007 Paul Kulchenko (paulclinger@yahoo.com) |
4
|
|
|
|
|
|
|
# Copyright (C) 2008 Martin Kutter (martin.kutter@fen-net.de) |
5
|
|
|
|
|
|
|
# XML::Parser::Lite is free software; you can redistribute it |
6
|
|
|
|
|
|
|
# and/or modify it under the same terms as Perl itself. |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# ====================================================================== |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package XML::Parser::Lite; |
11
|
|
|
|
|
|
|
|
12
|
2
|
|
|
2
|
|
1049
|
use 5.006; |
|
2
|
|
|
|
|
6
|
|
13
|
2
|
|
|
2
|
|
8
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
32
|
|
14
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
220
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.722'; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
sub new { |
19
|
6
|
|
|
6
|
1
|
4830
|
my $class = shift; |
20
|
|
|
|
|
|
|
|
21
|
6
|
100
|
|
|
|
20
|
return $class if ref $class; |
22
|
5
|
|
|
|
|
12
|
my $self = bless {} => $class; |
23
|
|
|
|
|
|
|
|
24
|
5
|
|
|
|
|
11
|
my %parameters = @_; |
25
|
5
|
|
|
|
|
13
|
$self->setHandlers(); # clear first |
26
|
5
|
100
|
|
|
|
6
|
$self->setHandlers(%{$parameters{Handlers} || {}}); |
|
5
|
|
|
|
|
30
|
|
27
|
|
|
|
|
|
|
|
28
|
5
|
|
|
|
|
24
|
return $self; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub setHandlers { |
32
|
17
|
|
|
17
|
1
|
352
|
my $self = shift; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# allow symbolic refs, avoid "subroutine redefined" warnings |
35
|
2
|
|
|
2
|
|
11
|
no strict 'refs'; ## no critic |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
63
|
|
36
|
2
|
|
|
2
|
|
10
|
no warnings qw(redefine); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
305
|
|
37
|
|
|
|
|
|
|
# clear all handlers if called without parameters |
38
|
17
|
100
|
|
|
|
35
|
if (not @_) { |
39
|
11
|
|
|
|
|
30
|
for (qw(Start End Char Final Init Comment Doctype XMLDecl)) { |
40
|
|
|
|
337
|
|
|
*$_ = sub {} |
41
|
88
|
|
|
|
|
252
|
} |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# we could use each here, too... |
45
|
17
|
|
|
|
|
37
|
while (@_) { |
46
|
16
|
|
|
|
|
25
|
my($name, $func) = splice(@_, 0, 2); |
47
|
|
|
|
|
|
|
*$name = defined $func |
48
|
|
|
|
|
|
|
? $func |
49
|
|
|
|
2
|
|
|
: sub {} |
50
|
16
|
100
|
|
|
|
60
|
} |
51
|
17
|
|
|
|
|
34
|
return $self; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub _regexp { |
55
|
2
|
|
50
|
2
|
|
6
|
my $patch = shift || ''; |
56
|
2
|
|
|
|
|
4
|
my $package = __PACKAGE__; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", |
61
|
|
|
|
|
|
|
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998. |
62
|
|
|
|
|
|
|
# Copyright (c) 1998, Robert D. Cameron. |
63
|
|
|
|
|
|
|
# The following code may be freely used and distributed provided that |
64
|
|
|
|
|
|
|
# this copyright and citation notice remains intact and that modifications |
65
|
|
|
|
|
|
|
# or additions are clearly identified. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Modifications may be tracked on XML::Parser::Lite's source code repository at |
68
|
|
|
|
|
|
|
# https://github.com/redhotpenguin/perl-XML-Parser-Lite |
69
|
|
|
|
|
|
|
# |
70
|
2
|
|
|
2
|
|
12
|
use re 'eval'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
149
|
|
71
|
2
|
|
|
|
|
3
|
my $TextSE = "[^<]+"; |
72
|
2
|
|
|
|
|
3
|
my $UntilHyphen = "[^-]*-"; |
73
|
2
|
|
|
2
|
|
813
|
my $Until2Hyphens = "([^-]*)-(?:[^-]$[^-]*-)*-"; |
|
2
|
|
|
|
|
421
|
|
|
2
|
|
|
|
|
631
|
|
|
2
|
|
|
|
|
15
|
|
74
|
|
|
|
|
|
|
#my $CommentCE = "$Until2Hyphens(?{${package}::comment(\$2)})>?"; |
75
|
2
|
|
|
|
|
5
|
my $CommentCE = "(.+)--(?{${package}::comment(\$2)})>?"; |
76
|
|
|
|
|
|
|
# my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; |
77
|
|
|
|
|
|
|
# my $CommentCE = "$Until2Hyphens>?"; |
78
|
2
|
|
|
|
|
4
|
my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; |
79
|
2
|
|
|
|
|
5
|
my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; |
80
|
2
|
|
|
|
|
3
|
my $S = "[ \\n\\t\\r]+"; |
81
|
2
|
|
|
|
|
3
|
my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; |
82
|
2
|
|
|
|
|
5
|
my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; |
83
|
2
|
|
|
|
|
4
|
my $Name = "(?:$NameStrt)(?:$NameChar)*"; |
84
|
2
|
|
|
|
|
3
|
my $QuoteSE = "\"[^\"]*\"|'[^']*'"; |
85
|
2
|
|
|
|
|
6
|
my $DT_IdentSE = "$Name(?:$S(?:$Name|$QuoteSE))*"; |
86
|
|
|
|
|
|
|
# my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; |
87
|
2
|
|
|
|
|
4
|
my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; |
88
|
2
|
|
|
|
|
3
|
my $S1 = "[\\n\\r\\t ]"; |
89
|
2
|
|
|
|
|
4
|
my $UntilQMs = "[^?]*\\?"; |
90
|
2
|
|
|
|
|
4
|
my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*"; |
91
|
2
|
|
|
|
|
8
|
my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S"; |
92
|
2
|
|
|
|
|
7
|
my $DocTypeCE = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${package}::_doctype(\$3)})"; |
93
|
|
|
|
|
|
|
# my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; |
94
|
|
|
|
|
|
|
# my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; |
95
|
|
|
|
|
|
|
# my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; |
96
|
2
|
|
|
|
|
6
|
my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; |
97
|
|
|
|
|
|
|
# my $PI_CE = "$Name(?:$PI_Tail)?"; |
98
|
2
|
|
|
|
|
5
|
my $PI_CE = "($Name(?:$PI_Tail))>(?{${package}::_xmldecl(\$5)})"; |
99
|
|
|
|
|
|
|
# these expressions were modified for backtracking and events |
100
|
|
|
|
|
|
|
# my $EndTagCE = "($Name)(?{${package}::_end(\$2)})(?:$S)?>"; |
101
|
2
|
|
|
|
|
6
|
my $EndTagCE = "($Name)(?{${package}::_end(\$6)})(?:$S)?>"; |
102
|
2
|
|
|
|
|
4
|
my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'"; |
103
|
|
|
|
|
|
|
# my $ElemTagCE = "($Name)(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)(?{[\@{\$^R||[]},\$4=>defined\$5?\$5:\$6]}))*(?:$S)?(/)?>(?{${package}::_start( \$3,\@{\$^R||[]})})(?{\${7} and ${package}::_end(\$3)})"; |
104
|
2
|
|
|
|
|
11
|
my $ElemTagCE = "($Name)" |
105
|
|
|
|
|
|
|
. "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)" |
106
|
|
|
|
|
|
|
. "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>" |
107
|
|
|
|
|
|
|
. "(?{${package}::_start(\$7,\@{\$^R||[]})})(?{\$11 and ${package}::_end(\$7)})"; |
108
|
|
|
|
|
|
|
|
109
|
2
|
|
|
|
|
6
|
my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# Next expression is under "black magic". |
112
|
|
|
|
|
|
|
# Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE', |
113
|
|
|
|
|
|
|
# but it doesn't work under Perl 5.005 and only magic with |
114
|
|
|
|
|
|
|
# (?:....)?? solved the problem. |
115
|
|
|
|
|
|
|
# I would appreciate if someone let me know what is the right thing to do |
116
|
|
|
|
|
|
|
# and what's the reason for all this magic. |
117
|
|
|
|
|
|
|
# Seems like a problem related to (?:....)? rather than to ?{} feature. |
118
|
|
|
|
|
|
|
# Tests are in t/31-xmlparserlite.t if you decide to play with it. |
119
|
|
|
|
|
|
|
#"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE"; |
120
|
2
|
|
|
|
|
10
|
"(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE"; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
setHandlers(); |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Try 5.6 and 5.10 regex first |
126
|
|
|
|
|
|
|
my $REGEXP = _regexp('??'); |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub _parse_re { |
129
|
2
|
|
|
2
|
|
13
|
use re "eval"; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
85
|
|
130
|
25
|
|
|
25
|
|
37
|
undef $^R; |
131
|
2
|
|
|
2
|
|
11
|
no strict 'refs'; ## no critic |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
179
|
|
132
|
25
|
|
|
|
|
1694
|
1 while $_[0] =~ m{$REGEXP}go |
133
|
|
|
|
|
|
|
}; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# fixup regex if it does not work... |
136
|
|
|
|
|
|
|
{ |
137
|
|
|
|
|
|
|
if (not eval { _parse_re('bar'); 1; } ) { |
138
|
|
|
|
|
|
|
$REGEXP = _regexp(); |
139
|
|
|
|
|
|
|
local $^W; |
140
|
|
|
|
|
|
|
*_parse_re = sub { |
141
|
2
|
|
|
2
|
|
12
|
use re "eval"; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
784
|
|
142
|
|
|
|
|
|
|
undef $^R; |
143
|
|
|
|
|
|
|
1 while $_[0] =~ m{$REGEXP}go |
144
|
|
|
|
|
|
|
}; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub parse { |
149
|
23
|
|
|
23
|
1
|
3474
|
_init(); |
150
|
23
|
|
|
|
|
52
|
_parse_re($_[1]); |
151
|
14
|
|
|
|
|
23
|
_final(); |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
my(@stack, $level); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub _init { |
157
|
23
|
|
|
23
|
|
36
|
@stack = (); |
158
|
23
|
|
|
|
|
28
|
$level = 0; |
159
|
23
|
|
|
|
|
39
|
Init(__PACKAGE__, @_); |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub _final { |
163
|
14
|
100
|
|
14
|
|
42
|
die "not properly closed tag '$stack[-1]'\n" if @stack; |
164
|
11
|
100
|
|
|
|
31
|
die "no element found\n" unless $level; |
165
|
8
|
|
|
|
|
17
|
Final(__PACKAGE__, @_) |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
sub _start { |
169
|
106
|
100
|
100
|
106
|
|
335
|
die "multiple roots, wrong element '$_[0]'\n" if $level++ && !@stack; |
170
|
104
|
|
|
|
|
189
|
push(@stack, $_[0]); |
171
|
104
|
|
|
|
|
159
|
my $r=Start(__PACKAGE__, @_); |
172
|
104
|
50
|
|
|
|
2151
|
return ref($r) eq 'ARRAY' ? $r : undef; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub _char { |
176
|
170
|
100
|
|
170
|
|
360
|
Char(__PACKAGE__, $_[0]), return if @stack; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# check for junk before or after element |
179
|
|
|
|
|
|
|
# can't use split or regexp due to limitations in ?{} implementation, |
180
|
|
|
|
|
|
|
# will iterate with loop, but we'll do it no more than two times, so |
181
|
|
|
|
|
|
|
# it shouldn't affect performance |
182
|
11
|
|
|
|
|
34
|
for (my $i=0; $i < length $_[0]; $i++) { |
183
|
21
|
100
|
|
|
|
190
|
die "junk '$_[0]' @{[$level ? 'after' : 'before']} XML element\n" |
|
4
|
100
|
|
|
|
46
|
|
184
|
|
|
|
|
|
|
if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub _end { |
189
|
2
|
|
|
2
|
|
12
|
no warnings qw(uninitialized); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
503
|
|
190
|
100
|
100
|
|
100
|
|
282
|
pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n"; |
191
|
97
|
|
|
|
|
181
|
my $r=End(__PACKAGE__, $_[0]); |
192
|
97
|
50
|
|
|
|
1975
|
return ref($r) eq 'ARRAY' ? $r : undef; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
sub comment { |
196
|
2
|
|
|
2
|
0
|
5
|
my $r=Comment(__PACKAGE__, $_[0]); |
197
|
2
|
50
|
|
|
|
40
|
return ref($r) eq 'ARRAY' ? $r : undef; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub end { |
201
|
0
|
0
|
|
0
|
0
|
0
|
pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n"; |
202
|
0
|
|
|
|
|
0
|
my $r=End(__PACKAGE__, $_[0]); |
203
|
0
|
0
|
|
|
|
0
|
return ref($r) eq 'ARRAY' ? $r : undef; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub _doctype { |
207
|
1
|
|
|
1
|
|
3
|
my $r=Doctype(__PACKAGE__, $_[0]); |
208
|
1
|
50
|
|
|
|
21
|
return ref($r) eq 'ARRAY' ? $r : undef; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub _xmldecl { |
212
|
2
|
|
|
2
|
|
4
|
XMLDecl(__PACKAGE__, $_[0]); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# ====================================================================== |
218
|
|
|
|
|
|
|
1; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
__END__ |