| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# NOTE: This module originally came from SOAP::Lite, which you probably |
|
2
|
|
|
|
|
|
|
# don't have. It was first repackaged here just to avoid the huge |
|
3
|
|
|
|
|
|
|
# dependency tree, but this version has several features (CDATA |
|
4
|
|
|
|
|
|
|
# support, better PI and Comment support) that have been added. |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# Copyright (C) 2000-2007 Paul Kulchenko (paulclinger@yahoo.com) |
|
8
|
|
|
|
|
|
|
# Copyright (C) 2008 Martin Kutter (martin.kutter@fen-net.de) |
|
9
|
|
|
|
|
|
|
# Copyright (C) 2009-2011 Cal Henderson (cal@iamcal.com) |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# SOAP::Lite is free software; you can redistribute it |
|
12
|
|
|
|
|
|
|
# and/or modify it under the same terms as Perl itself. |
|
13
|
|
|
|
|
|
|
# |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
package XML::Parser::LiteCopy; |
|
16
|
|
|
|
|
|
|
|
|
17
|
9
|
|
|
9
|
|
241732
|
use strict; |
|
|
9
|
|
|
|
|
19
|
|
|
|
9
|
|
|
|
|
354
|
|
|
18
|
9
|
|
|
9
|
|
66
|
use vars qw($VERSION); |
|
|
9
|
|
|
|
|
32
|
|
|
|
9
|
|
|
|
|
1930
|
|
|
19
|
|
|
|
|
|
|
$VERSION = '0.720.00'; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $ReturnErrors = 0; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
sub new { |
|
24
|
48
|
|
|
48
|
1
|
57505
|
my $class = shift; |
|
25
|
|
|
|
|
|
|
|
|
26
|
48
|
50
|
|
|
|
162
|
return $class if ref $class; |
|
27
|
48
|
|
|
|
|
146
|
my $self = bless {} => $class; |
|
28
|
|
|
|
|
|
|
|
|
29
|
48
|
|
|
|
|
129
|
my %parameters = @_; |
|
30
|
48
|
|
|
|
|
138
|
$self->setHandlers(); # clear first |
|
31
|
48
|
100
|
|
|
|
66
|
$self->setHandlers(%{$parameters{Handlers} || {}}); |
|
|
48
|
|
|
|
|
316
|
|
|
32
|
|
|
|
|
|
|
|
|
33
|
48
|
|
100
|
|
|
247
|
$ReturnErrors = $parameters{ReturnErrors} || 0; |
|
34
|
|
|
|
|
|
|
|
|
35
|
48
|
|
|
|
|
163
|
return $self; |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub setHandlers { |
|
39
|
106
|
|
|
106
|
1
|
442
|
my $self = shift; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# allow symbolic refs, avoid "subroutine redefined" warnings |
|
42
|
9
|
|
|
9
|
|
48
|
no strict 'refs'; local $^W; |
|
|
9
|
|
|
|
|
235
|
|
|
|
9
|
|
|
|
|
1642
|
|
|
|
106
|
|
|
|
|
317
|
|
|
43
|
|
|
|
|
|
|
# clear all handlers if called without parameters |
|
44
|
106
|
100
|
|
|
|
281
|
if (not @_) { |
|
45
|
64
|
|
|
|
|
151
|
for (qw(Start End Char Final Init CData Comment Doctype PI Error)) { |
|
46
|
190
|
|
|
190
|
|
3280
|
*$_ = sub {} |
|
47
|
640
|
|
|
|
|
3092
|
} |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# we could use each here, too... |
|
51
|
106
|
|
|
|
|
308
|
while (@_) { |
|
52
|
112
|
|
|
|
|
223
|
my($name, $func) = splice(@_, 0, 2); |
|
53
|
|
|
|
|
|
|
*$name = defined $func |
|
54
|
|
|
|
|
|
|
? $func |
|
55
|
0
|
|
|
0
|
|
0
|
: sub {} |
|
56
|
112
|
50
|
|
|
|
527
|
} |
|
57
|
106
|
|
|
|
|
245
|
return $self; |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub _regexp { |
|
61
|
9
|
|
50
|
9
|
|
43
|
my $patch = shift || ''; |
|
62
|
9
|
|
|
|
|
19
|
my $package = __PACKAGE__; |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", |
|
67
|
|
|
|
|
|
|
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998. |
|
68
|
|
|
|
|
|
|
# Copyright (c) 1998, Robert D. Cameron. |
|
69
|
|
|
|
|
|
|
# The following code may be freely used and distributed provided that |
|
70
|
|
|
|
|
|
|
# this copyright and citation notice remains intact and that modifications |
|
71
|
|
|
|
|
|
|
# or additions are clearly identified. |
|
72
|
|
|
|
|
|
|
|
|
73
|
9
|
|
|
9
|
|
112
|
use re 'eval'; |
|
|
9
|
|
|
|
|
16
|
|
|
|
9
|
|
|
|
|
5511
|
|
|
74
|
9
|
|
|
|
|
15
|
my $TextSE = "[^<]+"; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# the following backrefs have been added: |
|
77
|
|
|
|
|
|
|
# 1 : TextSE |
|
78
|
|
|
|
|
|
|
# 2 : MarkupSPE / DeclCE / CommentCE |
|
79
|
|
|
|
|
|
|
# 3 : MarkupSPE / DeclCE / CDATA_CE |
|
80
|
|
|
|
|
|
|
# 4 : MarkupSPE / DeclCE / DocTypeCE |
|
81
|
|
|
|
|
|
|
# 5 : MarkupSPE / PI_CE |
|
82
|
|
|
|
|
|
|
# 6 : MarkupSPE / EndTagCE |
|
83
|
|
|
|
|
|
|
# 7+: MarkupSPE / ElemTagCE |
|
84
|
|
|
|
|
|
|
|
|
85
|
9
|
|
|
|
|
16
|
my $Until2Hyphens = "(?:[^-]*)-(?:[^-]+-)*-"; |
|
86
|
9
|
|
|
|
|
31
|
my $CommentCE = "($Until2Hyphens)(?{${package}::comment(\$2)})>?"; |
|
87
|
|
|
|
|
|
|
|
|
88
|
9
|
|
|
|
|
20
|
my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; |
|
89
|
9
|
|
|
|
|
27
|
my $CDATA_CE = "($UntilRSBs(?:[^\\]>]$UntilRSBs)*)(?{${package}::cdata(\$3)})>"; |
|
90
|
|
|
|
|
|
|
|
|
91
|
9
|
|
|
|
|
17
|
my $S = "[ \\n\\t\\r]+"; |
|
92
|
9
|
|
|
|
|
15
|
my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; |
|
93
|
9
|
|
|
|
|
14
|
my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; |
|
94
|
9
|
|
|
|
|
31
|
my $Name = "(?:$NameStrt)(?:$NameChar)*"; |
|
95
|
9
|
|
|
|
|
17
|
my $QuoteSE = "\"[^\"]*\"|'[^']*'"; |
|
96
|
9
|
|
|
|
|
33
|
my $DT_IdentSE = "$Name(?:$S(?:$Name|$QuoteSE))*"; |
|
97
|
9
|
|
|
|
|
23
|
my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; |
|
98
|
9
|
|
|
|
|
12
|
my $S1 = "[\\n\\r\\t ]"; |
|
99
|
9
|
|
|
|
|
16
|
my $UntilQMs = "[^?]*\\?+"; |
|
100
|
|
|
|
|
|
|
|
|
101
|
9
|
|
|
|
|
28
|
my $PI_Tail = "\\?|$S1$UntilQMs(?:[^>?]$UntilQMs)*"; |
|
102
|
9
|
|
|
|
|
39
|
my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S"; |
|
103
|
9
|
|
|
|
|
37
|
my $DocTypeCE = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${package}::_doctype(\$4)})"; |
|
104
|
|
|
|
|
|
|
|
|
105
|
9
|
|
|
|
|
38
|
my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; |
|
106
|
|
|
|
|
|
|
|
|
107
|
9
|
|
|
|
|
74
|
my $PI_CE = "($Name(?:$PI_Tail))>(?{${package}::_pi(\$5); undef})"; |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# these expressions were modified for backtracking and events |
|
110
|
|
|
|
|
|
|
|
|
111
|
9
|
|
|
|
|
33
|
my $EndTagCE = "($Name)(?{${package}::_end(\$6); undef})(?:$S)?>"; |
|
112
|
9
|
|
|
|
|
20
|
my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'"; |
|
113
|
|
|
|
|
|
|
|
|
114
|
9
|
|
|
|
|
121
|
my $ElemTagCE = "($Name)" |
|
115
|
|
|
|
|
|
|
. "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)" |
|
116
|
|
|
|
|
|
|
. "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>" |
|
117
|
|
|
|
|
|
|
. "(?{${package}::_start(\$7,\@{\$^R||[]}),\$^R=[]})(?{\$11 and ${package}::_end(\$7); undef})"; |
|
118
|
|
|
|
|
|
|
|
|
119
|
9
|
|
|
|
|
43
|
my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"; |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Next expression is under "black magic". |
|
122
|
|
|
|
|
|
|
# Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE', |
|
123
|
|
|
|
|
|
|
# but it doesn't work under Perl 5.005 and only magic with |
|
124
|
|
|
|
|
|
|
# (?:....)?? solved the problem. |
|
125
|
|
|
|
|
|
|
# I would appreciate it if someone could tell me what the right thing to do is |
|
126
|
|
|
|
|
|
|
# and what's the reason for all this magic. |
|
127
|
|
|
|
|
|
|
# Seems like a problem related to (?:....)? rather than to the ?{} feature. |
|
128
|
|
|
|
|
|
|
# Tests are in t/31-xmlparserlite.t if you decide to play with it. |
|
129
|
|
|
|
|
|
|
#"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE"; |
|
130
|
9
|
|
|
|
|
99
|
"(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE"; |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
setHandlers(); |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Try 5.6 and 5.10 regex first |
|
136
|
|
|
|
|
|
|
my $REGEXP = _regexp('??'); |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub _parse_re { |
|
139
|
9
|
|
|
9
|
|
59
|
use re "eval"; |
|
|
9
|
|
|
|
|
51
|
|
|
|
9
|
|
|
|
|
1193
|
|
|
140
|
57
|
|
|
57
|
|
104
|
undef $^R; |
|
141
|
57
|
|
|
|
|
10521
|
1 while $_[0] =~ m{$REGEXP}go |
|
142
|
|
|
|
|
|
|
}; |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# fixup regex if it does not work... |
|
145
|
|
|
|
|
|
|
{ |
|
146
|
|
|
|
|
|
|
if (not eval { _parse_re('bar'); 1; } ) { |
|
147
|
|
|
|
|
|
|
$REGEXP = _regexp(); |
|
148
|
|
|
|
|
|
|
local $^W; |
|
149
|
|
|
|
|
|
|
*_parse_re = sub { |
|
150
|
9
|
|
|
9
|
|
50
|
use re "eval"; |
|
|
9
|
|
|
|
|
18
|
|
|
|
9
|
|
|
|
|
9936
|
|
|
151
|
|
|
|
|
|
|
undef $^R; |
|
152
|
|
|
|
|
|
|
1 while $_[0] =~ m{$REGEXP}go |
|
153
|
|
|
|
|
|
|
}; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub parse { |
|
158
|
48
|
|
|
48
|
1
|
277
|
_init(); |
|
159
|
48
|
|
|
|
|
147
|
_parse_re($_[1]); |
|
160
|
44
|
|
|
|
|
125
|
_final(); |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
my(@stack, $level); |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub _init { |
|
166
|
48
|
|
|
48
|
|
90
|
@stack = (); |
|
167
|
48
|
|
|
|
|
89
|
$level = 0; |
|
168
|
48
|
|
|
|
|
139
|
Init(__PACKAGE__, @_); |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _final { |
|
172
|
44
|
100
|
|
44
|
|
125
|
return _error("not properly closed tag '$stack[-1]'") if @stack; |
|
173
|
42
|
100
|
|
|
|
126
|
return _error("no element found") unless $level; |
|
174
|
40
|
|
|
|
|
100
|
Final(__PACKAGE__, @_) |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub _start { |
|
178
|
80
|
100
|
100
|
80
|
|
343
|
return _error("multiple roots, wrong element '$_[0]'") if $level++ && !@stack; |
|
179
|
78
|
|
|
|
|
194
|
push(@stack, $_[0]); |
|
180
|
78
|
|
|
|
|
194
|
Start(__PACKAGE__, @_); |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub _char { |
|
184
|
65
|
100
|
|
65
|
|
237
|
Char(__PACKAGE__, $_[0]), return if @stack; |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# check for junk before or after element |
|
187
|
|
|
|
|
|
|
# can't use split or regexp due to limitations in ?{} implementation, |
|
188
|
|
|
|
|
|
|
# will iterate with loop, but we'll do it no more than two times, so |
|
189
|
|
|
|
|
|
|
# it shouldn't affect performance |
|
190
|
16
|
|
|
|
|
101
|
for (my $i=0; $i < length $_[0]; $i++) { |
|
191
|
24
|
100
|
|
|
|
458
|
return _error("junk '$_[0]' @{[$level ? 'after' : 'before']} XML element") |
|
|
4
|
100
|
|
|
|
23
|
|
|
192
|
|
|
|
|
|
|
if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub _end { |
|
197
|
76
|
100
|
|
76
|
|
225
|
return _error("unexpected closing tag '$_[0]'") if !@stack; |
|
198
|
75
|
100
|
|
|
|
272
|
pop(@stack) eq $_[0] or return _error("mismatched tag '$_[0]'"); |
|
199
|
72
|
|
|
|
|
341
|
End(__PACKAGE__, $_[0]); |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub comment { |
|
203
|
9
|
|
|
9
|
0
|
35
|
Comment(__PACKAGE__, substr $_[0], 0, -2); |
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub end { |
|
207
|
0
|
0
|
|
0
|
0
|
0
|
pop(@stack) eq $_[0] or return _error("mismatched tag '$_[0]'"); |
|
208
|
0
|
|
|
|
|
0
|
End(__PACKAGE__, $_[0]); |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub cdata { |
|
212
|
9
|
50
|
|
9
|
0
|
22
|
return _error("CDATA outside of tag stack") unless @stack; |
|
213
|
9
|
|
|
|
|
38
|
CData(__PACKAGE__, substr $_[0], 0, -2); |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub _doctype { |
|
217
|
1
|
|
|
1
|
|
5
|
Doctype(__PACKAGE__, $_[0]); |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
sub _pi { |
|
221
|
6
|
|
|
6
|
|
27
|
PI(__PACKAGE__, substr $_[0], 0, -1); |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
sub _error { |
|
225
|
14
|
100
|
|
14
|
|
48
|
if ($ReturnErrors){ |
|
226
|
8
|
|
|
|
|
23
|
Error(__PACKAGE__, $_[0]); |
|
227
|
8
|
|
|
|
|
225
|
return; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
6
|
|
|
|
|
42
|
die "$_[0]\n"; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# ====================================================================== |
|
233
|
|
|
|
|
|
|
1; |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
__END__ |