line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# NOTE: This module originally came from SOAP::Lite, which you probably |
2
|
|
|
|
|
|
|
# don't have. It was first repackaged here just to avoid the huge |
3
|
|
|
|
|
|
|
# dependency tree, but this version has several features (CDATA |
4
|
|
|
|
|
|
|
# support, better PI and Comment support) that have been added. |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright (C) 2000-2007 Paul Kulchenko (paulclinger@yahoo.com) |
8
|
|
|
|
|
|
|
# Copyright (C) 2008 Martin Kutter (martin.kutter@fen-net.de) |
9
|
|
|
|
|
|
|
# Copyright (C) 2009-2011 Cal Henderson (cal@iamcal.com) |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# SOAP::Lite is free software; you can redistribute it |
12
|
|
|
|
|
|
|
# and/or modify it under the same terms as Perl itself. |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
package XML::Parser::LiteCopy;

use strict;
use vars qw($VERSION);

# Module version, published as the package variable $VERSION.
$VERSION = '0.720.00';

# File-scoped switch consulted by _error(): when true, parse errors are passed
# to the Error handler; when false they are thrown with die.  new() overwrites
# it on every call, so the setting is shared by all parser objects.
my $ReturnErrors = 0;
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Constructor.
#
#   my $parser = XML::Parser::LiteCopy->new(
#       Handlers     => { Start => \&on_start, ... },
#       ReturnErrors => 1,
#   );
#
# Called on an existing object it returns that object untouched.  Otherwise it
# blesses an empty hash, resets every handler to a no-op, installs the handlers
# given under the 'Handlers' key (a hash ref, optional) and records the
# 'ReturnErrors' flag (default 0).
#
# Note that handlers are installed as subs of this package and ReturnErrors is
# a file-scoped variable, so both are shared by every parser object: the most
# recent call to new() wins.
sub new {
    my ($invocant, %opts) = @_;
    return $invocant if ref $invocant;

    my $parser = bless {}, $invocant;

    # clear first, then install whatever the caller supplied
    $parser->setHandlers();
    $parser->setHandlers(%{ $opts{Handlers} || {} });

    $ReturnErrors = $opts{ReturnErrors} || 0;

    return $parser;
}
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Install event handlers.
#
#   $parser->setHandlers(Start => \&on_start, End => undef, ...);
#
# Takes a list of name => handler pairs after the invocant.  Each handler is
# assigned to the glob of that name in this package; an undefined handler is
# replaced by a no-op sub.  Called with no pairs at all, every known handler
# (Start End Char Final Init CData Comment Doctype PI Error) is reset to a
# no-op.  Returns the invocant.
sub setHandlers {
    my ($self, @pairs) = @_;

    # glob assignment by name needs symbolic refs; clearing $^W keeps the
    # "subroutine redefined" warning quiet for the duration of this call
    no strict 'refs';
    local $^W;

    unless (@pairs) {
        for my $event (qw(Start End Char Final Init CData Comment Doctype PI Error)) {
            *$event = sub {};
        }
    }

    while (@pairs) {
        my $event   = shift @pairs;
        my $handler = shift @pairs;
        *$event = defined $handler ? $handler : sub {};
    }

    return $self;
}
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Build the source text of the shallow-parsing pattern used by _parse_re().
#
# The optional argument is a quantifier suffix that is spliced in directly
# after the text-matching group (the module passes '??'); it defaults to the
# empty string.  The result is returned as a plain string, not a compiled
# regex.  The embedded (?{ ... }) blocks call back into this package
# (_char, comment, cdata, _doctype, _pi, _end, _start) as tokens are matched.
sub _regexp {
    my $patch = $_[0] || '';
    my $pkg   = __PACKAGE__;

    # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html

    # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
    # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998.
    # Copyright (c) 1998, Robert D. Cameron.
    # The following code may be freely used and distributed provided that
    # this copyright and citation notice remains intact and that modifications
    # or additions are clearly identified.

    use re 'eval';

    # Capture groups added on top of the REX expressions:
    #   1 : TextSE
    #   2 : MarkupSPE / DeclCE / CommentCE
    #   3 : MarkupSPE / DeclCE / CDATA_CE
    #   4 : MarkupSPE / DeclCE / DocTypeCE
    #   5 : MarkupSPE / PI_CE
    #   6 : MarkupSPE / EndTagCE
    #   7+: MarkupSPE / ElemTagCE

    # --- character-level building blocks ---
    my $TextSE   = "[^<]+";
    my $S        = "[ \\n\\t\\r]+";
    my $S1       = "[\\n\\r\\t ]";
    my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
    my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
    my $Name     = "(?:$NameStrt)(?:$NameChar)*";
    my $QuoteSE  = "\"[^\"]*\"|'[^']*'";
    my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";

    # --- "scan until terminator" helpers ---
    my $Until2Hyphens = "(?:[^-]*)-(?:[^-]+-)*-";
    my $UntilRSBs     = "[^\\]]*](?:[^\\]]+])*]+";
    my $UntilQMs      = "[^?]*\\?+";

    # --- comments and CDATA sections ---
    my $CommentCE = "($Until2Hyphens)(?{${pkg}::comment(\$2)})>?";
    my $CDATA_CE  = "($UntilRSBs(?:[^\\]>]$UntilRSBs)*)(?{${pkg}::cdata(\$3)})>";

    # --- DOCTYPE declarations and processing instructions ---
    my $DT_IdentSE   = "$Name(?:$S(?:$Name|$QuoteSE))*";
    my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
    my $PI_Tail      = "\\?|$S1$UntilQMs(?:[^>?]$UntilQMs)*";
    my $DT_ItemSE    = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S";
    my $DocTypeCE    = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${pkg}::_doctype(\$4)})";
    my $DeclCE       = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
    my $PI_CE        = "($Name(?:$PI_Tail))>(?{${pkg}::_pi(\$5); undef})";

    # --- tags; these expressions were modified for backtracking and events ---
    my $EndTagCE = "($Name)(?{${pkg}::_end(\$6); undef})(?:$S)?>";

    # Attributes are accumulated pairwise in $^R while the tag is matched and
    # handed to _start() once the closing '>' has been seen; a trailing '/'
    # (group 11) additionally fires _end() for an empty element.
    my $ElemTagCE = join '',
        "($Name)",
        "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)",
        "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>",
        "(?{${pkg}::_start(\$7,\@{\$^R||[]}),\$^R=[]})",
        "(?{\$11 and ${pkg}::_end(\$7); undef})";

    my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";

    # Next expression is under "black magic" (original author's words).
    # Ideally it would be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE',
    # but that did not work under Perl 5.005; only appending a (?:....)??
    # style quantifier to the text group (the $patch argument) solved it.
    # The original author suspected a problem with (?:....)? rather than with
    # the ?{} feature and pointed to t/31-xmlparserlite.t for experiments.
    # A variant that was tried and left disabled:
    #"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
    return "(?:($TextSE)(?{${pkg}::_char(\$1)}))$patch|$MarkupSPE";
}
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Start out with every handler defined as a no-op.
setHandlers();

# Try 5.6 and 5.10 regex first
my $REGEXP = _regexp('??');

# Run the tokenizing pattern over the string in $_[0].
#
# The string is matched in place (no copy is taken) with //g until the
# pattern stops matching; all real work happens in the (?{ ... }) callbacks
# embedded in $REGEXP.  $^R is cleared first because the start-tag callbacks
# use it to accumulate attributes.  The /o flag means $REGEXP is compiled only
# once for this match operator.
sub _parse_re {
    use re 'eval';
    undef $^R;
    while ($_[0] =~ m{$REGEXP}go) {
        # nothing to do here; the callbacks fire during matching
    }
}
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Fix up the regex if it does not work: probe the matcher once on a trivial
# string and, if that throws, rebuild the pattern without the '??' patch and
# replace _parse_re with a fresh sub.  A new sub is installed rather than just
# reassigning $REGEXP because the match in the existing one uses /o.
{
    my $probe_ok = eval { _parse_re('bar'); 1 };

    unless ($probe_ok) {
        $REGEXP = _regexp();

        # keep the glob assignment below from warning about a redefined sub
        local $^W;
        *_parse_re = sub {
            use re 'eval';
            undef $^R;
            while ($_[0] =~ m{$REGEXP}go) {
                # callbacks embedded in the pattern do the work
            }
        };
    }
}
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Parse one XML document.
#
#   $parser->parse($xml_text);
#
# The invocant ($_[0]) is not used; the document is read from $_[1] and is
# passed on without copying.  Resets the parser state, runs the tokenizer
# (which fires the handlers) and finishes with the end-of-document checks.
# Returns whatever _final() returns.
sub parse {
    _init();
    _parse_re($_[1]);
    return _final();
}
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# Parser state shared by the callbacks below:
#   @stack - names of the elements that are currently open
#   $level - number of start tags seen so far in the current document
my (@stack, $level);

# Reset the parser state and fire the Init handler with the package name
# followed by any arguments given to _init().  Returns the handler's result.
sub _init {
    $level = 0;
    @stack = ();
    return Init(__PACKAGE__, @_);
}
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# End-of-document checks.
#
# Reports an error if an element is still open or if no element was seen at
# all; otherwise fires the Final handler (package name plus any arguments)
# and returns its result.
sub _final {
    if (@stack) {
        return _error("not properly closed tag '$stack[-1]'");
    }
    if (!$level) {
        return _error("no element found");
    }
    return Final(__PACKAGE__, @_);
}
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Start-tag callback: $_[0] is the element name, the remaining arguments are
# the attribute name/value pairs collected by the pattern.
#
# A start tag that arrives after at least one earlier start tag while no
# element is open means a second root element, which is an error.  Otherwise
# the name is pushed on the open-element stack and the Start handler is fired
# with the package name followed by all arguments.
sub _start {
    # the counter is bumped for every start tag, including a rejected one
    my $tags_seen_before = $level++;
    if ($tags_seen_before && !@stack) {
        return _error("multiple roots, wrong element '$_[0]'");
    }

    push @stack, $_[0];
    return Start(__PACKAGE__, @_);
}
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# Character-data callback: $_[0] is a run of text between markup.
#
# Inside an element the text is passed to the Char handler.  Outside the root
# element only whitespace is tolerated; anything else is reported as junk
# "before" or "after" the XML element, depending on whether a start tag has
# been seen yet.
#
# Always returns nothing (or whatever _error() returns, which is also nothing
# unless it dies).  The calling (?{ ... }) block stores this sub's result in
# $^R, so the value must be well defined; falling off the end of a loop would
# leave it unspecified.
sub _char {
    if (@stack) {
        Char(__PACKAGE__, $_[0]);
        return;
    }

    # check for junk before or after element
    # can't use split or regexp due to limitations in ?{} implementation,
    # so walk the string one character at a time
    for my $i (0 .. length($_[0]) - 1) {
        next if index("\n\r\t ", substr($_[0], $i, 1)) >= 0;

        my $where = $level ? 'after' : 'before';
        return _error("junk '$_[0]' $where XML element");
    }

    return;
}
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# End-tag callback: $_[0] is the name of the element being closed.
#
# Errors out if no element is open or if the name does not match the most
# recently opened element (which is popped either way).  On success fires the
# End handler with the package name and the element name and returns its
# result.
sub _end {
    unless (@stack) {
        return _error("unexpected closing tag '$_[0]'");
    }

    my $open_tag = pop @stack;
    if ($open_tag ne $_[0]) {
        return _error("mismatched tag '$_[0]'");
    }

    return End(__PACKAGE__, $_[0]);
}
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Comment callback: $_[0] is the comment text as captured by the pattern,
# still carrying its two-character terminator, which is stripped before the
# Comment handler is fired with the package name and the text.
#
# Returns nothing on purpose.  The calling (?{ ... }) block stores this sub's
# result in $^R, and the start-tag pattern dereferences $^R as an array ref
# when collecting attributes, so the handler's own return value must not
# leak out of here.
sub comment {
    Comment(__PACKAGE__, substr($_[0], 0, -2));
    return;
}
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# Alias of _end(), kept under its un-prefixed name for callers that use it.
#
# Delegating (instead of repeating the pop-and-compare) gives this entry point
# the same "unexpected closing tag" check for an empty element stack that
# _end() performs; previously an empty stack was compared against undef and
# reported as a mismatched tag.
sub end {
    return _end($_[0]);
}
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# CDATA callback: $_[0] is the section content as captured by the pattern,
# still carrying two trailing terminator characters, which are stripped
# before the CData handler is fired with the package name and the text.
# A CDATA section outside of any open element is an error.
#
# Returns nothing on purpose.  The calling (?{ ... }) block stores this sub's
# result in $^R, and the start-tag pattern dereferences $^R as an array ref
# when collecting attributes, so the handler's own return value must not
# leak out of here.
sub cdata {
    return _error("CDATA outside of tag stack") unless @stack;

    CData(__PACKAGE__, substr($_[0], 0, -2));
    return;
}
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# DOCTYPE callback: fires the Doctype handler with the package name and the
# declaration text captured by the pattern ($_[0]).
#
# Returns nothing on purpose.  The calling (?{ ... }) block stores this sub's
# result in $^R, and the start-tag pattern dereferences $^R as an array ref
# when collecting attributes, so the handler's own return value must not
# leak out of here.
sub _doctype {
    Doctype(__PACKAGE__, $_[0]);
    return;
}
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Processing-instruction callback: $_[0] is the instruction as captured by
# the pattern, still carrying one trailing terminator character, which is
# stripped before the PI handler is fired with the package name and the text.
# Returns the handler's result.
sub _pi {
    return PI(__PACKAGE__, substr($_[0], 0, -1));
}
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# Report a parse error; $_[0] is the message.
#
# With ReturnErrors switched off the message is thrown with die (a newline is
# appended, so no file/line information is added).  With it switched on the
# Error handler is fired with the package name and the message, and nothing
# is returned.
sub _error {
    die "$_[0]\n" unless $ReturnErrors;

    Error(__PACKAGE__, $_[0]);
    return;
}
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# ====================================================================== |
233
|
|
|
|
|
|
|
1; |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
__END__ |