line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package simpleXMLParse; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Perl Module: simpleXMLParse |
4
|
|
|
|
|
|
|
# Author: Daniel Edward Graham |
5
|
|
|
|
|
|
|
# Copyright (c) Daniel Edward Graham 2008-2018 |
6
|
|
|
|
|
|
|
# Date: 01/01/2018 |
7
|
|
|
|
|
|
|
# License: LGPL 3.0 |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
require Exporter; |
11
|
1
|
|
|
1
|
|
4164
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
59
|
|
12
|
1
|
|
|
1
|
|
472
|
use Data::Dumper; |
|
1
|
|
|
|
|
5307
|
|
|
1
|
|
|
|
|
82
|
|
13
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# This allows declaration use simpleXMLParse ':all'; |
16
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
17
|
|
|
|
|
|
|
# will save memory. |
18
|
|
|
|
|
|
|
%EXPORT_TAGS = ( 'all' => [ qw( |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
) ] ); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
@EXPORT = qw( |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
$VERSION = '3.1'; |
29
|
|
|
|
|
|
|
|
30
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
40
|
|
31
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
32
|
1
|
|
|
1
|
|
3
|
no warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2639
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
#use open ':encoding(utf8)'; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my @cdata; |
37
|
|
|
|
|
|
|
my $cdataInd = 0; |
38
|
|
|
|
|
|
|
my $MAXIND = 10000; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub new { |
41
|
0
|
|
|
0
|
0
|
|
my $class = shift; |
42
|
0
|
0
|
|
|
|
|
my %args = (@_ == 1) ? ((ref($_[0]) eq 'HASH') ? %{$_[0]}:(input => $_[0])):@_; |
|
0
|
0
|
|
|
|
|
|
43
|
0
|
|
|
|
|
|
my $altstyle = 0; |
44
|
0
|
|
|
|
|
|
my $fn; |
45
|
0
|
|
|
|
|
|
$fn = $args{"input"}; |
46
|
0
|
0
|
|
|
|
|
$altstyle = 1 if ($args{"style"} eq '2'); |
47
|
0
|
|
|
|
|
|
my $self = {}; |
48
|
0
|
|
|
|
|
|
$self->{"xml"} = undef; |
49
|
0
|
|
|
|
|
|
$self->{"data"} = undef; |
50
|
0
|
0
|
|
|
|
|
open (INFILE1, "$fn") or croak "Unable to process [$fn] $! \n"; |
51
|
0
|
|
|
|
|
|
binmode(INFILE1); |
52
|
0
|
|
|
|
|
|
my ($c1, $c2, $c3); |
53
|
0
|
|
|
|
|
|
read(INFILE1, $c1, 1); |
54
|
0
|
|
|
|
|
|
read(INFILE1, $c2, 1); |
55
|
0
|
|
|
|
|
|
read(INFILE1, $c3, 1); |
56
|
0
|
|
|
|
|
|
close(INFILE1); |
57
|
0
|
0
|
0
|
|
|
|
if (($c1 eq "\xFE" && $c2 eq "\xFF") || ($c1 eq "\xFF" && $c2 eq "\xFE")) { |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
58
|
|
|
|
|
|
|
# UTF-16 |
59
|
0
|
0
|
|
|
|
|
open(INFILE, '<:encoding(UTF-16)', "$fn") or croak "Unable to process [$fn] $!\n"; |
60
|
0
|
|
|
|
|
|
$self->{"xml"} = join '', ; |
61
|
|
|
|
|
|
|
} else { |
62
|
0
|
0
|
0
|
|
|
|
if ($c1 eq "\xEF" && $c2 eq "\xBB" && $c3 eq "\xBF") { |
|
|
|
0
|
|
|
|
|
63
|
|
|
|
|
|
|
# UTF-8 with BOM... |
64
|
0
|
0
|
|
|
|
|
open(INFILE, '<:encoding(UTF-8)', "$fn") or croak "Unable to process [$fn] $!\n"; |
65
|
0
|
|
|
|
|
|
my $str = join '', ; |
66
|
|
|
|
|
|
|
# $str =~ s/^\xEF\xBB\xBF//g; |
67
|
0
|
|
|
|
|
|
$str =~ s/^\x{fffe}//g; |
68
|
0
|
|
|
|
|
|
$str =~ s/^\x{feff}//g; |
69
|
0
|
|
|
|
|
|
$self->{"xml"} = $str; |
70
|
|
|
|
|
|
|
} else { |
71
|
|
|
|
|
|
|
# UTF-8 with NO BOM |
72
|
0
|
0
|
|
|
|
|
open(INFILE, '<:encoding(UTF-8)', "$fn") or croak "Unable to process [$fn] $!\n"; |
73
|
0
|
|
|
|
|
|
$self->{"xml"} = join '', ; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
} |
76
|
0
|
|
|
|
|
|
close(INFILE); |
77
|
0
|
|
|
|
|
|
$self->{"data"} = _ParseXML( $self->{"xml"}, $altstyle ); |
78
|
0
|
|
|
|
|
|
my $ret = bless $self; |
79
|
0
|
0
|
|
|
|
|
if ($altstyle) { |
80
|
0
|
|
|
|
|
|
$ret->_convertToStyle(); |
81
|
|
|
|
|
|
|
} |
82
|
0
|
|
|
|
|
|
$cdataInd = $cdataInd % $MAXIND; |
83
|
0
|
|
|
|
|
|
return $ret; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub parse { |
87
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
88
|
0
|
|
|
|
|
|
return $self->{data}; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub _convertToStyle { |
92
|
0
|
|
|
0
|
|
|
my $self = shift; |
93
|
0
|
|
|
|
|
|
my @recursearr = ($self->{"data"}); |
94
|
0
|
|
|
|
|
|
while (@recursearr) { |
95
|
0
|
|
|
|
|
|
my $i = pop @recursearr; |
96
|
0
|
0
|
|
|
|
|
if (ref($i) eq "HASH") { |
97
|
0
|
|
|
|
|
|
foreach my $j (keys %$i) { |
98
|
0
|
0
|
|
|
|
|
if ($j =~ /^(.*?)\_(.*?)\_([0-9]+)\_attr$/) { |
99
|
0
|
|
|
|
|
|
my ($attrnm, $tagnm, $cnt) = ($1, $2, $3); |
100
|
0
|
|
|
|
|
|
$attrnm =~ s/0x0/_/gs; |
101
|
0
|
|
|
|
|
|
$tagnm =~ s/0x0/_/gs; |
102
|
0
|
|
|
|
|
|
my $n = undef; |
103
|
0
|
0
|
|
|
|
|
if (ref($i->{$tagnm}) eq "ARRAY") { |
104
|
0
|
|
|
|
|
|
my $hold; |
105
|
0
|
0
|
|
|
|
|
if (ref($i->{$tagnm}->[$cnt]) eq '') { |
106
|
0
|
|
|
|
|
|
$hold = $i->{$tagnm}->[$cnt]; |
107
|
0
|
|
|
|
|
|
$i->{$tagnm}->[$cnt] = { }; |
108
|
0
|
0
|
|
|
|
|
if ($hold !~ /^\s*$/ ) { |
109
|
0
|
|
|
|
|
|
$i->{$tagnm}->[$cnt]->{content} = $hold; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
0
|
|
|
|
|
|
while (defined($i->{$tagnm}->[$cnt]->{$attrnm.$n})) { |
113
|
0
|
|
|
|
|
|
$n++; |
114
|
|
|
|
|
|
|
} |
115
|
0
|
|
|
|
|
|
$i->{$tagnm}->[$cnt]->{$attrnm.$n} = $i->{$j}; |
116
|
|
|
|
|
|
|
} else { |
117
|
0
|
0
|
|
|
|
|
if (ref($i->{$tagnm}) eq "HASH") { |
118
|
0
|
|
|
|
|
|
my $n = undef; |
119
|
0
|
|
|
|
|
|
while (defined($i->{$tagnm}->{$attrnm.$n})) { |
120
|
0
|
|
|
|
|
|
$n++; |
121
|
|
|
|
|
|
|
} |
122
|
0
|
|
|
|
|
|
$i->{$tagnm}->{$attrnm.$n} = $i->{$j}; |
123
|
|
|
|
|
|
|
} else { |
124
|
0
|
|
|
|
|
|
my $hold; |
125
|
0
|
|
|
|
|
|
$hold = $i->{$tagnm}; |
126
|
0
|
|
|
|
|
|
$i->{$tagnm} = { }; |
127
|
0
|
0
|
|
|
|
|
if ($hold !~ /^\s*$/) { |
128
|
0
|
|
|
|
|
|
$i->{$tagnm}->{content} = $hold; |
129
|
|
|
|
|
|
|
} |
130
|
0
|
|
|
|
|
|
$i->{$tagnm}->{$attrnm} = $i->{$j}; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
} |
133
|
0
|
|
|
|
|
|
delete $i->{$j}; |
134
|
|
|
|
|
|
|
} else { |
135
|
0
|
|
|
|
|
|
push @recursearr, $i->{$j}; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
} else { |
139
|
0
|
0
|
|
|
|
|
if (ref($i) eq "ARRAY") { |
140
|
0
|
|
|
|
|
|
foreach my $j (@$i) { |
141
|
0
|
|
|
|
|
|
push @recursearr, $j; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub _cdatasub { |
149
|
0
|
|
|
0
|
|
|
my $cdata = shift; |
150
|
0
|
|
|
|
|
|
my $tmpind = $cdataInd++; |
151
|
0
|
|
|
|
|
|
$cdata[$tmpind] = $cdata; |
152
|
0
|
|
|
|
|
|
return "0x0CDATA0x0".($tmpind)."0x0"; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub _cdatasubout { |
156
|
0
|
|
|
0
|
|
|
my $ind = shift; |
157
|
0
|
|
|
|
|
|
my $cdata = $cdata[$ind]; |
158
|
0
|
|
|
|
|
|
return $cdata; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub _unescp { |
162
|
0
|
|
|
0
|
|
|
my $firsttag = shift; |
163
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\\/\\/gs; |
164
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\*/\*/gs; |
165
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\|/\|/gs; |
166
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\$/\$/gs; |
167
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\?/\?/gs; |
168
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\{/\{/gs; |
169
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\}/\}/gs; |
170
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\(/\(/gs; |
171
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\)/\)/gs; |
172
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\+/\+/gs; |
173
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\[/\[/gs; |
174
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\]/\]/gs; |
175
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\./\./gs; |
176
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\^/\^/gs; |
177
|
0
|
|
|
|
|
|
$firsttag =~ s/\\\-/\-/gs; |
178
|
0
|
|
|
|
|
|
return $firsttag; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub hconv { |
182
|
0
|
|
|
0
|
0
|
|
my $arg = $_[0]; |
183
|
0
|
|
|
|
|
|
my $p = pack "H*", $arg; |
184
|
0
|
|
|
|
|
|
return $p; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub _entity { |
188
|
0
|
|
|
0
|
|
|
my $text = shift; |
189
|
0
|
|
|
|
|
|
$text =~ s/\<\;/\
|
190
|
0
|
|
|
|
|
|
$text =~ s/\>\;/\>/g; |
191
|
0
|
|
|
|
|
|
$text =~ s/\&\;/\&/g; |
192
|
0
|
|
|
|
|
|
$text =~ s/\&apos\;/\'/g; |
193
|
0
|
|
|
|
|
|
$text =~ s/\"\;/\"/g; |
194
|
0
|
|
|
|
|
|
$text =~ s/\&\#x([0-9a-fA-F]+)\;/&hconv($1)/ge; |
|
0
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
|
return $text; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub _ParseXML { |
199
|
0
|
|
|
0
|
|
|
my ($xml, $altstyle) = @_; |
200
|
|
|
|
|
|
|
# $xml =~ s/\n//g; |
201
|
0
|
|
|
|
|
|
$xml =~ s/\<\!\[CDATA\[(.*?)\]\]\>/&_cdatasub($1)/egs; |
|
0
|
|
|
|
|
|
|
202
|
0
|
|
|
|
|
|
$xml =~ s/\<\!\-\-.*?\-\-\>//gs; |
203
|
0
|
|
|
|
|
|
$xml =~ s/\<\?xml.*?\?\>//gs; |
204
|
0
|
|
|
|
|
|
$xml =~ s/\<\?[^\>]*?\?\>//gs; |
205
|
0
|
|
|
|
|
|
$xml =~ s/\<\!\-\-[^\>]*?\-\-\>//gs; |
206
|
0
|
|
|
|
|
|
$xml =~ s/\<\!ELEMENT[^\>]*?\>//gs; |
207
|
0
|
|
|
|
|
|
$xml =~ s/\<\!ENTITY[^\>]*?\>//gs; |
208
|
0
|
|
|
|
|
|
$xml =~ s/\<\!ATTLIST[^\>]*?\>//gs; |
209
|
0
|
|
|
|
|
|
$xml =~ s/\<\!DOCTYPE[^\>]*?\>//gs; |
210
|
0
|
|
|
|
|
|
my $rethash = (); |
211
|
0
|
|
|
|
|
|
my @retarr; |
212
|
0
|
|
|
|
|
|
my $firsttag = $xml; |
213
|
0
|
|
|
|
|
|
my ( $attr, $innerxml, $xmlfragment ); |
214
|
0
|
|
|
|
|
|
$firsttag =~ s/^[\s\n]*\<([^\s\>\n\/]*).*$/$1/gs; |
215
|
0
|
|
|
|
|
|
$firsttag =~ s/\\/\\\\/gs; |
216
|
0
|
|
|
|
|
|
$firsttag =~ s/\*/\\\*/gs; |
217
|
0
|
|
|
|
|
|
$firsttag =~ s/\|/\\\|/gs; |
218
|
0
|
|
|
|
|
|
$firsttag =~ s/\$/\\\$/gs; |
219
|
0
|
|
|
|
|
|
$firsttag =~ s/\?/\\\?/gs; |
220
|
0
|
|
|
|
|
|
$firsttag =~ s/\{/\\\{/gs; |
221
|
0
|
|
|
|
|
|
$firsttag =~ s/\}/\\\}/gs; |
222
|
0
|
|
|
|
|
|
$firsttag =~ s/\(/\\\(/gs; |
223
|
0
|
|
|
|
|
|
$firsttag =~ s/\)/\\\)/gs; |
224
|
0
|
|
|
|
|
|
$firsttag =~ s/\+/\\\+/gs; |
225
|
0
|
|
|
|
|
|
$firsttag =~ s/\[/\\\[/gs; |
226
|
0
|
|
|
|
|
|
$firsttag =~ s/\]/\\\]/gs; |
227
|
0
|
|
|
|
|
|
$firsttag =~ s/\./\\\./gs; |
228
|
0
|
|
|
|
|
|
$firsttag =~ s/\^/\\\^/gs; |
229
|
0
|
|
|
|
|
|
$firsttag =~ s/\-/\\\-/gs; |
230
|
|
|
|
|
|
|
|
231
|
0
|
0
|
|
|
|
|
if ( $xml =~ /^[\s\n]*\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s ) |
232
|
|
|
|
|
|
|
{ |
233
|
0
|
|
|
|
|
|
$attr = $1; |
234
|
0
|
|
|
|
|
|
$innerxml = $2; |
235
|
0
|
|
|
|
|
|
$xmlfragment = $3; |
236
|
0
|
|
|
|
|
|
$attr =~ s/\>$//gs; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
else { |
239
|
0
|
0
|
|
|
|
|
if ( $xml =~ /^[\s\n]*\<${firsttag}(\/\>|[\s\n][^\>]*\/\>)(.*)$/s ) { |
240
|
0
|
|
|
|
|
|
$attr = $1; |
241
|
0
|
|
|
|
|
|
$innerxml = ""; |
242
|
0
|
|
|
|
|
|
$xmlfragment = $2; |
243
|
0
|
|
|
|
|
|
$attr =~ s/\/\>$//gs; |
244
|
|
|
|
|
|
|
} else { |
245
|
0
|
0
|
|
|
|
|
if (!ref($xml)) { |
246
|
0
|
|
|
|
|
|
$xml = _entity($xml); |
247
|
0
|
|
|
|
|
|
$xml =~ s/0x0CDATA0x0(\d+?)0x0/&_cdatasubout($1)/egs; |
|
0
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
} |
249
|
0
|
0
|
|
|
|
|
if ($xml eq '') { |
250
|
0
|
|
|
|
|
|
return {}; |
251
|
|
|
|
|
|
|
} else { |
252
|
0
|
|
|
|
|
|
return $xml; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
} |
256
|
0
|
|
|
|
|
|
my $ixml = $innerxml; |
257
|
0
|
|
|
|
|
|
while ($ixml =~ /^.*?\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)$/s) { |
258
|
0
|
|
|
|
|
|
$ixml = $2; |
259
|
0
|
|
|
|
|
|
$innerxml .= "${firsttag}>"; |
260
|
0
|
0
|
|
|
|
|
if ($xmlfragment =~ /^(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s) { |
261
|
0
|
|
|
|
|
|
my $ix = $1; |
262
|
0
|
|
|
|
|
|
$innerxml .= $ix; |
263
|
0
|
|
|
|
|
|
$ixml .= $ix; |
264
|
0
|
|
|
|
|
|
$xmlfragment = $2; |
265
|
|
|
|
|
|
|
} else { |
266
|
0
|
|
|
|
|
|
die "Invalid XML innerxml: $innerxml\nixml: $ixml\nxmlfragment: $xmlfragment\n"; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
0
|
|
|
|
|
|
my $nextparse = _ParseXML($innerxml, $altstyle); |
270
|
0
|
|
|
|
|
|
$rethash->{&_unescp($firsttag)} = $nextparse; |
271
|
0
|
|
|
|
|
|
my @attrarr; |
272
|
0
|
|
|
|
|
|
while ( $attr =~ s/^[\s\n]*([^\s\=\n]+)\s*\=\s*(\".*?\"|\'.*?\')(.*)$/$3/gs ) { |
273
|
0
|
|
|
|
|
|
my ($name, $val) = ($1, $2); |
274
|
0
|
|
|
|
|
|
$val =~ s/^\'(.*)\'$/$1/gs; |
275
|
0
|
|
|
|
|
|
$val =~ s/^\"(.*)\"$/$1/gs; |
276
|
0
|
|
|
|
|
|
push @attrarr, $name; |
277
|
0
|
|
|
|
|
|
push @attrarr, _entity($val); |
278
|
|
|
|
|
|
|
} |
279
|
0
|
|
|
|
|
|
my $attrcnt = 0; |
280
|
0
|
|
|
|
|
|
while ( my $val = shift(@attrarr) ) { |
281
|
0
|
|
|
|
|
|
my ($val1, $firsttag1) = ($val, $firsttag); |
282
|
0
|
0
|
|
|
|
|
if ($altstyle) { |
283
|
0
|
|
|
|
|
|
$val1 =~ s/_/0x0/gs; |
284
|
0
|
|
|
|
|
|
$firsttag1 =~ s/_/0x0/gs; |
285
|
|
|
|
|
|
|
} |
286
|
0
|
|
|
|
|
|
$rethash->{ "$val1" . "_".&_unescp(${firsttag1})."_" . $attrcnt . "_attr" } = shift(@attrarr); |
287
|
|
|
|
|
|
|
} |
288
|
0
|
|
|
|
|
|
my $retflag = 0; |
289
|
0
|
|
|
|
|
|
my ( $xmlfragment1, $xmlfragment2 ); |
290
|
0
|
|
|
|
|
|
my %attrhash; |
291
|
0
|
|
|
|
|
|
$attrcnt++; |
292
|
0
|
|
|
|
|
|
while (1) { |
293
|
0
|
0
|
|
|
|
|
if ( $xmlfragment =~ |
294
|
|
|
|
|
|
|
/^(.*?)\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s ) |
295
|
|
|
|
|
|
|
{ |
296
|
0
|
0
|
|
|
|
|
if ( !$retflag ) { |
297
|
0
|
|
|
|
|
|
push @retarr, $nextparse; |
298
|
|
|
|
|
|
|
} |
299
|
0
|
|
|
|
|
|
$retflag = 1; |
300
|
0
|
|
|
|
|
|
$xmlfragment1 = $1; |
301
|
0
|
|
|
|
|
|
$attr = $2; |
302
|
0
|
|
|
|
|
|
$innerxml = $3; |
303
|
0
|
|
|
|
|
|
$xmlfragment2 = $4; |
304
|
|
|
|
|
|
|
} else { |
305
|
0
|
0
|
|
|
|
|
if ( $xmlfragment =~ /^(.*?)\<${firsttag}(\/\>|[\s\n][^\>]*\/\>)(.*)$/s ) { |
306
|
0
|
0
|
|
|
|
|
if ( !$retflag ) { |
307
|
0
|
|
|
|
|
|
push @retarr, $nextparse; |
308
|
|
|
|
|
|
|
} |
309
|
0
|
|
|
|
|
|
$retflag = 1; |
310
|
0
|
|
|
|
|
|
$xmlfragment1 = $1; |
311
|
0
|
|
|
|
|
|
$attr = $2; |
312
|
0
|
|
|
|
|
|
$innerxml = ""; |
313
|
0
|
|
|
|
|
|
$xmlfragment2 = $3; |
314
|
|
|
|
|
|
|
} else { |
315
|
0
|
|
|
|
|
|
last; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
} |
318
|
0
|
|
|
|
|
|
$attr =~ s/\/\>$//gs; |
319
|
0
|
|
|
|
|
|
$attr =~ s/\>$//gs; |
320
|
0
|
|
|
|
|
|
my %opening = ( ); |
321
|
0
|
|
|
|
|
|
my %closing = ( ); |
322
|
0
|
|
|
|
|
|
my $frag = $xmlfragment1; |
323
|
0
|
|
|
|
|
|
while ($frag =~ /^(.*?)\<([^\s\n\/\>]+)(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*)$/s) { |
324
|
0
|
|
|
|
|
|
my $tg = $2; |
325
|
0
|
|
|
|
|
|
$frag = $4; |
326
|
0
|
|
|
|
|
|
$opening{$tg}++; |
327
|
|
|
|
|
|
|
} |
328
|
0
|
|
|
|
|
|
my $frag = $xmlfragment1; |
329
|
0
|
|
|
|
|
|
while ($frag =~ /^(.*?)\<\/([^\s\n\>]+)[\s\n]*\>(.*)$/s) { |
330
|
0
|
|
|
|
|
|
my $tg = $2; |
331
|
0
|
|
|
|
|
|
$frag = $3; |
332
|
0
|
|
|
|
|
|
$closing{$tg}++; |
333
|
|
|
|
|
|
|
} |
334
|
0
|
|
|
|
|
|
my $frag = $xmlfragment1; |
335
|
0
|
|
|
|
|
|
while ($frag =~ /^(.*?)\<([^\s\n\/\>]+)[^\>]*?\/\>(.*)$/s) { |
336
|
0
|
|
|
|
|
|
my $tg = $2; |
337
|
0
|
|
|
|
|
|
$frag = $3; |
338
|
0
|
|
|
|
|
|
$opening{$tg}++; |
339
|
0
|
|
|
|
|
|
$closing{$tg}++; |
340
|
|
|
|
|
|
|
} |
341
|
0
|
|
|
|
|
|
my $flag = 0; |
342
|
0
|
|
|
|
|
|
foreach my $k (keys %opening) { |
343
|
0
|
0
|
|
|
|
|
if ($opening{$k} > $closing{$k}) { |
344
|
0
|
|
|
|
|
|
$xmlfragment = $xmlfragment1 . "<${firsttag}0x0 ${attr}>${innerxml}${firsttag}0x0>". $xmlfragment2; |
345
|
0
|
|
|
|
|
|
$flag = 1; |
346
|
0
|
|
|
|
|
|
last; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
} |
349
|
0
|
0
|
|
|
|
|
next if ($flag); |
350
|
0
|
|
|
|
|
|
my $ixml = $innerxml; |
351
|
0
|
|
|
|
|
|
while ($ixml =~ /.*?\<${firsttag}(\>|[\s\n]\>|[\s\n][^\>]*[^\/]\>)(.*?)$/s) { |
352
|
0
|
|
|
|
|
|
$ixml = $2; |
353
|
0
|
|
|
|
|
|
$innerxml .= "${firsttag}>"; |
354
|
0
|
0
|
|
|
|
|
if ($xmlfragment2 =~ /(.*?)\<\/${firsttag}[\s\n]*\>(.*)$/s) { |
355
|
0
|
|
|
|
|
|
my $ix = $1; |
356
|
0
|
|
|
|
|
|
$innerxml .= $ix; |
357
|
0
|
|
|
|
|
|
$ixml .= $ix; |
358
|
0
|
|
|
|
|
|
$xmlfragment2 = $2; |
359
|
|
|
|
|
|
|
} else { |
360
|
0
|
|
|
|
|
|
die "Invalid XML"; |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
} |
363
|
0
|
|
|
|
|
|
$xmlfragment = $xmlfragment1 . $xmlfragment2; |
364
|
0
|
|
|
|
|
|
while ( $attr =~ s/^[\s\n]*([^\s\=\n]+)\s*\=\s*(\".*?\"|\'.*?\')(.*)$/$3/gs ) { |
365
|
0
|
|
|
|
|
|
my ($name, $val) = ($1, $2); |
366
|
0
|
|
|
|
|
|
$val =~ s/^\'(.*)\'$/$1/gs; |
367
|
0
|
|
|
|
|
|
$val =~ s/^\"(.*)\"$/$1/gs; |
368
|
0
|
|
|
|
|
|
push @attrarr, $name; |
369
|
0
|
|
|
|
|
|
push @attrarr, _entity($val); |
370
|
|
|
|
|
|
|
} |
371
|
0
|
|
|
|
|
|
while ( my $val = shift(@attrarr) ) { |
372
|
0
|
|
|
|
|
|
my ($val1, $firsttag1) = ($val, $firsttag); |
373
|
0
|
0
|
|
|
|
|
if ($altstyle) { |
374
|
0
|
|
|
|
|
|
$val1 =~ s/_/0x0/gs; |
375
|
0
|
|
|
|
|
|
$firsttag1 =~ s/_/0x0/gs; |
376
|
|
|
|
|
|
|
} |
377
|
0
|
|
|
|
|
|
$rethash->{ "$val1" . "_".&_unescp(${firsttag1})."_" . $attrcnt . "_attr" } = shift(@attrarr); |
378
|
|
|
|
|
|
|
} |
379
|
0
|
|
|
|
|
|
$attrcnt++; |
380
|
0
|
|
|
|
|
|
$nextparse = _ParseXML($innerxml, $altstyle); |
381
|
0
|
|
|
|
|
|
push @retarr, $nextparse; |
382
|
|
|
|
|
|
|
} |
383
|
0
|
0
|
|
|
|
|
if (@retarr) { |
384
|
0
|
0
|
|
|
|
|
if (@retarr == 1) { |
385
|
0
|
|
|
|
|
|
$rethash->{_unescp($firsttag)} = $retarr[0]; |
386
|
|
|
|
|
|
|
} else { |
387
|
0
|
|
|
|
|
|
$rethash->{_unescp($firsttag)} = \@retarr; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} |
390
|
0
|
|
|
|
|
|
$xmlfragment =~ s/${firsttag}0x0/${firsttag}/gs; |
391
|
0
|
|
|
|
|
|
my $remainderparse = _ParseXML($xmlfragment, $altstyle); |
392
|
0
|
|
|
|
|
|
my $attrcnt; |
393
|
|
|
|
|
|
|
my $attrfrag; |
394
|
0
|
0
|
|
|
|
|
if ( ref($remainderparse) eq "HASH" ) { |
395
|
0
|
|
|
|
|
|
foreach ( keys %{$remainderparse} ) { |
|
0
|
|
|
|
|
|
|
396
|
0
|
|
|
|
|
|
$rethash->{&_unescp($_)} = $remainderparse->{&_unescp($_)}; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
} |
399
|
0
|
0
|
|
|
|
|
if ( keys %{$rethash} ) { |
|
0
|
|
|
|
|
|
|
400
|
0
|
|
|
|
|
|
return $rethash; |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
else { |
403
|
|
|
|
|
|
|
# return undef; |
404
|
0
|
|
|
|
|
|
return {}; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
1; |
409
|
|
|
|
|
|
|
__END__ |