line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package XML::Reader; |
2
|
|
|
|
|
|
|
$XML::Reader::VERSION = '0.67'; |
3
|
1
|
|
|
1
|
|
1015
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
29
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
5
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
55
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
517
|
use Acme::HTTP; |
|
1
|
|
|
|
|
3084
|
|
|
1
|
|
|
|
|
6117
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
set_timeout(10); |
10
|
|
|
|
|
|
|
set_redir_max(5); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
require Exporter; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
15
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( all => [ qw(slurp_xml) ] ); |
16
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
17
|
|
|
|
|
|
|
our @EXPORT = qw(); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my $use_module; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
sub import { |
22
|
1
|
|
|
1
|
|
10
|
my $calling_module = shift; |
23
|
|
|
|
|
|
|
|
24
|
1
|
|
|
|
|
2
|
my @plist; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $act_module; |
27
|
|
|
|
|
|
|
|
28
|
1
|
|
|
|
|
3
|
for my $sub (@_) { |
29
|
0
|
0
|
0
|
|
|
0
|
if ($sub eq 'XML::Parser' or $sub eq 'XML::Parsepp') { |
30
|
0
|
0
|
|
|
|
0
|
if (defined $act_module) { |
31
|
0
|
|
|
|
|
0
|
die "Duplicate module ('$act_module' and '$sub')"; |
32
|
|
|
|
|
|
|
} |
33
|
0
|
|
|
|
|
0
|
$act_module = $sub; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
else { |
36
|
0
|
|
|
|
|
0
|
push @plist, $sub; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
1
|
|
|
|
|
4
|
activate($act_module); |
41
|
|
|
|
|
|
|
|
42
|
1
|
|
|
|
|
107
|
XML::Reader->export_to_level(1, $calling_module, @plist); |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub activate { |
46
|
1
|
|
|
1
|
0
|
2
|
my ($mod) = @_; |
47
|
|
|
|
|
|
|
|
48
|
1
|
50
|
|
|
|
5
|
if (defined $mod) { |
49
|
0
|
0
|
|
|
|
0
|
if ($mod eq 'XML::Parser') { |
|
|
0
|
|
|
|
|
|
50
|
0
|
|
|
|
|
0
|
require XML::Parser; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
elsif ($mod eq 'XML::Parsepp') { |
53
|
0
|
|
|
|
|
0
|
require XML::Parsepp; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
else { |
56
|
0
|
|
|
|
|
0
|
die "Can't identify module = '$mod'"; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
else { # No backend provided - try to do the right thing |
60
|
1
|
|
|
|
|
2
|
$mod = 'XML::Parser'; |
61
|
1
|
|
|
|
|
2
|
eval { require XML::Parser; }; |
|
1
|
|
|
|
|
182
|
|
62
|
1
|
50
|
|
|
|
8
|
if ($@) { |
63
|
1
|
|
|
|
|
2
|
$mod = 'XML::Parsepp'; |
64
|
1
|
|
|
|
|
2
|
eval { require XML::Parsepp; }; |
|
1
|
|
|
|
|
669
|
|
65
|
1
|
50
|
|
|
|
10199
|
if ($@) { |
66
|
0
|
|
|
|
|
0
|
die "Error: Either XML::Parser or XML::Parsepp must be installed to run XML::Reader"; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
1
|
|
|
|
|
4
|
$use_module = $mod; |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# deprecated functions (Klaus EICHNER, 28 Apr 2010, ver. 0.35): |
75
|
|
|
|
|
|
|
# only for backward compatibility |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Klaus EICHNER, 28 Oct 2011, ver 0.39): |
78
|
|
|
|
|
|
|
# remove deprecated functions newhd() and rstem() |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# sub newhd { new(@_); } # newhd() is now deprecated, use new() instead |
81
|
|
|
|
|
|
|
# sub rstem { path(@_); } # rstem() is now deprecated, use path() instead |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub new { |
84
|
0
|
|
|
0
|
0
|
|
my $class = shift; |
85
|
0
|
|
|
|
|
|
my $self = {}; |
86
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
my %opt; |
88
|
0
|
0
|
|
|
|
|
%opt = %{$_[1]} if defined $_[1]; |
|
0
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
|
90
|
0
|
0
|
|
|
|
|
if (defined $opt{mode}) { |
91
|
0
|
|
|
|
|
|
my $flt; |
92
|
0
|
0
|
|
|
|
|
if ($opt{mode} eq 'attr-bef-start') { $flt = 2; } # attributes appear on seperate lines * before * . |
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
93
|
0
|
|
|
|
|
|
elsif ($opt{mode} eq 'attr-in-hash') { $flt = 3; } # no seperate lines for attributes, they appear in the hash %{$rdr->att_hash}. |
94
|
0
|
|
|
|
|
|
elsif ($opt{mode} eq 'pyx') { $flt = 4; } # pyx compatible way: delivers attributes, , characters, on individual lines. |
95
|
0
|
|
|
|
|
|
elsif ($opt{mode} eq 'branches') { $flt = 5; } # reads roots and branches: $rdr->rx, $rdr->rvalue and $rdr->rval |
96
|
|
|
|
|
|
|
else { |
97
|
0
|
|
|
|
|
|
croak "Failed assertion #0010 in XML::Reader->new: invalid mode = '$opt{mode}', expected 'attr-bef-start', 'attr-in-hash', 'pyx' or 'branches'"; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
0
|
0
|
|
|
|
|
if (defined $opt{filter}) { |
101
|
0
|
0
|
|
|
|
|
unless ($opt{filter} eq $flt) { |
102
|
0
|
|
|
|
|
|
croak "Failed assertion #0020 in XML::Reader->new: filter = '$opt{filter}' does not match mode = '$opt{mode}' (which corresponds to filter = '$flt')"; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
else { |
106
|
0
|
|
|
|
|
|
$opt{filter} = $flt; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
0
|
0
|
|
|
|
|
$opt{strip} = 1 unless defined $opt{strip}; |
111
|
0
|
0
|
|
|
|
|
$opt{filter} = 2 unless defined $opt{filter}; |
112
|
0
|
0
|
|
|
|
|
$opt{parse_pi} = 0 unless defined $opt{parse_pi}; |
113
|
0
|
0
|
|
|
|
|
$opt{parse_ct} = 0 unless defined $opt{parse_ct}; |
114
|
|
|
|
|
|
|
|
115
|
0
|
0
|
0
|
|
|
|
unless ($opt{filter} == 2 or $opt{filter} == 3 or $opt{filter} == 4 or $opt{filter} == 5) { |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
116
|
0
|
|
|
|
|
|
croak "Failed assertion #0030 in XML::Reader->new: filter is set to '$opt{filter}', but must be 2, 3, 4 or 5"; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my @parser_opt; |
120
|
|
|
|
|
|
|
|
121
|
0
|
0
|
0
|
|
|
|
if (defined $opt{dupatt} and $opt{dupatt} ne '') { |
122
|
0
|
0
|
|
|
|
|
unless ($use_module eq 'XML::Parsepp') { |
123
|
0
|
|
|
|
|
|
croak "Failed assertion #0035 in XML::Reader->new: expected use qw(XML::Parsepp), but found use qw($use_module)"; |
124
|
|
|
|
|
|
|
} |
125
|
0
|
|
|
|
|
|
@parser_opt = (dupatt => $opt{dupatt}); |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
0
|
0
|
|
|
|
|
my $XmlParser = $use_module->new(@parser_opt) |
129
|
|
|
|
|
|
|
or croak "Failed assertion #0040 in XML::Reader->new: Can't create $use_module -> new(@parser_opt)"; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# The following references to the handler-functions from the XML::Parser/XML::Parsepp object will be |
132
|
|
|
|
|
|
|
# copied into the ExpatNB object during the later call to XML::Parser/XML::Parsepp->parse_start. |
133
|
|
|
|
|
|
|
|
134
|
0
|
|
|
|
|
|
$XmlParser->setHandlers( |
135
|
|
|
|
|
|
|
Start => \&handle_start, |
136
|
|
|
|
|
|
|
End => \&handle_end, |
137
|
|
|
|
|
|
|
Proc => \&handle_procinst, |
138
|
|
|
|
|
|
|
XMLDecl => \&handle_decl, |
139
|
|
|
|
|
|
|
Char => \&handle_char, |
140
|
|
|
|
|
|
|
Comment => \&handle_comment, |
141
|
|
|
|
|
|
|
); |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# We are trying to open the file (the filename is held in in $_[0]). If the filename |
144
|
|
|
|
|
|
|
# happens to be a reference to a scalar, then it is opened quite naturally as an |
145
|
|
|
|
|
|
|
# 'in-memory-file'. If the open fails, then we return failure from XML::Reader->new |
146
|
|
|
|
|
|
|
# and the calling program has to check $! to handle the failed call. |
147
|
|
|
|
|
|
|
# If, however, the filename is already a filehandle (i.e. ref($_[0]) eq 'GLOB'), then |
148
|
|
|
|
|
|
|
# we use that filehandle directly |
149
|
|
|
|
|
|
|
|
150
|
0
|
|
|
|
|
|
my $fh; |
151
|
|
|
|
|
|
|
|
152
|
0
|
0
|
|
|
|
|
if (ref($_[0]) eq 'GLOB') { |
153
|
0
|
|
|
|
|
|
$fh = $_[0]; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
else { |
156
|
0
|
0
|
|
|
|
|
if ($_[0] =~ m{\A https?:}xms) { |
157
|
0
|
0
|
|
|
|
|
$fh = Acme::HTTP->new($_[0]) |
158
|
|
|
|
|
|
|
or croak "Failed assertion #0042 in XML::Reader->new: Can't Acme::HTTP->new('$_[0]') because $@"; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
else { |
161
|
0
|
0
|
|
|
|
|
open $fh, '<', $_[0] or croak "Failed assertion #0045 in XML::Reader->new: Can't open < '$_[0]' because $!"; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Now we bless into XML::Reader, and we bless *before* creating the ExpatNB-object. |
166
|
|
|
|
|
|
|
# Thereby, to avoid a memory leak, we ensure that for each ExpatNB-object we call |
167
|
|
|
|
|
|
|
# XML::Reader->DESTROY when the object goes away. (-- by the way, we create that |
168
|
|
|
|
|
|
|
# ExpatNB-object by calling the XML::Parser/XML::Parsepp->parse_start method --) |
169
|
|
|
|
|
|
|
|
170
|
0
|
|
|
|
|
|
bless $self, $class; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# Now we are ready to call XML::Parser/XML::Parsepp->parse_start -- XML::Parser/XML::Parsepp->parse_start() |
173
|
|
|
|
|
|
|
# returns an object of type XML::Parser/XML::Parsepp::ExpatNB. The XML::Parser/XML::Parsepp::ExpatNB object |
174
|
|
|
|
|
|
|
# is where all the heavy lifting happens. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# By calling the XML::Parser/XML::Parsepp::Expat->new method (-- XML::Parser::Expat is a super-class |
177
|
|
|
|
|
|
|
# of XML::Parser::ExpatNB --) we will have created a circular reference in |
178
|
|
|
|
|
|
|
# $self->{ExpatNB}{parser}. |
179
|
|
|
|
|
|
|
# |
180
|
|
|
|
|
|
|
# (-- unfortunately, the circular reference does not show up in Data::Dumper, there |
181
|
|
|
|
|
|
|
# is just an integer in $self->{ExpatNB}{parser} that represents a data-structure |
182
|
|
|
|
|
|
|
# within the C-function ParserCreate() --). |
183
|
|
|
|
|
|
|
# |
184
|
|
|
|
|
|
|
# See also the following line of code taken from XML::Parser::Expat->new: |
185
|
|
|
|
|
|
|
# |
186
|
|
|
|
|
|
|
# $args{Parser} = ParserCreate($self, $args{ProtocolEncoding}, $args{Namespaces}); |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# This means that, in order to avoid a memory leak, we have to break this circular |
189
|
|
|
|
|
|
|
# reference when we are done with the processing. The breaking of the circular reference |
190
|
|
|
|
|
|
|
# will be performed in XML::Reader->DESTROY, which calls XML::Parser::Expat->release. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# This is an important moment (-- in terms of memory management, at least --). |
193
|
|
|
|
|
|
|
# XML::Parser/XML::Parsepp->parse_start creates an XML::Parser/XML::Parsepp::ExpatNB-object, which in turn generates |
194
|
|
|
|
|
|
|
# a circular reference (invisible with Data::Dumper). That circular reference will have to |
195
|
|
|
|
|
|
|
# be cleaned up when the XML::Reader-object goes away (see XML::Reader->DESTROY). |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
$self->{ExpatNB} = $XmlParser->parse_start( |
198
|
|
|
|
|
|
|
XR_Data => [], |
199
|
|
|
|
|
|
|
XR_Text => '', |
200
|
|
|
|
|
|
|
XR_Comment => '', |
201
|
|
|
|
|
|
|
XR_fh => $fh, |
202
|
|
|
|
|
|
|
XR_Att => [], |
203
|
|
|
|
|
|
|
XR_ProcInst => [], |
204
|
|
|
|
|
|
|
XR_Decl => {}, |
205
|
|
|
|
|
|
|
XR_Prv_SPECD => '', |
206
|
|
|
|
|
|
|
XR_Emit_attr => ($opt{filter} == 3 ? 0 : 1), |
207
|
|
|
|
|
|
|
XR_Split_up => ($opt{filter} == 4 || $opt{filter} == 5 ? 1 : 0), |
208
|
|
|
|
|
|
|
XR_Strip => $opt{strip}, |
209
|
|
|
|
|
|
|
XR_ParseInst => $opt{parse_pi}, |
210
|
|
|
|
|
|
|
XR_ParseComm => $opt{parse_ct}, |
211
|
0
|
0
|
0
|
|
|
|
) or croak "Failed assertion #0050 in subroutine XML::Reader->new: Can't create $use_module -> parse_start"; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# for XML::Reader, version 0.21 (12-Sep-2009): |
214
|
|
|
|
|
|
|
# inject an {XR_debug} into $self->{ExpatNB}, if so requested by $opt{debug} |
215
|
|
|
|
|
|
|
|
216
|
0
|
0
|
|
|
|
|
if (exists $opt{debug}) { $self->{ExpatNB}{XR_debug} = $opt{debug}; } |
|
0
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# The instruction "XR_Data => []" (-- the 'XR_...' prefix stands for 'Xml::Reader...' --) |
219
|
|
|
|
|
|
|
# inside XML::Parser/XML::Parsepp->parse_start() creates an empty array $ExpatNB{XR_Data} = [] |
220
|
|
|
|
|
|
|
# inside the ExpatNB object. This array is the place where the handlers put their data. |
221
|
|
|
|
|
|
|
# |
222
|
|
|
|
|
|
|
# Likewise, the instructions "XR_Text => ''", "XR_Comment => ''", and "XR_fh => $fh" , etc... |
223
|
|
|
|
|
|
|
# create corresponding elements inside the $ExpatNB-object. |
224
|
|
|
|
|
|
|
|
225
|
0
|
0
|
|
|
|
|
$self->{sepchar} = defined $opt{sepchar} ? $opt{sepchar} : ''; |
226
|
0
|
|
|
|
|
|
$self->{filter} = $opt{filter}; |
227
|
0
|
0
|
|
|
|
|
$self->{using} = !defined($opt{using}) ? [] : ref($opt{using}) ? $opt{using} : [$opt{using}]; |
|
|
0
|
|
|
|
|
|
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# ******************************************************************************************** |
230
|
|
|
|
|
|
|
# The following lines have been disabled by Klaus Eichner, 30 Oct 2009 (for version 0.29) |
231
|
|
|
|
|
|
|
# ******************************************************************************************** |
232
|
|
|
|
|
|
|
# remove all spaces and then all leading and trailing '/', then put back a single leading '/' |
233
|
|
|
|
|
|
|
# for my $check (@{$self->{using}}) { |
234
|
|
|
|
|
|
|
# $check =~ s{\s}''xmsg; |
235
|
|
|
|
|
|
|
# $check =~ s{\A /+}''xms; |
236
|
|
|
|
|
|
|
# $check =~ s{/+ \z}''xms; |
237
|
|
|
|
|
|
|
# $check = '/'.$check; |
238
|
|
|
|
|
|
|
# } |
239
|
|
|
|
|
|
|
# ******************************************************************************************** |
240
|
|
|
|
|
|
|
|
241
|
0
|
|
|
|
|
|
$self->{bush} = []; |
242
|
0
|
|
|
|
|
|
$self->{rlist} = []; |
243
|
|
|
|
|
|
|
|
244
|
0
|
0
|
|
|
|
|
if ($self->{filter} == 5) { |
245
|
0
|
|
|
|
|
|
for my $object (@_[2..$#_]) { |
246
|
0
|
|
|
|
|
|
$object->{brna} = []; |
247
|
|
|
|
|
|
|
|
248
|
0
|
0
|
|
|
|
|
if (ref($object->{branch}) eq 'ARRAY') { |
249
|
0
|
|
|
|
|
|
for my $j (0..$#{$object->{branch}}) { |
|
0
|
|
|
|
|
|
|
250
|
0
|
|
|
|
|
|
$object->{branch}[$j] =~ s{\A ([^/\s])}{/$1}xms; |
251
|
|
|
|
|
|
|
|
252
|
0
|
|
|
|
|
|
$object->{brna}[$j] = []; |
253
|
|
|
|
|
|
|
|
254
|
0
|
|
|
|
|
|
my $b_level = 0; |
255
|
0
|
|
|
|
|
|
my $b_branch = $object->{branch}[$j]; |
256
|
0
|
|
|
|
|
|
$object->{branch}[$j] =~ s{\[ [^/\]]* \]}''xmsg; |
257
|
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
$b_branch =~ s{\A /+}''xms; |
259
|
|
|
|
|
|
|
|
260
|
0
|
|
|
|
|
|
for my $ele (split(m{/}xms, $b_branch)) { |
261
|
0
|
|
|
|
|
|
$b_level++; |
262
|
|
|
|
|
|
|
|
263
|
0
|
0
|
|
|
|
|
if ($ele =~ m{\[ \@ ([^\[\]=\s]+) = ['"] ([^'"]*) ['"] \]}xms) { |
264
|
0
|
|
|
|
|
|
push @{$object->{brna}[$j]}, [ $b_level - 1, $1, $2 ]; |
|
0
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
0
|
|
|
|
|
|
$object->{rota} = []; |
271
|
|
|
|
|
|
|
|
272
|
0
|
|
|
|
|
|
my $a_level = 0; |
273
|
0
|
|
|
|
|
|
my $a_root = $object->{root}; |
274
|
0
|
|
|
|
|
|
$object->{root} =~ s{\[ [^/\]]* \]}''xmsg; |
275
|
0
|
|
|
|
|
|
$a_root =~ s{\A /+}''xms; |
276
|
|
|
|
|
|
|
|
277
|
0
|
|
|
|
|
|
for my $ele (split(m{/}xms, $a_root)) { |
278
|
0
|
|
|
|
|
|
$a_level++; |
279
|
|
|
|
|
|
|
|
280
|
0
|
0
|
|
|
|
|
if ($ele =~ m{\[ \@ ([^\[\]=\s]+) = ['"] ([^'"]*) ['"] \]}xms) { |
281
|
0
|
|
|
|
|
|
push @{$object->{rota}}, [ $a_level - 1, $1, $2 ]; |
|
0
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
0
|
0
|
0
|
|
|
|
if ($object->{root} =~ m{\A // ([^/] .*) \z}xms |
286
|
|
|
|
|
|
|
or $object->{root} =~ m{\A ([^/] .*) \z}xms) { |
287
|
0
|
|
|
|
|
|
my $chunk = $1; |
288
|
0
|
|
|
|
|
|
push @{$self->{rlist}}, { |
289
|
|
|
|
|
|
|
root => undef, |
290
|
|
|
|
|
|
|
qr1 => qr{\A (.*) / \Q$chunk\E \z}xms, |
291
|
|
|
|
|
|
|
rota => $object->{rota}, |
292
|
|
|
|
|
|
|
qrfix => undef, |
293
|
|
|
|
|
|
|
branch => $object->{branch}, |
294
|
|
|
|
|
|
|
brna => $object->{brna}, |
295
|
0
|
|
|
|
|
|
}; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
else { |
298
|
0
|
|
|
|
|
|
push @{$self->{rlist}}, { |
299
|
|
|
|
|
|
|
root => $object->{root}, |
300
|
|
|
|
|
|
|
rota => $object->{rota}, |
301
|
|
|
|
|
|
|
qr1 => undef, |
302
|
|
|
|
|
|
|
qrfix => undef, |
303
|
|
|
|
|
|
|
branch => $object->{branch}, |
304
|
|
|
|
|
|
|
brna => $object->{brna}, |
305
|
0
|
|
|
|
|
|
}; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
#~ use Data::Dump; |
310
|
|
|
|
|
|
|
#~ dd \@_; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
|
$self->{plist} = []; |
314
|
0
|
|
|
|
|
|
$self->{alist} = []; |
315
|
0
|
|
|
|
|
|
$self->{path} = '/'; |
316
|
0
|
|
|
|
|
|
$self->{prefix} = ''; |
317
|
0
|
|
|
|
|
|
$self->{tag} = ''; |
318
|
0
|
|
|
|
|
|
$self->{value} = ''; |
319
|
0
|
|
|
|
|
|
$self->{att_hash} = {}; |
320
|
0
|
|
|
|
|
|
$self->{dec_hash} = {}; |
321
|
0
|
|
|
|
|
|
$self->{comment} = ''; |
322
|
0
|
|
|
|
|
|
$self->{pyx} = ''; |
323
|
0
|
|
|
|
|
|
$self->{rx} = 0; |
324
|
0
|
|
|
|
|
|
$self->{rvalue} = []; |
325
|
0
|
|
|
|
|
|
$self->{rresult} = []; |
326
|
0
|
|
|
|
|
|
$self->{proc} = ''; |
327
|
0
|
|
|
|
|
|
$self->{type} = '?'; |
328
|
0
|
|
|
|
|
|
$self->{is_start} = 0; |
329
|
0
|
|
|
|
|
|
$self->{is_end} = 0; |
330
|
0
|
|
|
|
|
|
$self->{is_decl} = 0; |
331
|
0
|
|
|
|
|
|
$self->{is_proc} = 0; |
332
|
0
|
|
|
|
|
|
$self->{is_comment} = 0; |
333
|
0
|
|
|
|
|
|
$self->{is_text} = 0; |
334
|
0
|
|
|
|
|
|
$self->{is_attr} = 0; |
335
|
0
|
|
|
|
|
|
$self->{is_value} = 0; |
336
|
0
|
|
|
|
|
|
$self->{level} = 0; |
337
|
0
|
|
|
|
|
|
$self->{item} = ''; |
338
|
|
|
|
|
|
|
|
339
|
0
|
|
|
|
|
|
return $self; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# path() and value() are the two main functions: |
343
|
|
|
|
|
|
|
# ********************************************** |
344
|
|
|
|
|
|
|
|
345
|
0
|
|
|
0
|
1
|
|
sub path { $_[0]{path}; } |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
sub value { |
348
|
0
|
0
|
|
0
|
1
|
|
if ($_[0]{filter} == 5) { |
349
|
0
|
0
|
|
|
|
|
ref $_[0]{rvalue} eq 'ARRAY' ? @{$_[0]{rvalue}} : $_[0]{rvalue}; |
|
0
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
else { |
352
|
0
|
|
|
|
|
|
$_[0]{value}; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
0
|
|
|
0
|
1
|
|
sub tag { $_[0]{tag}; } |
357
|
0
|
|
|
0
|
1
|
|
sub attr { $_[0]{attr}; } |
358
|
0
|
|
|
0
|
1
|
|
sub att_hash { $_[0]{att_hash}; } |
359
|
0
|
|
|
0
|
1
|
|
sub dec_hash { $_[0]{dec_hash}; } |
360
|
0
|
|
|
0
|
1
|
|
sub type { $_[0]{type}; } |
361
|
0
|
|
|
0
|
1
|
|
sub level { $_[0]{level}; } |
362
|
0
|
|
|
0
|
1
|
|
sub prefix { $_[0]{prefix}; } |
363
|
0
|
|
|
0
|
1
|
|
sub comment { $_[0]{comment}; } |
364
|
0
|
|
|
0
|
1
|
|
sub pyx { $_[0]{pyx}; } |
365
|
0
|
|
|
0
|
1
|
|
sub rx { $_[0]{rx}; } |
366
|
0
|
|
|
0
|
1
|
|
sub rvalue { $_[0]{rvalue}; } |
367
|
0
|
|
|
0
|
1
|
|
sub proc_tgt { $_[0]{proc_tgt}; } |
368
|
0
|
|
|
0
|
1
|
|
sub proc_data { $_[0]{proc_data}; } |
369
|
0
|
|
|
0
|
1
|
|
sub is_decl { $_[0]{is_decl}; } |
370
|
0
|
|
|
0
|
1
|
|
sub is_start { $_[0]{is_start}; } |
371
|
0
|
|
|
0
|
1
|
|
sub is_proc { $_[0]{is_proc}; } |
372
|
0
|
|
|
0
|
1
|
|
sub is_comment { $_[0]{is_comment}; } |
373
|
0
|
|
|
0
|
1
|
|
sub is_text { $_[0]{is_text}; } |
374
|
0
|
|
|
0
|
1
|
|
sub is_attr { $_[0]{is_attr}; } |
375
|
0
|
|
|
0
|
1
|
|
sub is_value { $_[0]{is_value}; } |
376
|
0
|
|
|
0
|
1
|
|
sub is_end { $_[0]{is_end}; } |
377
|
|
|
|
|
|
|
|
378
|
0
|
|
|
0
|
0
|
|
sub NB_data { $_[0]{ExpatNB}{XR_Data}; } |
379
|
0
|
|
|
0
|
0
|
|
sub NB_fh { $_[0]{ExpatNB}{XR_fh}; } |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub iterate { |
382
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
{ |
385
|
0
|
0
|
|
|
|
|
if ($self->{filter} == 5) { |
|
0
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
my $res = shift @{$self->{rresult}}; |
|
0
|
|
|
|
|
|
|
387
|
0
|
0
|
|
|
|
|
if ($res) { |
388
|
0
|
|
|
|
|
|
$self->{rx} = $res->[0]; |
389
|
0
|
|
|
|
|
|
$self->{rvalue} = $res->[1]; |
390
|
0
|
|
|
|
|
|
return 1; |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
|
394
|
0
|
|
|
|
|
|
my $token = $self->get_token; |
395
|
0
|
0
|
|
|
|
|
unless (defined $token) { |
396
|
0
|
|
|
|
|
|
return; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
0
|
0
|
|
|
|
|
if ($token->found_start_tag) { |
400
|
0
|
|
|
|
|
|
push @{$self->{plist}}, $token->extract_tag; |
|
0
|
|
|
|
|
|
|
401
|
0
|
|
|
|
|
|
push @{$self->{alist}}, {}; |
|
0
|
|
|
|
|
|
|
402
|
0
|
|
|
|
|
|
redo; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
0
|
0
|
|
|
|
|
if ($token->found_end_tag) { |
406
|
0
|
|
|
|
|
|
pop @{$self->{plist}}; |
|
0
|
|
|
|
|
|
|
407
|
0
|
|
|
|
|
|
pop @{$self->{alist}}; |
|
0
|
|
|
|
|
|
|
408
|
0
|
|
|
|
|
|
redo; |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
0
|
|
|
|
|
|
my $prv_SPECD = $token->extract_prv_SPECD; |
412
|
0
|
|
|
|
|
|
my $nxt_SPECD = $token->extract_nxt_SPECD; |
413
|
|
|
|
|
|
|
|
414
|
0
|
|
|
|
|
|
$self->{rx} = 0; |
415
|
0
|
|
|
|
|
|
$self->{rvalue} = []; |
416
|
|
|
|
|
|
|
|
417
|
0
|
0
|
|
|
|
|
if ($token->found_text) { |
|
|
0
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
my $text = $token->extract_text; |
419
|
0
|
|
|
|
|
|
my $comment = $token->extract_comment; |
420
|
|
|
|
|
|
|
|
421
|
0
|
|
|
|
|
|
my $proc_tgt = ''; |
422
|
0
|
|
|
|
|
|
my $proc_data = ''; |
423
|
0
|
0
|
|
|
|
|
if (@{$token->extract_proc} == 2) { |
|
0
|
|
|
|
|
|
|
424
|
0
|
|
|
|
|
|
$proc_tgt = ${$token->extract_proc}[0]; |
|
0
|
|
|
|
|
|
|
425
|
0
|
|
|
|
|
|
$proc_data = ${$token->extract_proc}[1]; |
|
0
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
0
|
0
|
|
|
|
|
$self->{is_decl} = $prv_SPECD eq 'D' ? 1 : 0; |
429
|
0
|
0
|
|
|
|
|
$self->{is_start} = $prv_SPECD eq 'S' ? 1 : 0; |
430
|
0
|
0
|
|
|
|
|
$self->{is_proc} = $prv_SPECD eq 'P' ? 1 : 0; |
431
|
0
|
0
|
|
|
|
|
$self->{is_comment} = $prv_SPECD eq 'C' ? 1 : 0; |
432
|
0
|
0
|
|
|
|
|
$self->{is_end} = $nxt_SPECD eq 'E' ? 1 : 0; |
433
|
|
|
|
|
|
|
|
434
|
0
|
|
|
|
|
|
$self->{is_text} = 1; |
435
|
0
|
|
|
|
|
|
$self->{is_attr} = 0; |
436
|
|
|
|
|
|
|
|
437
|
0
|
|
|
|
|
|
$self->{path} = '/'.join('/', @{$self->{plist}}); |
|
0
|
|
|
|
|
|
|
438
|
0
|
|
|
|
|
|
$self->{attr} = ''; |
439
|
0
|
|
|
|
|
|
$self->{value} = $text; |
440
|
0
|
|
|
|
|
|
$self->{comment} = $comment; |
441
|
0
|
|
|
|
|
|
$self->{proc_tgt} = $proc_tgt; |
442
|
0
|
|
|
|
|
|
$self->{proc_data} = $proc_data; |
443
|
0
|
|
|
|
|
|
$self->{level} = @{$self->{plist}}; |
|
0
|
|
|
|
|
|
|
444
|
0
|
0
|
|
|
|
|
$self->{tag} = @{$self->{plist}} ? ${$self->{plist}}[-1] : ''; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
445
|
0
|
|
|
|
|
|
$self->{type} = 'T'; |
446
|
0
|
|
|
|
|
|
$self->{att_hash} = {@{$token->extract_attr}}; |
|
0
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
|
$self->{dec_hash} = {@{$token->extract_decl}}; |
|
0
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
449
|
0
|
|
|
|
|
|
for (keys %{$self->{att_hash}}) { |
|
0
|
|
|
|
|
|
|
450
|
0
|
|
|
|
|
|
$self->{alist}[-1]{$_} = $self->{att_hash}{$_}; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
elsif ($token->found_attr) { |
454
|
0
|
|
|
|
|
|
my $key = $token->extract_attkey; |
455
|
0
|
|
|
|
|
|
my $val = $token->extract_attval; |
456
|
|
|
|
|
|
|
|
457
|
0
|
|
|
|
|
|
$self->{is_decl} = 0; |
458
|
0
|
|
|
|
|
|
$self->{is_start} = 0; |
459
|
0
|
|
|
|
|
|
$self->{is_proc} = 0; |
460
|
0
|
|
|
|
|
|
$self->{is_comment} = 0; |
461
|
0
|
|
|
|
|
|
$self->{is_end} = 0; |
462
|
|
|
|
|
|
|
|
463
|
0
|
|
|
|
|
|
$self->{is_text} = 0; |
464
|
0
|
|
|
|
|
|
$self->{is_attr} = 1; |
465
|
|
|
|
|
|
|
|
466
|
0
|
|
|
|
|
|
$self->{path} = '/'.join('/', @{$self->{plist}}).'/@'.$key; |
|
0
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
|
$self->{attr} = $key; |
468
|
0
|
|
|
|
|
|
$self->{value} = $val; |
469
|
0
|
|
|
|
|
|
$self->{comment} = ''; |
470
|
0
|
|
|
|
|
|
$self->{proc_tgt} = ''; |
471
|
0
|
|
|
|
|
|
$self->{proc_data} = ''; |
472
|
0
|
|
|
|
|
|
$self->{level} = @{$self->{plist}} + 1; |
|
0
|
|
|
|
|
|
|
473
|
0
|
|
|
|
|
|
$self->{tag} = '@'.$key; |
474
|
0
|
|
|
|
|
|
$self->{type} = '@'; |
475
|
0
|
|
|
|
|
|
$self->{att_hash} = {}; |
476
|
0
|
|
|
|
|
|
$self->{dec_hash} = {}; |
477
|
|
|
|
|
|
|
|
478
|
0
|
|
|
|
|
|
$self->{alist}[-1]{$key} = $val; |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
else { |
481
|
0
|
|
|
|
|
|
croak "Failed assertion #0060 in subroutine XML::Reader->iterate: Found data type '".$token->[0]."'"; |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# for {filter => 4 or 5} |
485
|
|
|
|
|
|
|
# - promote $self->{type} -- from 'T'/'@' to any of the following codes: 'D', '?', 'S', 'E', '#', 'T', '@' |
486
|
|
|
|
|
|
|
# - update $self->{is_text} |
487
|
|
|
|
|
|
|
# - setup $self->{pyx} |
488
|
|
|
|
|
|
|
|
489
|
0
|
0
|
0
|
|
|
|
if ($self->{filter} == 4 or $self->{filter} == 5) { |
490
|
0
|
0
|
|
|
|
|
if ($self->{type} eq '@') { $self->{pyx} = 'A'.$self->{attr}.' '.$self->{value}; } |
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
491
|
0
|
|
|
|
|
|
elsif ($self->{is_decl}) { my $dc = $self->{dec_hash}; |
492
|
0
|
|
|
|
|
|
$self->{type} = 'D'; $self->{pyx} = '?xml'.join('', map {" $_='$dc->{$_}'"} sort {$b cmp $a} keys %$dc); } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
493
|
0
|
|
|
|
|
|
elsif ($self->{is_proc}) { $self->{type} = '?'; $self->{pyx} = '?'.$self->{proc_tgt}.' '.$self->{proc_data}; } |
|
0
|
|
|
|
|
|
|
494
|
0
|
|
|
|
|
|
elsif ($self->{is_start}) { $self->{type} = 'S'; $self->{pyx} = '('.$self->{tag}; } |
|
0
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
|
elsif ($self->{is_end}) { $self->{type} = 'E'; $self->{pyx} = ')'.$self->{tag}; } |
|
0
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
|
elsif ($self->{is_comment}) { $self->{type} = '#'; $self->{pyx} = '#'.$self->{comment}; } |
|
0
|
|
|
|
|
|
|
497
|
0
|
|
|
|
|
|
else { $self->{type} = 'T'; $self->{pyx} = '-'.$self->{value}; } |
|
0
|
|
|
|
|
|
|
498
|
0
|
|
|
|
|
|
$self->{pyx} =~ s{\\}'\\\\'xmsg; # replace each backslash by a double-backslash |
499
|
0
|
|
|
|
|
|
$self->{pyx} =~ s{\t}'\\t'xmsg; # replace tabs by a literal "\\t" |
500
|
0
|
|
|
|
|
|
$self->{pyx} =~ s{\n}'\\n'xmsg; # replace newlines by a literal "\\n" |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# update $self->{is_text} |
503
|
0
|
0
|
|
|
|
|
$self->{is_text} = $self->{type} eq 'T' ? 1 : 0; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
else { |
506
|
0
|
|
|
|
|
|
$self->{pyx} = undef; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
0
|
0
|
0
|
|
|
|
$self->{is_value} = ($self->{is_text} || $self->{is_attr}) ? 1 : 0; |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
# for {filter => 5} check roots |
512
|
0
|
0
|
|
|
|
|
if ($self->{filter} == 5) { |
513
|
0
|
|
|
|
|
|
for my $r (0..$#{$self->{rlist}}) { |
|
0
|
|
|
|
|
|
|
514
|
0
|
|
|
|
|
|
my $param = $self->{rlist}[$r]; |
515
|
|
|
|
|
|
|
|
516
|
0
|
|
|
|
|
|
my $twig; |
517
|
|
|
|
|
|
|
my $border; |
518
|
|
|
|
|
|
|
|
519
|
0
|
|
|
|
|
|
my $root; |
520
|
0
|
|
|
|
|
|
my $rotn = 0; |
521
|
|
|
|
|
|
|
|
522
|
0
|
0
|
|
|
|
|
if (defined $param->{root}) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
523
|
0
|
|
|
|
|
|
$root = $param->{root}; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
elsif (defined $param->{qrfix}) { |
526
|
0
|
|
|
|
|
|
$root = $param->{qrfix}; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
elsif (defined $param->{qr1}) { |
529
|
0
|
0
|
|
|
|
|
if ($self->{path} =~ $param->{qr1}) { my $prf = $1; |
|
0
|
|
|
|
|
|
|
530
|
0
|
|
|
|
|
|
$rotn = () = $prf =~ m{/}xmsg; |
531
|
0
|
|
|
|
|
|
$root = $self->{path}; |
532
|
0
|
|
|
|
|
|
$param->{qrfix} = $root; |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
|
536
|
0
|
0
|
|
|
|
|
if (defined $root) { |
537
|
0
|
0
|
|
|
|
|
if ($root eq '/') { |
538
|
0
|
0
|
|
|
|
|
if (@{$self->{plist}} == 1) { |
|
0
|
0
|
|
|
|
|
|
539
|
0
|
|
|
|
|
|
$twig = $self->{path}; |
540
|
0
|
|
|
|
|
|
$border = 1; |
541
|
|
|
|
|
|
|
} |
542
|
0
|
|
|
|
|
|
elsif (@{$self->{plist}} > 1) { |
543
|
0
|
|
|
|
|
|
$twig = $self->{path}; |
544
|
0
|
|
|
|
|
|
$border = 0; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
else { |
548
|
0
|
0
|
|
|
|
|
if ($self->{path} eq $root) { |
|
|
0
|
|
|
|
|
|
549
|
0
|
|
|
|
|
|
$twig = '/'; |
550
|
0
|
|
|
|
|
|
$border = 1; |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
elsif (substr($self->{path}, 0, length($root) + 1) eq $root.'/') { |
553
|
0
|
|
|
|
|
|
$twig = substr($self->{path}, length($root)); |
554
|
0
|
|
|
|
|
|
$border = 0; |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
|
559
|
0
|
0
|
|
|
|
|
next unless defined $twig; |
560
|
|
|
|
|
|
|
|
561
|
0
|
|
|
|
|
|
my $block = 0; |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
#~ if (@{$param->{rota}}) { |
564
|
|
|
|
|
|
|
#~ use Data::Dump; |
565
|
|
|
|
|
|
|
#~ print "\nDeb-0010: param->{rota}:\n"; |
566
|
|
|
|
|
|
|
#~ dd $param->{rota}; |
567
|
|
|
|
|
|
|
#~ print "\nDeb-0020: self->{alist}:\n"; |
568
|
|
|
|
|
|
|
#~ dd $self->{alist}; |
569
|
|
|
|
|
|
|
#~ } |
570
|
|
|
|
|
|
|
|
571
|
0
|
|
|
|
|
|
for (@{$param->{rota}}) { |
|
0
|
|
|
|
|
|
|
572
|
0
|
|
|
|
|
|
my ($offset, $attr, $val) = ($_->[0] + $rotn, $_->[1], $_->[2]); |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
#~ print "Deb-0030: offset = $offset ($_->[0] + $rotn), attr = '$attr', val = '$val'\n"; |
575
|
|
|
|
|
|
|
|
576
|
0
|
|
|
|
|
|
my $e = $self->{alist}[$offset]; |
577
|
|
|
|
|
|
|
|
578
|
0
|
0
|
|
|
|
|
unless ($e) { |
579
|
|
|
|
|
|
|
#~ print "Deb-0100: Block-01\n"; |
580
|
0
|
|
|
|
|
|
$block++; |
581
|
0
|
|
|
|
|
|
next; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
0
|
|
|
|
|
|
my $v = $e->{$attr}; |
585
|
|
|
|
|
|
|
|
586
|
0
|
0
|
|
|
|
|
unless (defined $v) { |
587
|
|
|
|
|
|
|
#~ print "Deb-0110: Block-02\n"; |
588
|
0
|
|
|
|
|
|
$block++; |
589
|
0
|
|
|
|
|
|
next; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
0
|
0
|
|
|
|
|
unless ($v eq $val) { |
593
|
|
|
|
|
|
|
#~ print "Deb-0120: Block-03\n"; |
594
|
0
|
|
|
|
|
|
$block++; |
595
|
0
|
|
|
|
|
|
next; |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
#~ print "Deb-0150: Good...\n"; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
0
|
0
|
|
|
|
|
next if $block; |
602
|
|
|
|
|
|
|
|
603
|
0
|
|
|
|
|
|
my $bran; |
604
|
|
|
|
|
|
|
|
605
|
0
|
0
|
|
|
|
|
if ($root eq '/') { |
606
|
0
|
|
|
|
|
|
$bran = 0; |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
else { |
609
|
0
|
|
|
|
|
|
$bran = () = $root =~ m{/}xmsg; |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
|
612
|
0
|
0
|
|
|
|
|
if (ref $param->{branch}) { # here we have an array of branches... |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
613
|
0
|
0
|
0
|
|
|
|
if ($border and $self->{is_start}) { |
614
|
0
|
|
|
|
|
|
$self->{bush}[$r] = []; |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
|
617
|
0
|
0
|
|
|
|
|
if ($self->{is_value}) { |
618
|
0
|
|
|
|
|
|
for my $i (0..$#{$param->{branch}}) { |
|
0
|
|
|
|
|
|
|
619
|
0
|
0
|
|
|
|
|
if ($param->{branch}[$i] eq $twig) { |
620
|
|
|
|
|
|
|
|
621
|
0
|
|
|
|
|
|
my $block = 0; |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
#~ if (@{$param->{brna}[$i]}) { |
624
|
|
|
|
|
|
|
#~ use Data::Dump; |
625
|
|
|
|
|
|
|
#~ print "\nDeb-0010: param->{brna}[$i]:\n"; |
626
|
|
|
|
|
|
|
#~ dd $param->{brna}[$i]; |
627
|
|
|
|
|
|
|
#~ print "\nDeb-0020: self->{alist}:\n"; |
628
|
|
|
|
|
|
|
#~ dd $self->{alist}; |
629
|
|
|
|
|
|
|
#~ } |
630
|
|
|
|
|
|
|
|
631
|
0
|
|
|
|
|
|
for (@{$param->{brna}[$i]}) { |
|
0
|
|
|
|
|
|
|
632
|
0
|
|
|
|
|
|
my ($offset, $attr, $val) = ($_->[0] + $bran, $_->[1], $_->[2]); |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
#~ print "Deb-0030: offset = $offset ($_->[0] + $bran), attr = '$attr', val = '$val'\n"; |
635
|
|
|
|
|
|
|
|
636
|
0
|
|
|
|
|
|
my $e = $self->{alist}[$offset]; |
637
|
|
|
|
|
|
|
|
638
|
0
|
0
|
|
|
|
|
unless ($e) { |
639
|
|
|
|
|
|
|
#~ print "Deb-0100: Block-01\n"; |
640
|
0
|
|
|
|
|
|
$block++; |
641
|
0
|
|
|
|
|
|
next; |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
|
644
|
0
|
|
|
|
|
|
my $v = $e->{$attr}; |
645
|
|
|
|
|
|
|
|
646
|
0
|
0
|
|
|
|
|
unless (defined $v) { |
647
|
|
|
|
|
|
|
#~ print "Deb-0110: Block-02\n"; |
648
|
0
|
|
|
|
|
|
$block++; |
649
|
0
|
|
|
|
|
|
next; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
|
652
|
0
|
0
|
|
|
|
|
unless ($v eq $val) { |
653
|
|
|
|
|
|
|
#~ print "Deb-0120: Block-03\n"; |
654
|
0
|
|
|
|
|
|
$block++; |
655
|
0
|
|
|
|
|
|
next; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
#~ print "Deb-0150: Good...\n"; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
0
|
0
|
|
|
|
|
unless ($block) { |
662
|
0
|
|
|
|
|
|
my $ref = \$self->{bush}[$r][$i]; |
663
|
0
|
0
|
|
|
|
|
$$ref .= (defined $$ref ? $self->{sepchar} : '').$self->{value}; |
664
|
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
} |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
elsif ($param->{branch} eq '+') { # collect PYX array, addition for ver 0.39 (Klaus Eichner, 28th Oct 2011) |
670
|
0
|
0
|
0
|
|
|
|
if ($border and $self->{is_start}) { |
671
|
0
|
|
|
|
|
|
$self->{bush}[$r] = []; |
672
|
|
|
|
|
|
|
} |
673
|
0
|
|
|
|
|
|
push @{$self->{bush}[$r]}, $self->{pyx}; |
|
0
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
elsif ($param->{branch} eq '*') { # collect pure XML data, addition for ver 0.34 (Klaus Eichner, 26th Apr 2010) |
676
|
0
|
0
|
0
|
|
|
|
if ($border and $self->{is_start}) { |
677
|
0
|
|
|
|
|
|
$self->{bush}[$r] = ''; |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
0
|
|
|
|
|
|
my $element = ''; |
681
|
0
|
0
|
|
|
|
|
if ($self->{is_decl}) { |
682
|
0
|
|
|
|
|
|
$element .= ''; |
683
|
0
|
|
|
|
|
|
for my $key (sort keys %{$self->{dec_hash}}) { |
|
0
|
|
|
|
|
|
|
684
|
0
|
|
|
|
|
|
my $kval = $self->{dec_hash}{$key}; |
685
|
0
|
|
|
|
|
|
$kval =~ s{&}'&'xmsg; |
686
|
0
|
|
|
|
|
|
$kval =~ s{'}'''xmsg; |
687
|
0
|
|
|
|
|
|
$kval =~ s{<}'<'xmsg; |
688
|
0
|
|
|
|
|
|
$kval =~ s{>}'>'xmsg; |
689
|
0
|
|
|
|
|
|
$element .= qq{ $key='$kval'}; |
690
|
|
|
|
|
|
|
} |
691
|
0
|
|
|
|
|
|
$element .= '?>'; |
692
|
|
|
|
|
|
|
} |
693
|
0
|
0
|
|
|
|
|
if ($self->{is_start}) { |
694
|
0
|
|
|
|
|
|
$element .= '<'.$self->{tag}; |
695
|
0
|
|
|
|
|
|
for my $key (sort keys %{$self->{att_hash}}) { |
|
0
|
|
|
|
|
|
|
696
|
0
|
|
|
|
|
|
my $kval = $self->{att_hash}{$key}; |
697
|
0
|
|
|
|
|
|
$kval =~ s{&}'&'xmsg; |
698
|
0
|
|
|
|
|
|
$kval =~ s{'}'''xmsg; |
699
|
0
|
|
|
|
|
|
$kval =~ s{<}'<'xmsg; |
700
|
0
|
|
|
|
|
|
$kval =~ s{>}'>'xmsg; |
701
|
0
|
|
|
|
|
|
$element .= qq{ $key='$kval'}; |
702
|
|
|
|
|
|
|
} |
703
|
0
|
|
|
|
|
|
$element .= '>'; |
704
|
|
|
|
|
|
|
} |
705
|
0
|
0
|
|
|
|
|
if ($self->{is_proc}) { |
706
|
0
|
|
|
|
|
|
my $tgt = $self->{proc_tgt}; |
707
|
0
|
|
|
|
|
|
my $dat = $self->{proc_data}; |
708
|
0
|
|
|
|
|
|
for ($tgt, $dat) { |
709
|
0
|
|
|
|
|
|
s{&}'&'xmsg; |
710
|
0
|
|
|
|
|
|
s{'}'''xmsg; |
711
|
0
|
|
|
|
|
|
s{<}'<'xmsg; |
712
|
0
|
|
|
|
|
|
s{>}'>'xmsg; |
713
|
|
|
|
|
|
|
} |
714
|
0
|
|
|
|
|
|
$element .= "$tgt $dat?>"; |
715
|
|
|
|
|
|
|
} |
716
|
0
|
0
|
|
|
|
|
if ($self->{is_text}) { |
717
|
0
|
|
|
|
|
|
my $tval = $self->{value}; |
718
|
0
|
0
|
|
|
|
|
if ($tval ne '') { |
719
|
0
|
|
|
|
|
|
$tval =~ s{&}'&'xmsg; |
720
|
0
|
|
|
|
|
|
$tval =~ s{<}'<'xmsg; |
721
|
0
|
|
|
|
|
|
$tval =~ s{>}'>'xmsg; |
722
|
0
|
|
|
|
|
|
$element .= $tval; |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
} |
725
|
0
|
0
|
|
|
|
|
if ($self->{is_comment}) { |
726
|
0
|
|
|
|
|
|
my $tval = $self->{comment}; |
727
|
0
|
|
|
|
|
|
$tval =~ s{&}'&'xmsg; |
728
|
0
|
|
|
|
|
|
$tval =~ s{<}'<'xmsg; |
729
|
0
|
|
|
|
|
|
$tval =~ s{>}'>'xmsg; |
730
|
0
|
|
|
|
|
|
$element .= ""; |
731
|
|
|
|
|
|
|
} |
732
|
0
|
0
|
|
|
|
|
if ($self->{is_end}) { |
733
|
0
|
|
|
|
|
|
$element .= ''.$self->{tag}.'>'; |
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
|
736
|
0
|
|
|
|
|
|
$self->{bush}[$r] .= $element; |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
|
739
|
0
|
0
|
0
|
|
|
|
if ($border and $self->{is_end}) { |
740
|
0
|
|
|
|
|
|
push @{$self->{rresult}}, [$r, $self->{bush}[$r]]; |
|
0
|
|
|
|
|
|
|
741
|
0
|
|
|
|
|
|
$param->{qrfix} = undef; |
742
|
|
|
|
|
|
|
} |
743
|
|
|
|
|
|
|
} |
744
|
0
|
|
|
|
|
|
redo; |
745
|
|
|
|
|
|
|
} |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
# Here we check for the {using => ...} option |
748
|
0
|
|
|
|
|
|
$self->{prefix} = ''; |
749
|
|
|
|
|
|
|
|
750
|
0
|
|
|
|
|
|
for my $check (@{$self->{using}}) { |
|
0
|
|
|
|
|
|
|
751
|
0
|
0
|
|
|
|
|
if ($check eq $self->{path}) { |
752
|
0
|
|
|
|
|
|
$self->{prefix} = $check; |
753
|
0
|
|
|
|
|
|
$self->{path} = '/'; |
754
|
0
|
|
|
|
|
|
$self->{level} = 0; |
755
|
0
|
|
|
|
|
|
$self->{tag} = ''; # unfortunately we have to nullify the tag here... |
756
|
0
|
|
|
|
|
|
last; |
757
|
|
|
|
|
|
|
} |
758
|
0
|
0
|
|
|
|
|
if ($check.'/' eq substr($self->{path}, 0, length($check) + 1)) { my @temp = split m{/}xms, $check; |
|
0
|
|
|
|
|
|
|
759
|
0
|
|
|
|
|
|
$self->{prefix} = $check; |
760
|
0
|
|
|
|
|
|
$self->{path} = substr($self->{path}, length($check)); |
761
|
0
|
|
|
|
|
|
$self->{level} -= @temp - 1; |
762
|
0
|
|
|
|
|
|
last; |
763
|
|
|
|
|
|
|
} |
764
|
|
|
|
|
|
|
} |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
# check if option {using => ...} has been requested, and if so, then skip all |
767
|
|
|
|
|
|
|
# lines that don't have a prefix... |
768
|
0
|
0
|
0
|
|
|
|
if (@{$self->{using}} and $self->{prefix} eq '') { |
|
0
|
|
|
|
|
|
|
769
|
0
|
|
|
|
|
|
redo; |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
} |
772
|
|
|
|
|
|
|
|
773
|
0
|
|
|
|
|
|
return 1; |
774
|
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
sub get_token { |
777
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
778
|
|
|
|
|
|
|
|
779
|
0
|
|
|
|
|
|
until (@{$self->NB_data}) { |
|
0
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
# Here is the all important reading of a chunk of XML-data from the filehandle... |
781
|
|
|
|
|
|
|
|
782
|
0
|
|
|
|
|
|
my $buf; |
783
|
|
|
|
|
|
|
|
784
|
0
|
0
|
|
|
|
|
if (ref($self->NB_fh) eq 'Acme::HTTP') { |
785
|
0
|
|
|
|
|
|
my $ct = $self->NB_fh->read_entity_body($buf, 4096); # returns number of bytes read, or undef if IO-Error |
786
|
0
|
0
|
|
|
|
|
last unless $ct; |
787
|
|
|
|
|
|
|
} |
788
|
|
|
|
|
|
|
else { |
789
|
0
|
|
|
|
|
|
read($self->NB_fh, $buf, 4096); |
790
|
|
|
|
|
|
|
} |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
# We leave immediately as soon as there is no more data left (EOF) |
793
|
0
|
0
|
|
|
|
|
last if $buf eq ''; |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# and here is the all important parsing of that chunk: |
796
|
|
|
|
|
|
|
# and we could get exceptions thrown here if the XML is invalid... |
797
|
|
|
|
|
|
|
|
798
|
0
|
|
|
|
|
|
$self->{ExpatNB}->parse_more($buf); |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
# ...the recommended way to catch those exceptions is not here, but by wrapping |
801
|
|
|
|
|
|
|
# eval{} around $rdr->iterate like follows |
802
|
|
|
|
|
|
|
# |
803
|
|
|
|
|
|
|
# while (eval{$rdr->iterate}) { |
804
|
|
|
|
|
|
|
# my $text = $rdr->value; |
805
|
|
|
|
|
|
|
# # ... |
806
|
|
|
|
|
|
|
# } |
807
|
|
|
|
|
|
|
# if ($@) { |
808
|
|
|
|
|
|
|
# print "found an error: $@\n"; |
809
|
|
|
|
|
|
|
# } |
810
|
|
|
|
|
|
|
} |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
# return failure if end-of-file... |
813
|
0
|
0
|
|
|
|
|
unless (@{$self->NB_data}) { |
|
0
|
|
|
|
|
|
|
814
|
0
|
|
|
|
|
|
return; |
815
|
|
|
|
|
|
|
} |
816
|
|
|
|
|
|
|
|
817
|
0
|
|
|
|
|
|
my $token = shift @{$self->NB_data}; |
|
0
|
|
|
|
|
|
|
818
|
0
|
|
|
|
|
|
bless $token, 'XML::Reader::Token'; |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
sub handle_decl { |
821
|
0
|
|
|
0
|
0
|
|
my ($ExpatNB, $ver, $encoding, $standalone) = @_; |
822
|
|
|
|
|
|
|
|
823
|
0
|
0
|
|
|
|
|
return unless $ExpatNB->{XR_ParseInst}; |
824
|
|
|
|
|
|
|
|
825
|
0
|
|
|
|
|
|
convert_structure($ExpatNB, 'D'); |
826
|
0
|
0
|
|
|
|
|
$ExpatNB->{XR_Decl} = [(defined $ver ? (version => $ver) : ()), |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
827
|
|
|
|
|
|
|
(defined $encoding ? (encoding => $encoding) : ()), |
828
|
|
|
|
|
|
|
(defined $standalone ? (standalone => ($standalone ? 'yes' : 'no')) : ()), |
829
|
|
|
|
|
|
|
]; |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
sub handle_procinst { |
833
|
0
|
|
|
0
|
0
|
|
my ($ExpatNB, $target, $data) = @_; |
834
|
|
|
|
|
|
|
|
835
|
0
|
0
|
|
|
|
|
return unless $ExpatNB->{XR_ParseInst}; |
836
|
|
|
|
|
|
|
|
837
|
0
|
|
|
|
|
|
convert_structure($ExpatNB, 'P'); |
838
|
0
|
|
|
|
|
|
$ExpatNB->{XR_ProcInst} = [$target, $data]; |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
sub handle_comment { |
842
|
0
|
|
|
0
|
0
|
|
my ($ExpatNB, $comment) = @_; |
843
|
|
|
|
|
|
|
|
844
|
0
|
0
|
|
|
|
|
return unless $ExpatNB->{XR_ParseComm}; |
845
|
|
|
|
|
|
|
|
846
|
0
|
|
|
|
|
|
convert_structure($ExpatNB, 'C'); |
847
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Comment} = $comment; |
848
|
|
|
|
|
|
|
} |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
sub handle_start { |
851
|
0
|
|
|
0
|
0
|
|
my ($ExpatNB, $element, @attr) = @_; |
852
|
|
|
|
|
|
|
|
853
|
0
|
|
|
|
|
|
convert_structure($ExpatNB, 'S'); |
854
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Att} = \@attr; |
855
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, ['<', $element]; |
|
0
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
} |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
sub handle_end { |
859
|
0
|
|
|
0
|
0
|
|
my ($ExpatNB, $element) = @_; |
860
|
|
|
|
|
|
|
|
861
|
0
|
|
|
|
|
|
convert_structure($ExpatNB, 'E'); |
862
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, ['>', $element]; |
|
0
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
} |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
sub handle_char { |
866
|
0
|
|
|
0
|
0
|
|
my ($ExpatNB, $text) = @_; |
867
|
|
|
|
|
|
|
|
868
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Text} .= $text; |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
sub convert_structure { |
872
|
0
|
|
|
0
|
0
|
|
my ($ExpatNB, $Param_SPECD) = @_; # $Param_SPECD can be either 'S', 'P', 'E', 'C' or 'D' (or even '*') |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
# These are the text and comment that may be stripped |
875
|
0
|
|
|
|
|
|
my $text = $ExpatNB->{XR_Text}; |
876
|
0
|
|
|
|
|
|
my $comment = $ExpatNB->{XR_Comment}; |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
# strip spaces if requested... |
879
|
0
|
0
|
|
|
|
|
if ($ExpatNB->{XR_Strip}) { |
880
|
0
|
|
|
|
|
|
for my $item ($text, $comment) { |
881
|
0
|
|
|
|
|
|
$item =~ s{\A \s+}''xms; |
882
|
0
|
|
|
|
|
|
$item =~ s{\s+ \z}''xms; |
883
|
0
|
|
|
|
|
|
$item =~ s{\s+}' 'xmsg; |
884
|
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
# Don't do anything for the first tag... |
888
|
0
|
0
|
|
|
|
|
unless ($ExpatNB->{XR_Prv_SPECD} eq '') { |
889
|
|
|
|
|
|
|
# Here we save the previous 'SPECD' and the current (i.e. next) 'SPECD' into lexicals |
890
|
|
|
|
|
|
|
# so that we can manipulate them |
891
|
0
|
|
|
|
|
|
my $prev_SPECD = $ExpatNB->{XR_Prv_SPECD}; |
892
|
0
|
|
|
|
|
|
my $next_SPECD = $Param_SPECD; |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
# Do we want , , and pi ?> split up into separate lines ? |
895
|
0
|
0
|
|
|
|
|
if ($ExpatNB->{XR_Split_up}) { |
896
|
0
|
0
|
|
|
|
|
if ($prev_SPECD ne 'E') { |
897
|
|
|
|
|
|
|
# emit the opening tag with empty text |
898
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, |
899
|
0
|
|
|
|
|
|
['T', '', $comment, $prev_SPECD, '*', $ExpatNB->{XR_Att}, $ExpatNB->{XR_ProcInst}, $ExpatNB->{XR_Decl}]; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
|
902
|
0
|
0
|
|
|
|
|
if ($ExpatNB->{XR_Emit_attr}) { |
903
|
|
|
|
|
|
|
# Here we emit attributes on their proper lines -- *after* the start-line (see above) ... |
904
|
0
|
|
|
|
|
|
my %at = @{$ExpatNB->{XR_Att}}; |
|
0
|
|
|
|
|
|
|
905
|
0
|
|
|
|
|
|
for my $key (sort keys %at) { |
906
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, ['A', $key, $at{$key}]; |
|
0
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
# emit text (only if it is not empty) |
911
|
0
|
0
|
|
|
|
|
unless ($text eq '') { |
912
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, |
|
0
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
['T', $text, '', '-', '*', [], [], []]; |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
|
916
|
0
|
0
|
|
|
|
|
if ($next_SPECD eq 'E') { |
917
|
|
|
|
|
|
|
# emit the closing tag with empty text |
918
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, |
|
0
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
['T', '', '', '*', $next_SPECD, [], [], []]; |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
# Here we don't want , , and pi ?> split up into separate lines ! |
923
|
|
|
|
|
|
|
else { |
924
|
|
|
|
|
|
|
# Do we really want to emit attributes on their proper lines ? -- or do we just |
925
|
|
|
|
|
|
|
# want to publish the attributes on element ${$ExpatNB->{XR_Data}}[5] ? |
926
|
0
|
0
|
|
|
|
|
if ($ExpatNB->{XR_Emit_attr}) { |
927
|
|
|
|
|
|
|
|
928
|
0
|
|
|
|
|
|
my %at = @{$ExpatNB->{XR_Att}}; |
|
0
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
# Here we emit attributes on their proper lines -- *before* the start line (see below) ... |
931
|
0
|
|
|
|
|
|
for my $key (sort keys %at) { |
932
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, ['A', $key, $at{$key}]; |
|
0
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
} |
934
|
|
|
|
|
|
|
} |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
# And here we emit the text |
937
|
0
|
|
|
|
|
|
push @{$ExpatNB->{XR_Data}}, |
938
|
0
|
|
|
|
|
|
['T', $text, $comment, $prev_SPECD, $next_SPECD, $ExpatNB->{XR_Att}, $ExpatNB->{XR_ProcInst}, $ExpatNB->{XR_Decl}]; |
939
|
|
|
|
|
|
|
} |
940
|
|
|
|
|
|
|
} |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
# Initialise values: |
943
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Text} = ''; |
944
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Comment} = ''; |
945
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Att} = []; |
946
|
0
|
|
|
|
|
|
$ExpatNB->{XR_ProcInst} = []; |
947
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Decl} = []; |
948
|
|
|
|
|
|
|
|
949
|
0
|
|
|
|
|
|
$ExpatNB->{XR_Prv_SPECD} = $Param_SPECD; |
950
|
|
|
|
|
|
|
} |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
sub DESTROY { |
953
|
0
|
|
|
0
|
|
|
my $self = shift; |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
# There are circular references inside an XML::Parser::ExpatNB-object |
956
|
|
|
|
|
|
|
# which need to be cleaned up by calling XML::Parser::Expat->release. |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
# I quote from the documentation of 'XML::Parser::Expat' (-- XML::Parser::Expat |
959
|
|
|
|
|
|
|
# is a super-class of XML::Parser::ExpatNB --) |
960
|
|
|
|
|
|
|
# |
961
|
|
|
|
|
|
|
# >> ------------------------------------------------------------------------ |
962
|
|
|
|
|
|
|
# >> =item release |
963
|
|
|
|
|
|
|
# >> |
964
|
|
|
|
|
|
|
# >> There are data structures used by XML::Parser::Expat that have circular |
965
|
|
|
|
|
|
|
# >> references. This means that these structures will never be garbage |
966
|
|
|
|
|
|
|
# >> collected unless these references are explicitly broken. Calling this |
967
|
|
|
|
|
|
|
# >> method breaks those references (and makes the instance unusable.) |
968
|
|
|
|
|
|
|
# >> |
969
|
|
|
|
|
|
|
# >> Normally, higher level calls handle this for you, but if you are using |
970
|
|
|
|
|
|
|
# >> XML::Parser::Expat directly, then it's your responsibility to call it. |
971
|
|
|
|
|
|
|
# >> ------------------------------------------------------------------------ |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
# There is a possibility that the XML::Parser::ExpatNB-object did not get |
974
|
|
|
|
|
|
|
# created, while still blessing the XML::Reader object. Therefore we have to |
975
|
|
|
|
|
|
|
# test for this case before calling XML::Parser::ExpatNB->release. |
976
|
|
|
|
|
|
|
|
977
|
0
|
0
|
|
|
|
|
if ($self->{ExpatNB}) { |
978
|
0
|
|
|
|
|
|
$self->{ExpatNB}->release; # ...and not $self->{ExpatNB}->parse_done; |
979
|
|
|
|
|
|
|
} |
980
|
|
|
|
|
|
|
} |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
sub slurp_xml { |
983
|
0
|
|
|
0
|
1
|
|
my $data = shift; |
984
|
|
|
|
|
|
|
|
985
|
0
|
|
|
|
|
|
my @roots; |
986
|
0
|
|
|
|
|
|
my $filter = { filter => 5 }; |
987
|
|
|
|
|
|
|
|
988
|
0
|
|
|
|
|
|
for my $r (@_) { |
989
|
0
|
0
|
|
|
|
|
if (defined $r->{dupatt}) { |
990
|
0
|
|
|
|
|
|
$filter->{dupatt} = $r->{dupatt}; |
991
|
|
|
|
|
|
|
} |
992
|
|
|
|
|
|
|
else { |
993
|
0
|
|
|
|
|
|
push @roots, $r; |
994
|
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
} |
996
|
|
|
|
|
|
|
|
997
|
0
|
|
|
|
|
|
my @tree = map {[]} @roots; # start with as many empty lists as there are roots |
|
0
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
|
999
|
0
|
|
|
|
|
|
my $rdr = XML::Reader->new($data, $filter, @roots); |
1000
|
|
|
|
|
|
|
|
1001
|
0
|
|
|
|
|
|
while ($rdr->iterate) { |
1002
|
0
|
|
|
|
|
|
push @{$tree[$rdr->rx]}, $rdr->rvalue; |
|
0
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
|
1005
|
0
|
|
|
|
|
|
return \@tree; |
1006
|
|
|
|
|
|
|
} |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
# The package used here - XML::Reader::Token |
1009
|
|
|
|
|
|
|
# has been inspired by XML::TokeParser::Token |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
package XML::Reader::Token; |
1012
|
|
|
|
|
|
|
$XML::Reader::Token::VERSION = '0.67'; |
1013
|
0
|
|
|
0
|
|
|
sub found_start_tag { $_[0][0] eq '<'; } |
1014
|
0
|
|
|
0
|
|
|
sub found_end_tag { $_[0][0] eq '>'; } |
1015
|
0
|
|
|
0
|
|
|
sub found_attr { $_[0][0] eq 'A'; } |
1016
|
0
|
|
|
0
|
|
|
sub found_text { $_[0][0] eq 'T'; } |
1017
|
|
|
|
|
|
|
|
1018
|
0
|
|
|
0
|
|
|
sub extract_tag { $_[0][1]; } # type eq '<' or '>' |
1019
|
|
|
|
|
|
|
|
1020
|
0
|
|
|
0
|
|
|
sub extract_attkey { $_[0][1]; } # type eq 'A' |
1021
|
0
|
|
|
0
|
|
|
sub extract_attval { $_[0][2]; } # type eq 'A' |
1022
|
|
|
|
|
|
|
|
1023
|
0
|
|
|
0
|
|
|
sub extract_text { $_[0][1]; } # type eq 'T' |
1024
|
0
|
|
|
0
|
|
|
sub extract_comment { $_[0][2]; } # type eq 'T' |
1025
|
|
|
|
|
|
|
|
1026
|
0
|
|
|
0
|
|
|
sub extract_prv_SPECD { $_[0][3]; } # type eq 'T' |
1027
|
0
|
|
|
0
|
|
|
sub extract_nxt_SPECD { $_[0][4]; } # type eq 'T' |
1028
|
0
|
|
|
0
|
|
|
sub extract_attr { $_[0][5]; } # type eq 'T' |
1029
|
0
|
|
|
0
|
|
|
sub extract_proc { $_[0][6]; } # type eq 'T' |
1030
|
0
|
|
|
0
|
|
|
sub extract_decl { $_[0][7]; } # type eq 'T' |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
1; |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
__END__ |