line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
1
|
|
|
1
|
|
69682
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
31
|
|
2
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
54
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package XML::Saxtract; |
5
|
|
|
|
|
|
|
$XML::Saxtract::VERSION = '1.03'; |
6
|
|
|
|
|
|
|
# ABSTRACT: Streaming parse XML data into a result hash based upon a specification hash |
7
|
|
|
|
|
|
|
# PODNAME: XML::Saxtract |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
3
|
use Exporter qw(import); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
54
|
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw(saxtract_string saxtract_url); |
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
1
|
|
686
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
72220
|
|
|
1
|
|
|
|
|
100
|
|
13
|
1
|
|
|
1
|
|
1291
|
use XML::SAX; |
|
1
|
|
|
|
|
6336
|
|
|
1
|
|
|
|
|
454
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub saxtract_string { |
16
|
10
|
|
|
10
|
1
|
12246
|
my $xml_string = shift; |
17
|
10
|
|
|
|
|
22
|
my $spec = shift; |
18
|
10
|
|
|
|
|
59
|
my %options = @_; |
19
|
|
|
|
|
|
|
|
20
|
10
|
|
|
|
|
118
|
my $handler = XML::Saxtract::ContentHandler->new( $spec, $options{object} ); |
21
|
10
|
|
|
|
|
105
|
my $parser = XML::SAX::ParserFactory->parser( Handler => $handler ); |
22
|
10
|
|
|
|
|
103626
|
$parser->parse_string($xml_string); |
23
|
|
|
|
|
|
|
|
24
|
10
|
|
|
|
|
1568
|
return $handler->get_result(); |
25
|
|
|
|
|
|
|
} |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub saxtract_url { |
28
|
1
|
|
|
1
|
1
|
3762
|
my $uri = shift; |
29
|
1
|
|
|
|
|
3
|
my $spec = shift; |
30
|
1
|
|
|
|
|
3
|
my %options = @_; |
31
|
|
|
|
|
|
|
|
32
|
1
|
|
33
|
|
|
27
|
my $agent = $options{agent} || LWP::UserAgent->new(); |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
|
|
4371
|
my $response = $agent->get($uri); |
35
|
1
|
50
|
|
|
|
47250
|
if ( !$response->is_success() ) { |
36
|
0
|
0
|
|
|
|
0
|
if ( $options{die_on_failure} ) { |
37
|
0
|
|
|
|
|
0
|
die($response); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
else { |
40
|
0
|
|
|
|
|
0
|
return; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
1
|
|
|
|
|
24
|
return saxtract_string( $response->content(), $spec, %options ); |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
package XML::Saxtract::ContentHandler; |
48
|
|
|
|
|
|
|
$XML::Saxtract::ContentHandler::VERSION = '1.03'; |
49
|
1
|
|
|
1
|
|
13
|
use parent qw(Class::Accessor); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
10
|
|
50
|
|
|
|
|
|
|
__PACKAGE__->follow_best_practice; |
51
|
|
|
|
|
|
|
__PACKAGE__->mk_ro_accessors(qw(result)); |
52
|
|
|
|
|
|
|
|
53
|
1
|
|
|
1
|
|
5092
|
use Data::Dumper; |
|
1
|
|
|
|
|
10636
|
|
|
1
|
|
|
|
|
2086
|
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub new { |
56
|
10
|
|
|
10
|
|
37
|
my ( $class, @args ) = @_; |
57
|
10
|
|
|
|
|
29
|
my $self = bless( {}, $class ); |
58
|
|
|
|
|
|
|
|
59
|
10
|
|
|
|
|
59
|
return $self->_init(@args); |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub _add_value { |
63
|
50
|
|
|
50
|
|
68
|
my $object = shift; |
64
|
50
|
|
|
|
|
58
|
my $spec = shift; |
65
|
50
|
|
|
|
|
71
|
my $value = shift; |
66
|
|
|
|
|
|
|
|
67
|
50
|
|
|
|
|
68
|
my $type = ref($spec); |
68
|
50
|
100
|
|
|
|
154
|
if ( !$type ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
69
|
36
|
|
|
|
|
148
|
$object->{$spec} = $value; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
elsif ( $type eq 'SCALAR' ) { |
72
|
0
|
|
|
|
|
0
|
$object->{$$spec} = $value; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
elsif ( $type eq 'CODE' ) { |
75
|
1
|
|
|
|
|
5
|
&$spec( $object, $value ); |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
else { |
78
|
13
|
|
|
|
|
23
|
my $name = $spec->{name}; |
79
|
13
|
|
|
|
|
25
|
my $subspec_type = ref( $spec->{type} ); |
80
|
13
|
100
|
|
|
|
61
|
if ($subspec_type) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
81
|
2
|
50
|
|
|
|
9
|
if ( $subspec_type eq 'CODE' ) { |
82
|
2
|
|
|
|
|
5
|
my $subspec_object = $object->{$name}; |
83
|
2
|
100
|
|
|
|
8
|
unless ($subspec_object) { |
84
|
1
|
|
|
|
|
3
|
$subspec_object = {}; |
85
|
1
|
|
|
|
|
5
|
$object->{$name} = $subspec_object; |
86
|
|
|
|
|
|
|
} |
87
|
2
|
|
|
|
|
6
|
&{ $spec->{type} }( $subspec_object, $value ); |
|
2
|
|
|
|
|
10
|
|
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
elsif ( $spec->{type} eq 'array' ) { |
91
|
2
|
100
|
|
|
|
10
|
if ( !defined( $object->{$name} ) ) { |
92
|
1
|
|
|
|
|
5
|
$object->{$name} = []; |
93
|
|
|
|
|
|
|
} |
94
|
2
|
|
|
|
|
3
|
push( @{ $object->{$name} }, $value ); |
|
2
|
|
|
|
|
9
|
|
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
elsif ( $spec->{type} eq 'map' ) { |
97
|
4
|
100
|
|
|
|
17
|
if ( !defined( $object->{$name} ) ) { |
98
|
2
|
|
|
|
|
9
|
$object->{$name} = {}; |
99
|
|
|
|
|
|
|
} |
100
|
4
|
|
|
|
|
29
|
$object->{$name}{ $value->{ $spec->{key} } } = $value; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
elsif ( $spec->{type} eq 'first' ) { |
103
|
5
|
100
|
|
|
|
25
|
if ( !defined( $object->{$name} ) ) { |
104
|
3
|
|
|
|
|
14
|
$object->{$name} = $value; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
else { |
108
|
|
|
|
|
|
|
# type 'last' or default |
109
|
0
|
|
|
|
|
0
|
$object->{$name} = $value; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub characters { |
115
|
36
|
|
|
36
|
|
2319
|
my ( $self, $characters ) = @_; |
116
|
36
|
50
|
|
|
|
120
|
return if ( $self->{skip} > 0 ); |
117
|
|
|
|
|
|
|
|
118
|
36
|
50
|
|
|
|
89
|
if ( defined($characters) ) { |
119
|
36
|
|
|
|
|
77
|
push( @{ $self->{buffer} }, $characters->{Data} ); |
|
36
|
|
|
|
|
178
|
|
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub end_element { |
124
|
30
|
|
|
30
|
|
4487
|
my ( $self, $element ) = @_; |
125
|
|
|
|
|
|
|
|
126
|
30
|
50
|
|
|
|
110
|
if ( $self->{skip} > 0 ) { |
127
|
0
|
|
|
|
|
0
|
$self->{skip}--; |
128
|
0
|
|
|
|
|
0
|
return; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
30
|
|
|
|
|
57
|
my $stack_element = pop( @{ $self->{element_stack} } ); |
|
30
|
|
|
|
|
80
|
|
132
|
30
|
|
|
|
|
63
|
my $name = $stack_element->{name}; |
133
|
30
|
|
|
|
|
46
|
my $attrs = $stack_element->{attrs}; |
134
|
30
|
|
|
|
|
43
|
my $spec = $stack_element->{spec}; |
135
|
30
|
|
|
|
|
46
|
my $path = $stack_element->{spec_path}; |
136
|
30
|
|
|
|
|
41
|
my $result = $stack_element->{result}; |
137
|
|
|
|
|
|
|
|
138
|
30
|
50
|
100
|
|
|
158
|
if ( defined( $spec->{$path} ) && scalar( @{ $self->{buffer} } ) ) { |
|
20
|
|
|
|
|
64
|
|
139
|
20
|
|
|
|
|
25
|
my $buffer_data = join( '', @{ $self->{buffer} } ); |
|
20
|
|
|
|
|
54
|
|
140
|
20
|
|
|
|
|
144
|
$buffer_data =~ s/^\s*//; |
141
|
20
|
|
|
|
|
118
|
$buffer_data =~ s/\s*$//; |
142
|
20
|
|
|
|
|
73
|
_add_value( $result, $spec->{$path}, $buffer_data ); |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
30
|
|
|
|
|
107
|
foreach my $attr ( values(%$attrs) ) { |
146
|
27
|
|
|
|
|
47
|
my $ns_uri = $attr->{NamespaceURI}; |
147
|
|
|
|
|
|
|
my $attr_path = join( '', |
148
|
|
|
|
|
|
|
$path, '/@', ( $ns_uri && $spec->{$ns_uri} ? "$spec->{$ns_uri}:" : '' ), |
149
|
27
|
100
|
66
|
|
|
147
|
$attr->{LocalName} ); |
150
|
|
|
|
|
|
|
|
151
|
27
|
100
|
|
|
|
88
|
if ( $spec->{$attr_path} ) { |
152
|
17
|
|
|
|
|
51
|
_add_value( $result, $spec->{$attr_path}, $attr->{Value} ); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
30
|
50
|
100
|
|
|
94
|
if ( !$path && scalar( @{ $self->{element_stack} } ) ) { |
|
13
|
|
|
|
|
46
|
|
157
|
13
|
|
|
|
|
26
|
my $parent_element = $self->{element_stack}[-1]; |
158
|
13
|
|
|
|
|
39
|
my $path_in_parent = "$parent_element->{spec_path}/$name"; |
159
|
13
|
|
|
|
|
49
|
_add_value( $parent_element->{result}, $parent_element->{spec}{$path_in_parent}, |
160
|
|
|
|
|
|
|
$result ); |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
30
|
|
|
|
|
200
|
$self->{buffer} = []; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub _init { |
167
|
10
|
|
|
10
|
|
23
|
my ( $self, $spec, $result ) = @_; |
168
|
|
|
|
|
|
|
|
169
|
10
|
|
50
|
|
|
87
|
$self->{result} = $result || {}; |
170
|
|
|
|
|
|
|
$self->{element_stack} = [ |
171
|
|
|
|
|
|
|
{ spec => $spec, |
172
|
|
|
|
|
|
|
spec_path => '', |
173
|
|
|
|
|
|
|
result => $self->{result} |
174
|
|
|
|
|
|
|
} |
175
|
10
|
|
|
|
|
66
|
]; |
176
|
10
|
|
|
|
|
23
|
$self->{buffer} = []; |
177
|
10
|
|
|
|
|
24
|
$self->{skip} = 0; |
178
|
|
|
|
|
|
|
|
179
|
10
|
|
|
|
|
35
|
return $self; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub _spec_prefix { |
183
|
19
|
|
|
19
|
|
34
|
my ( $self, $uri ) = @_; |
184
|
|
|
|
|
|
|
|
185
|
19
|
|
|
|
|
19
|
for ( my $i = scalar( @{ $self->{element_stack} } ) - 1; $i >= 0; $i-- ) { |
|
19
|
|
|
|
|
93
|
|
186
|
23
|
|
|
|
|
71
|
my $spec_prefix = $self->{element_stack}[$i]->{spec}{$uri}; |
187
|
23
|
100
|
|
|
|
126
|
return $spec_prefix if ( defined($spec_prefix) ); |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
0
|
|
|
|
|
0
|
return; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub start_element { |
194
|
30
|
|
|
30
|
|
24950
|
my ( $self, $element ) = @_; |
195
|
|
|
|
|
|
|
|
196
|
30
|
50
|
|
|
|
118
|
if ( $self->{skip} ) { |
197
|
0
|
|
|
|
|
0
|
$self->{skip}++; |
198
|
0
|
|
|
|
|
0
|
return; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
30
|
|
|
|
|
60
|
my $stack_top = $self->{element_stack}[-1]; |
202
|
30
|
|
|
|
|
48
|
my $spec = $stack_top->{spec}; |
203
|
30
|
|
|
|
|
52
|
my $result = $stack_top->{result}; |
204
|
30
|
|
|
|
|
53
|
my $uri = $element->{NamespaceURI}; |
205
|
|
|
|
|
|
|
|
206
|
30
|
|
|
|
|
35
|
my $qname; |
207
|
30
|
100
|
|
|
|
67
|
if ($uri) { |
208
|
19
|
|
|
|
|
61
|
my $spec_prefix = $self->_spec_prefix($uri); |
209
|
19
|
50
|
|
|
|
78
|
if ( !defined($spec_prefix) ) { |
|
|
100
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# uri is not in spec, so nothing could possibly match |
212
|
0
|
|
|
|
|
0
|
$self->{skip} = 1; |
213
|
0
|
|
|
|
|
0
|
return; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
elsif ( $spec_prefix eq '' ) { |
216
|
11
|
|
|
|
|
27
|
$qname = $element->{LocalName}; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
else { |
219
|
8
|
|
|
|
|
31
|
$qname = "$spec_prefix:$element->{LocalName}"; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
else { |
223
|
11
|
|
|
|
|
21
|
$qname = $element->{LocalName}; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
30
|
|
|
|
|
82
|
my $spec_path = "$stack_top->{spec_path}/$qname"; |
227
|
30
|
100
|
100
|
|
|
286
|
if ( defined( $spec->{$spec_path} ) |
|
|
|
66
|
|
|
|
|
228
|
|
|
|
|
|
|
&& ref( $spec->{$spec_path} ) eq 'HASH' |
229
|
|
|
|
|
|
|
&& defined( $spec->{$spec_path}{spec} ) ) |
230
|
|
|
|
|
|
|
{ |
231
|
13
|
|
|
|
|
30
|
$spec = $spec->{$spec_path}{spec}; |
232
|
13
|
|
|
|
|
20
|
$spec_path = ''; |
233
|
13
|
|
|
|
|
23
|
$result = {}; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
push( |
237
|
30
|
|
|
|
|
286
|
@{ $self->{element_stack} }, |
238
|
|
|
|
|
|
|
{ name => $qname, |
239
|
|
|
|
|
|
|
attrs => $element->{Attributes}, |
240
|
30
|
|
|
|
|
47
|
spec => $spec, |
241
|
|
|
|
|
|
|
spec_path => $spec_path, |
242
|
|
|
|
|
|
|
result => $result |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
); |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
1; |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
__END__ |