line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
1
|
|
|
1
|
|
29085
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
22
|
|
2
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
40
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package XML::Saxtract; |
5
|
|
|
|
|
|
|
$XML::Saxtract::VERSION = '1.02'; |
6
|
|
|
|
|
|
|
# ABSTRACT: Streaming parse XML data into a result hash based upon a specification hash |
7
|
|
|
|
|
|
|
# PODNAME: XML::Saxtract |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
3
|
use Exporter qw(import); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
31
|
|
10
|
|
|
|
|
|
|
our @EXPORT_OK = qw(saxtract_string saxtract_url); |
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
1
|
|
551
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
28129
|
|
|
1
|
|
|
|
|
25
|
|
13
|
1
|
|
|
1
|
|
371
|
use XML::SAX; |
|
1
|
|
|
|
|
2733
|
|
|
1
|
|
|
|
|
166
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub saxtract_string { |
16
|
9
|
|
|
9
|
1
|
4844
|
my $xml_string = shift; |
17
|
9
|
|
|
|
|
11
|
my $spec = shift; |
18
|
9
|
|
|
|
|
12
|
my %options = @_; |
19
|
|
|
|
|
|
|
|
20
|
9
|
|
|
|
|
56
|
my $handler = XML::Saxtract::ContentHandler->new( $spec, $options{object} ); |
21
|
9
|
|
|
|
|
50
|
my $parser = XML::SAX::ParserFactory->parser( Handler => $handler ); |
22
|
9
|
|
|
|
|
41491
|
$parser->parse_string($xml_string); |
23
|
|
|
|
|
|
|
|
24
|
9
|
|
|
|
|
650
|
return $handler->get_result(); |
25
|
|
|
|
|
|
|
} |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub saxtract_url { |
28
|
1
|
|
|
1
|
1
|
1706
|
my $uri = shift; |
29
|
1
|
|
|
|
|
1
|
my $spec = shift; |
30
|
1
|
|
|
|
|
3
|
my %options = @_; |
31
|
|
|
|
|
|
|
|
32
|
1
|
|
33
|
|
|
13
|
my $agent = $options{agent} || LWP::UserAgent->new(); |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
|
|
1944
|
my $response = $agent->get($uri); |
35
|
1
|
50
|
|
|
|
18374
|
if ( !$response->is_success() ) { |
36
|
0
|
0
|
|
|
|
0
|
if ( $options{die_on_failure} ) { |
37
|
0
|
|
|
|
|
0
|
die($response); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
else { |
40
|
0
|
|
|
|
|
0
|
return; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
1
|
|
|
|
|
13
|
return saxtract_string( $response->content(), $spec, %options ); |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
package XML::Saxtract::ContentHandler; |
48
|
|
|
|
|
|
|
$XML::Saxtract::ContentHandler::VERSION = '1.02'; |
49
|
1
|
|
|
1
|
|
5
|
use parent qw(Class::Accessor); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
5
|
|
50
|
|
|
|
|
|
|
__PACKAGE__->follow_best_practice; |
51
|
|
|
|
|
|
|
__PACKAGE__->mk_ro_accessors(qw(result)); |
52
|
|
|
|
|
|
|
|
53
|
1
|
|
|
1
|
|
2263
|
use Data::Dumper; |
|
1
|
|
|
|
|
4647
|
|
|
1
|
|
|
|
|
740
|
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub new { |
56
|
9
|
|
|
9
|
|
17
|
my ( $class, @args ) = @_; |
57
|
9
|
|
|
|
|
15
|
my $self = bless( {}, $class ); |
58
|
|
|
|
|
|
|
|
59
|
9
|
|
|
|
|
20
|
return $self->_init(@args); |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub _add_value { |
63
|
44
|
|
|
44
|
|
32
|
my $object = shift; |
64
|
44
|
|
|
|
|
37
|
my $spec = shift; |
65
|
44
|
|
|
|
|
32
|
my $value = shift; |
66
|
|
|
|
|
|
|
|
67
|
44
|
|
|
|
|
34
|
my $type = ref($spec); |
68
|
44
|
100
|
|
|
|
63
|
if ( !$type ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
69
|
32
|
|
|
|
|
65
|
$object->{$spec} = $value; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
elsif ( $type eq 'SCALAR' ) { |
72
|
0
|
|
|
|
|
0
|
$object->{$$spec} = $value; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
elsif ( $type eq 'CODE' ) { |
75
|
1
|
|
|
|
|
3
|
&$spec( $object, $value ); |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
else { |
78
|
11
|
|
|
|
|
11
|
my $name = $spec->{name}; |
79
|
11
|
100
|
|
|
|
31
|
if ( $spec->{type} eq 'array' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
80
|
2
|
100
|
|
|
|
5
|
if ( !defined( $object->{$name} ) ) { |
81
|
1
|
|
|
|
|
2
|
$object->{$name} = []; |
82
|
|
|
|
|
|
|
} |
83
|
2
|
|
|
|
|
1
|
push( @{ $object->{$name} }, $value ); |
|
2
|
|
|
|
|
6
|
|
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
elsif ( $spec->{type} eq 'map' ) { |
86
|
4
|
100
|
|
|
|
10
|
if ( !defined( $object->{$name} ) ) { |
87
|
2
|
|
|
|
|
4
|
$object->{$name} = {}; |
88
|
|
|
|
|
|
|
} |
89
|
4
|
|
|
|
|
13
|
$object->{$name}{ $value->{ $spec->{key} } } = $value; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
elsif ( $spec->{type} eq 'first' ) { |
92
|
5
|
100
|
|
|
|
11
|
if ( !defined( $object->{$name} ) ) { |
93
|
3
|
|
|
|
|
6
|
$object->{$name} = $value; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
else { |
97
|
|
|
|
|
|
|
# type 'last' or default |
98
|
0
|
|
|
|
|
0
|
$object->{$name} = $value; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub characters { |
104
|
29
|
|
|
29
|
|
929
|
my ( $self, $characters ) = @_; |
105
|
29
|
50
|
|
|
|
48
|
return if ( $self->{skip} > 0 ); |
106
|
|
|
|
|
|
|
|
107
|
29
|
50
|
|
|
|
42
|
if ( defined($characters) ) { |
108
|
29
|
|
|
|
|
29
|
push( @{ $self->{buffer} }, $characters->{Data} ); |
|
29
|
|
|
|
|
73
|
|
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub end_element { |
113
|
23
|
|
|
23
|
|
1647
|
my ( $self, $element ) = @_; |
114
|
|
|
|
|
|
|
|
115
|
23
|
50
|
|
|
|
43
|
if ( $self->{skip} > 0 ) { |
116
|
0
|
|
|
|
|
0
|
$self->{skip}--; |
117
|
0
|
|
|
|
|
0
|
return; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
23
|
|
|
|
|
17
|
my $stack_element = pop( @{ $self->{element_stack} } ); |
|
23
|
|
|
|
|
29
|
|
121
|
23
|
|
|
|
|
23
|
my $name = $stack_element->{name}; |
122
|
23
|
|
|
|
|
18
|
my $attrs = $stack_element->{attrs}; |
123
|
23
|
|
|
|
|
17
|
my $spec = $stack_element->{spec}; |
124
|
23
|
|
|
|
|
20
|
my $path = $stack_element->{spec_path}; |
125
|
23
|
|
|
|
|
18
|
my $result = $stack_element->{result}; |
126
|
|
|
|
|
|
|
|
127
|
23
|
50
|
100
|
|
|
40
|
if ( defined( $spec->{$path} ) && scalar( @{ $self->{buffer} } ) ) { |
|
16
|
|
|
|
|
25
|
|
128
|
16
|
|
|
|
|
15
|
my $buffer_data = join( '', @{ $self->{buffer} } ); |
|
16
|
|
|
|
|
22
|
|
129
|
16
|
|
|
|
|
49
|
$buffer_data =~ s/^\s*//; |
130
|
16
|
|
|
|
|
49
|
$buffer_data =~ s/\s*$//; |
131
|
16
|
|
|
|
|
26
|
_add_value( $result, $spec->{$path}, $buffer_data ); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
23
|
|
|
|
|
46
|
foreach my $attr ( values(%$attrs) ) { |
135
|
27
|
|
|
|
|
19
|
my $ns_uri = $attr->{NamespaceURI}; |
136
|
|
|
|
|
|
|
my $attr_path = join( '', |
137
|
|
|
|
|
|
|
$path, '/@', ( $ns_uri && $spec->{$ns_uri} ? "$spec->{$ns_uri}:" : '' ), |
138
|
27
|
100
|
66
|
|
|
74
|
$attr->{LocalName} ); |
139
|
|
|
|
|
|
|
|
140
|
27
|
100
|
|
|
|
45
|
if ( $spec->{$attr_path} ) { |
141
|
17
|
|
|
|
|
26
|
_add_value( $result, $spec->{$attr_path}, $attr->{Value} ); |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
23
|
50
|
100
|
|
|
37
|
if ( !$path && scalar( @{ $self->{element_stack} } ) ) { |
|
11
|
|
|
|
|
19
|
|
146
|
11
|
|
|
|
|
9
|
my $parent_element = $self->{element_stack}[-1]; |
147
|
11
|
|
|
|
|
18
|
my $path_in_parent = "$parent_element->{spec_path}/$name"; |
148
|
11
|
|
|
|
|
19
|
_add_value( $parent_element->{result}, $parent_element->{spec}{$path_in_parent}, |
149
|
|
|
|
|
|
|
$result ); |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
23
|
|
|
|
|
69
|
$self->{buffer} = []; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub _init { |
156
|
9
|
|
|
9
|
|
12
|
my ( $self, $spec, $result ) = @_; |
157
|
|
|
|
|
|
|
|
158
|
9
|
|
50
|
|
|
39
|
$self->{result} = $result || {}; |
159
|
|
|
|
|
|
|
$self->{element_stack} = [ |
160
|
|
|
|
|
|
|
{ spec => $spec, |
161
|
|
|
|
|
|
|
spec_path => '', |
162
|
|
|
|
|
|
|
result => $self->{result} |
163
|
|
|
|
|
|
|
} |
164
|
9
|
|
|
|
|
27
|
]; |
165
|
9
|
|
|
|
|
11
|
$self->{buffer} = []; |
166
|
9
|
|
|
|
|
10
|
$self->{skip} = 0; |
167
|
|
|
|
|
|
|
|
168
|
9
|
|
|
|
|
14
|
return $self; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _spec_prefix { |
172
|
19
|
|
|
19
|
|
16
|
my ( $self, $uri ) = @_; |
173
|
|
|
|
|
|
|
|
174
|
19
|
|
|
|
|
15
|
for ( my $i = scalar( @{ $self->{element_stack} } ) - 1; $i >= 0; $i-- ) { |
|
19
|
|
|
|
|
46
|
|
175
|
23
|
|
|
|
|
33
|
my $spec_prefix = $self->{element_stack}[$i]->{spec}{$uri}; |
176
|
23
|
100
|
|
|
|
45
|
return $spec_prefix if ( defined($spec_prefix) ); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
0
|
return; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub start_element { |
183
|
23
|
|
|
23
|
|
10423
|
my ( $self, $element ) = @_; |
184
|
|
|
|
|
|
|
|
185
|
23
|
50
|
|
|
|
57
|
if ( $self->{skip} ) { |
186
|
0
|
|
|
|
|
0
|
$self->{skip}++; |
187
|
0
|
|
|
|
|
0
|
return; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
23
|
|
|
|
|
25
|
my $stack_top = $self->{element_stack}[-1]; |
191
|
23
|
|
|
|
|
26
|
my $spec = $stack_top->{spec}; |
192
|
23
|
|
|
|
|
20
|
my $result = $stack_top->{result}; |
193
|
23
|
|
|
|
|
19
|
my $uri = $element->{NamespaceURI}; |
194
|
|
|
|
|
|
|
|
195
|
23
|
|
|
|
|
12
|
my $qname; |
196
|
23
|
100
|
|
|
|
31
|
if ($uri) { |
197
|
19
|
|
|
|
|
26
|
my $spec_prefix = $self->_spec_prefix($uri); |
198
|
19
|
50
|
|
|
|
37
|
if ( !defined($spec_prefix) ) { |
|
|
100
|
|
|
|
|
|
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# uri is not in spec, so nothing could possibly match |
201
|
0
|
|
|
|
|
0
|
$self->{skip} = 1; |
202
|
0
|
|
|
|
|
0
|
return; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
elsif ( $spec_prefix eq '' ) { |
205
|
11
|
|
|
|
|
14
|
$qname = $element->{LocalName}; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
else { |
208
|
8
|
|
|
|
|
17
|
$qname = "$spec_prefix:$element->{LocalName}"; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
else { |
212
|
4
|
|
|
|
|
5
|
$qname = $element->{LocalName}; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
23
|
|
|
|
|
31
|
my $spec_path = "$stack_top->{spec_path}/$qname"; |
216
|
23
|
100
|
100
|
|
|
124
|
if ( defined( $spec->{$spec_path} ) |
|
|
|
66
|
|
|
|
|
217
|
|
|
|
|
|
|
&& ref( $spec->{$spec_path} ) eq 'HASH' |
218
|
|
|
|
|
|
|
&& defined( $spec->{$spec_path}{spec} ) ) |
219
|
|
|
|
|
|
|
{ |
220
|
11
|
|
|
|
|
13
|
$spec = $spec->{$spec_path}{spec}; |
221
|
11
|
|
|
|
|
8
|
$spec_path = ''; |
222
|
11
|
|
|
|
|
11
|
$result = {}; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
push( |
226
|
23
|
|
|
|
|
97
|
@{ $self->{element_stack} }, |
227
|
|
|
|
|
|
|
{ name => $qname, |
228
|
|
|
|
|
|
|
attrs => $element->{Attributes}, |
229
|
23
|
|
|
|
|
23
|
spec => $spec, |
230
|
|
|
|
|
|
|
spec_path => $spec_path, |
231
|
|
|
|
|
|
|
result => $result |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
1; |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
__END__ |