| line | stmt | bran | cond | sub | pod | time | code | 
| 1 | 1 |  |  | 1 |  | 69682 | use strict; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 31 |  | 
| 2 | 1 |  |  | 1 |  | 4 | use warnings; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 54 |  | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  | package XML::Saxtract; | 
| 5 |  |  |  |  |  |  | $XML::Saxtract::VERSION = '1.03'; | 
| 6 |  |  |  |  |  |  | # ABSTRACT: Streaming parse XML data into a result hash based upon a specification hash | 
| 7 |  |  |  |  |  |  | # PODNAME: XML::Saxtract | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 1 |  |  | 1 |  | 3 | use Exporter qw(import); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 54 |  | 
| 10 |  |  |  |  |  |  | our @EXPORT_OK = qw(saxtract_string saxtract_url); | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 1 |  |  | 1 |  | 686 | use LWP::UserAgent; | 
|  | 1 |  |  |  |  | 72220 |  | 
|  | 1 |  |  |  |  | 100 |  | 
| 13 | 1 |  |  | 1 |  | 1291 | use XML::SAX; | 
|  | 1 |  |  |  |  | 6336 |  | 
|  | 1 |  |  |  |  | 454 |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | sub saxtract_string { | 
| 16 | 10 |  |  | 10 | 1 | 12246 | my $xml_string = shift; | 
| 17 | 10 |  |  |  |  | 22 | my $spec       = shift; | 
| 18 | 10 |  |  |  |  | 59 | my %options    = @_; | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 10 |  |  |  |  | 118 | my $handler = XML::Saxtract::ContentHandler->new( $spec, $options{object} ); | 
| 21 | 10 |  |  |  |  | 105 | my $parser = XML::SAX::ParserFactory->parser( Handler => $handler ); | 
| 22 | 10 |  |  |  |  | 103626 | $parser->parse_string($xml_string); | 
| 23 |  |  |  |  |  |  |  | 
| 24 | 10 |  |  |  |  | 1568 | return $handler->get_result(); | 
| 25 |  |  |  |  |  |  | } | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | sub saxtract_url { | 
| 28 | 1 |  |  | 1 | 1 | 3762 | my $uri     = shift; | 
| 29 | 1 |  |  |  |  | 3 | my $spec    = shift; | 
| 30 | 1 |  |  |  |  | 3 | my %options = @_; | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 1 |  | 33 |  |  | 27 | my $agent = $options{agent} || LWP::UserAgent->new(); | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 1 |  |  |  |  | 4371 | my $response = $agent->get($uri); | 
| 35 | 1 | 50 |  |  |  | 47250 | if ( !$response->is_success() ) { | 
| 36 | 0 | 0 |  |  |  | 0 | if ( $options{die_on_failure} ) { | 
| 37 | 0 |  |  |  |  | 0 | die($response); | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  | else { | 
| 40 | 0 |  |  |  |  | 0 | return; | 
| 41 |  |  |  |  |  |  | } | 
| 42 |  |  |  |  |  |  | } | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 1 |  |  |  |  | 24 | return saxtract_string( $response->content(), $spec, %options ); | 
| 45 |  |  |  |  |  |  | } | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | package XML::Saxtract::ContentHandler; | 
| 48 |  |  |  |  |  |  | $XML::Saxtract::ContentHandler::VERSION = '1.03'; | 
| 49 | 1 |  |  | 1 |  | 13 | use parent qw(Class::Accessor); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 10 |  | 
| 50 |  |  |  |  |  |  | __PACKAGE__->follow_best_practice; | 
| 51 |  |  |  |  |  |  | __PACKAGE__->mk_ro_accessors(qw(result)); | 
| 52 |  |  |  |  |  |  |  | 
| 53 | 1 |  |  | 1 |  | 5092 | use Data::Dumper; | 
|  | 1 |  |  |  |  | 10636 |  | 
|  | 1 |  |  |  |  | 2086 |  | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | sub new { | 
| 56 | 10 |  |  | 10 |  | 37 | my ( $class, @args ) = @_; | 
| 57 | 10 |  |  |  |  | 29 | my $self = bless( {}, $class ); | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 10 |  |  |  |  | 59 | return $self->_init(@args); | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | sub _add_value { | 
| 63 | 50 |  |  | 50 |  | 68 | my $object = shift; | 
| 64 | 50 |  |  |  |  | 58 | my $spec   = shift; | 
| 65 | 50 |  |  |  |  | 71 | my $value  = shift; | 
| 66 |  |  |  |  |  |  |  | 
| 67 | 50 |  |  |  |  | 68 | my $type = ref($spec); | 
| 68 | 50 | 100 |  |  |  | 154 | if ( !$type ) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 69 | 36 |  |  |  |  | 148 | $object->{$spec} = $value; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  | elsif ( $type eq 'SCALAR' ) { | 
| 72 | 0 |  |  |  |  | 0 | $object->{$$spec} = $value; | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  | elsif ( $type eq 'CODE' ) { | 
| 75 | 1 |  |  |  |  | 5 | &$spec( $object, $value ); | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  | else { | 
| 78 | 13 |  |  |  |  | 23 | my $name         = $spec->{name}; | 
| 79 | 13 |  |  |  |  | 25 | my $subspec_type = ref( $spec->{type} ); | 
| 80 | 13 | 100 |  |  |  | 61 | if ($subspec_type) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 81 | 2 | 50 |  |  |  | 9 | if ( $subspec_type eq 'CODE' ) { | 
| 82 | 2 |  |  |  |  | 5 | my $subspec_object = $object->{$name}; | 
| 83 | 2 | 100 |  |  |  | 8 | unless ($subspec_object) { | 
| 84 | 1 |  |  |  |  | 3 | $subspec_object = {}; | 
| 85 | 1 |  |  |  |  | 5 | $object->{$name} = $subspec_object; | 
| 86 |  |  |  |  |  |  | } | 
| 87 | 2 |  |  |  |  | 6 | &{ $spec->{type} }( $subspec_object, $value ); | 
|  | 2 |  |  |  |  | 10 |  | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  | elsif ( $spec->{type} eq 'array' ) { | 
| 91 | 2 | 100 |  |  |  | 10 | if ( !defined( $object->{$name} ) ) { | 
| 92 | 1 |  |  |  |  | 5 | $object->{$name} = []; | 
| 93 |  |  |  |  |  |  | } | 
| 94 | 2 |  |  |  |  | 3 | push( @{ $object->{$name} }, $value ); | 
|  | 2 |  |  |  |  | 9 |  | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  | elsif ( $spec->{type} eq 'map' ) { | 
| 97 | 4 | 100 |  |  |  | 17 | if ( !defined( $object->{$name} ) ) { | 
| 98 | 2 |  |  |  |  | 9 | $object->{$name} = {}; | 
| 99 |  |  |  |  |  |  | } | 
| 100 | 4 |  |  |  |  | 29 | $object->{$name}{ $value->{ $spec->{key} } } = $value; | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  | elsif ( $spec->{type} eq 'first' ) { | 
| 103 | 5 | 100 |  |  |  | 25 | if ( !defined( $object->{$name} ) ) { | 
| 104 | 3 |  |  |  |  | 14 | $object->{$name} = $value; | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  | else { | 
| 108 |  |  |  |  |  |  | # type 'last' or default | 
| 109 | 0 |  |  |  |  | 0 | $object->{$name} = $value; | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  | } | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | sub characters { | 
| 115 | 36 |  |  | 36 |  | 2319 | my ( $self, $characters ) = @_; | 
| 116 | 36 | 50 |  |  |  | 120 | return if ( $self->{skip} > 0 ); | 
| 117 |  |  |  |  |  |  |  | 
| 118 | 36 | 50 |  |  |  | 89 | if ( defined($characters) ) { | 
| 119 | 36 |  |  |  |  | 77 | push( @{ $self->{buffer} }, $characters->{Data} ); | 
|  | 36 |  |  |  |  | 178 |  | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | sub end_element { | 
| 124 | 30 |  |  | 30 |  | 4487 | my ( $self, $element ) = @_; | 
| 125 |  |  |  |  |  |  |  | 
| 126 | 30 | 50 |  |  |  | 110 | if ( $self->{skip} > 0 ) { | 
| 127 | 0 |  |  |  |  | 0 | $self->{skip}--; | 
| 128 | 0 |  |  |  |  | 0 | return; | 
| 129 |  |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  |  | 
| 131 | 30 |  |  |  |  | 57 | my $stack_element = pop( @{ $self->{element_stack} } ); | 
|  | 30 |  |  |  |  | 80 |  | 
| 132 | 30 |  |  |  |  | 63 | my $name          = $stack_element->{name}; | 
| 133 | 30 |  |  |  |  | 46 | my $attrs         = $stack_element->{attrs}; | 
| 134 | 30 |  |  |  |  | 43 | my $spec          = $stack_element->{spec}; | 
| 135 | 30 |  |  |  |  | 46 | my $path          = $stack_element->{spec_path}; | 
| 136 | 30 |  |  |  |  | 41 | my $result        = $stack_element->{result}; | 
| 137 |  |  |  |  |  |  |  | 
| 138 | 30 | 50 | 100 |  |  | 158 | if ( defined( $spec->{$path} ) && scalar( @{ $self->{buffer} } ) ) { | 
|  | 20 |  |  |  |  | 64 |  | 
| 139 | 20 |  |  |  |  | 25 | my $buffer_data = join( '', @{ $self->{buffer} } ); | 
|  | 20 |  |  |  |  | 54 |  | 
| 140 | 20 |  |  |  |  | 144 | $buffer_data =~ s/^\s*//; | 
| 141 | 20 |  |  |  |  | 118 | $buffer_data =~ s/\s*$//; | 
| 142 | 20 |  |  |  |  | 73 | _add_value( $result, $spec->{$path}, $buffer_data ); | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 30 |  |  |  |  | 107 | foreach my $attr ( values(%$attrs) ) { | 
| 146 | 27 |  |  |  |  | 47 | my $ns_uri    = $attr->{NamespaceURI}; | 
| 147 |  |  |  |  |  |  | my $attr_path = join( '', | 
| 148 |  |  |  |  |  |  | $path, '/@', ( $ns_uri && $spec->{$ns_uri} ? "$spec->{$ns_uri}:" : '' ), | 
| 149 | 27 | 100 | 66 |  |  | 147 | $attr->{LocalName} ); | 
| 150 |  |  |  |  |  |  |  | 
| 151 | 27 | 100 |  |  |  | 88 | if ( $spec->{$attr_path} ) { | 
| 152 | 17 |  |  |  |  | 51 | _add_value( $result, $spec->{$attr_path}, $attr->{Value} ); | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 30 | 50 | 100 |  |  | 94 | if ( !$path && scalar( @{ $self->{element_stack} } ) ) { | 
|  | 13 |  |  |  |  | 46 |  | 
| 157 | 13 |  |  |  |  | 26 | my $parent_element = $self->{element_stack}[-1]; | 
| 158 | 13 |  |  |  |  | 39 | my $path_in_parent = "$parent_element->{spec_path}/$name"; | 
| 159 | 13 |  |  |  |  | 49 | _add_value( $parent_element->{result}, $parent_element->{spec}{$path_in_parent}, | 
| 160 |  |  |  |  |  |  | $result ); | 
| 161 |  |  |  |  |  |  | } | 
| 162 |  |  |  |  |  |  |  | 
| 163 | 30 |  |  |  |  | 200 | $self->{buffer} = []; | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  |  | 
| 166 |  |  |  |  |  |  | sub _init { | 
| 167 | 10 |  |  | 10 |  | 23 | my ( $self, $spec, $result ) = @_; | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 10 |  | 50 |  |  | 87 | $self->{result} = $result || {}; | 
| 170 |  |  |  |  |  |  | $self->{element_stack} = [ | 
| 171 |  |  |  |  |  |  | {   spec      => $spec, | 
| 172 |  |  |  |  |  |  | spec_path => '', | 
| 173 |  |  |  |  |  |  | result    => $self->{result} | 
| 174 |  |  |  |  |  |  | } | 
| 175 | 10 |  |  |  |  | 66 | ]; | 
| 176 | 10 |  |  |  |  | 23 | $self->{buffer} = []; | 
| 177 | 10 |  |  |  |  | 24 | $self->{skip}   = 0; | 
| 178 |  |  |  |  |  |  |  | 
| 179 | 10 |  |  |  |  | 35 | return $self; | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | sub _spec_prefix { | 
| 183 | 19 |  |  | 19 |  | 34 | my ( $self, $uri ) = @_; | 
| 184 |  |  |  |  |  |  |  | 
| 185 | 19 |  |  |  |  | 19 | for ( my $i = scalar( @{ $self->{element_stack} } ) - 1; $i >= 0; $i-- ) { | 
|  | 19 |  |  |  |  | 93 |  | 
| 186 | 23 |  |  |  |  | 71 | my $spec_prefix = $self->{element_stack}[$i]->{spec}{$uri}; | 
| 187 | 23 | 100 |  |  |  | 126 | return $spec_prefix if ( defined($spec_prefix) ); | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  |  | 
| 190 | 0 |  |  |  |  | 0 | return; | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | sub start_element { | 
| 194 | 30 |  |  | 30 |  | 24950 | my ( $self, $element ) = @_; | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 30 | 50 |  |  |  | 118 | if ( $self->{skip} ) { | 
| 197 | 0 |  |  |  |  | 0 | $self->{skip}++; | 
| 198 | 0 |  |  |  |  | 0 | return; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 30 |  |  |  |  | 60 | my $stack_top = $self->{element_stack}[-1]; | 
| 202 | 30 |  |  |  |  | 48 | my $spec      = $stack_top->{spec}; | 
| 203 | 30 |  |  |  |  | 52 | my $result    = $stack_top->{result}; | 
| 204 | 30 |  |  |  |  | 53 | my $uri       = $element->{NamespaceURI}; | 
| 205 |  |  |  |  |  |  |  | 
| 206 | 30 |  |  |  |  | 35 | my $qname; | 
| 207 | 30 | 100 |  |  |  | 67 | if ($uri) { | 
| 208 | 19 |  |  |  |  | 61 | my $spec_prefix = $self->_spec_prefix($uri); | 
| 209 | 19 | 50 |  |  |  | 78 | if ( !defined($spec_prefix) ) { | 
|  |  | 100 |  |  |  |  |  | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | # uri is not in spec, so nothing could possibly match | 
| 212 | 0 |  |  |  |  | 0 | $self->{skip} = 1; | 
| 213 | 0 |  |  |  |  | 0 | return; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  | elsif ( $spec_prefix eq '' ) { | 
| 216 | 11 |  |  |  |  | 27 | $qname = $element->{LocalName}; | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  | else { | 
| 219 | 8 |  |  |  |  | 31 | $qname = "$spec_prefix:$element->{LocalName}"; | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  | } | 
| 222 |  |  |  |  |  |  | else { | 
| 223 | 11 |  |  |  |  | 21 | $qname = $element->{LocalName}; | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  |  | 
| 226 | 30 |  |  |  |  | 82 | my $spec_path = "$stack_top->{spec_path}/$qname"; | 
| 227 | 30 | 100 | 100 |  |  | 286 | if (   defined( $spec->{$spec_path} ) | 
|  |  |  | 66 |  |  |  |  | 
| 228 |  |  |  |  |  |  | && ref( $spec->{$spec_path} ) eq 'HASH' | 
| 229 |  |  |  |  |  |  | && defined( $spec->{$spec_path}{spec} ) ) | 
| 230 |  |  |  |  |  |  | { | 
| 231 | 13 |  |  |  |  | 30 | $spec      = $spec->{$spec_path}{spec}; | 
| 232 | 13 |  |  |  |  | 20 | $spec_path = ''; | 
| 233 | 13 |  |  |  |  | 23 | $result    = {}; | 
| 234 |  |  |  |  |  |  | } | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | push( | 
| 237 | 30 |  |  |  |  | 286 | @{ $self->{element_stack} }, | 
| 238 |  |  |  |  |  |  | {   name      => $qname, | 
| 239 |  |  |  |  |  |  | attrs     => $element->{Attributes}, | 
| 240 | 30 |  |  |  |  | 47 | spec      => $spec, | 
| 241 |  |  |  |  |  |  | spec_path => $spec_path, | 
| 242 |  |  |  |  |  |  | result    => $result | 
| 243 |  |  |  |  |  |  | } | 
| 244 |  |  |  |  |  |  | ); | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | 1; | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | __END__ |