| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package MAB2::Parser::XML; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | our $VERSION = '0.23'; | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 7 |  |  | 7 |  | 427 | use strict; | 
|  | 7 |  |  |  |  | 12 |  | 
|  | 7 |  |  |  |  | 183 |  | 
| 6 | 7 |  |  | 7 |  | 42 | use warnings; | 
|  | 7 |  |  |  |  | 10 |  | 
|  | 7 |  |  |  |  | 201 |  | 
| 7 | 7 |  |  | 7 |  | 33 | use Carp qw<croak>; | 
|  | 7 |  |  |  |  | 24 |  | 
|  | 7 |  |  |  |  | 268 |  | 
| 8 | 7 |  |  | 7 |  | 2524 | use XML::LibXML::Reader; | 
|  | 7 |  |  |  |  | 252433 |  | 
|  | 7 |  |  |  |  | 3979 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | sub new { | 
| 12 | 10 |  |  | 10 | 1 | 2871 | my $class = shift; | 
| 13 | 10 |  |  |  |  | 18 | my $input  = shift; | 
| 14 |  |  |  |  |  |  |  | 
| 15 | 10 |  |  |  |  | 45 | my $self = { | 
| 16 |  |  |  |  |  |  | filename   => undef, | 
| 17 |  |  |  |  |  |  | rec_number => 0, | 
| 18 |  |  |  |  |  |  | xml_reader => undef, | 
| 19 |  |  |  |  |  |  | }; | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | # check for file or filehandle | 
| 22 | 10 |  |  |  |  | 20 | my $ishandle = eval { fileno($input); }; | 
|  | 10 |  |  |  |  | 93 |  | 
| 23 | 10 | 100 | 66 |  |  | 310 | if ( !$@ && defined $ishandle ) { | 
|  |  | 100 | 33 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
| 24 | 3 |  |  |  |  | 7 | binmode $input; # drop all PerlIO layers, as required by libxml2 | 
| 25 | 3 | 50 |  |  |  | 27 | my $reader = XML::LibXML::Reader->new( IO => $input ) | 
| 26 |  |  |  |  |  |  | or croak "cannot read from filehandle $input\n"; | 
| 27 | 3 |  |  |  |  | 711 | $self->{filename}   = scalar $input; | 
| 28 | 3 |  |  |  |  | 8 | $self->{xml_reader} = $reader; | 
| 29 |  |  |  |  |  |  | } | 
| 30 |  |  |  |  |  |  | elsif ( defined $input && $input !~ /\n/ && -e $input ) { | 
| 31 | 1 | 50 |  |  |  | 9 | my $reader = XML::LibXML::Reader->new( location => $input ) | 
| 32 |  |  |  |  |  |  | or croak "cannot read from file $input\n"; | 
| 33 | 1 |  |  |  |  | 265 | $self->{filename}   = $input; | 
| 34 | 1 |  |  |  |  | 3 | $self->{xml_reader} = $reader; | 
| 35 |  |  |  |  |  |  | } | 
| 36 |  |  |  |  |  |  | elsif ( defined $input && length $input > 0 ) { | 
| 37 | 5 | 50 |  |  |  | 27 | my $reader = XML::LibXML::Reader->new( string => $input ) | 
| 38 |  |  |  |  |  |  | or croak "cannot read XML string $input\n"; | 
| 39 | 5 |  |  |  |  | 367 | $self->{xml_reader} = $reader; | 
| 40 |  |  |  |  |  |  | } | 
| 41 |  |  |  |  |  |  | else { | 
| 42 | 1 |  |  |  |  | 30 | croak "file, filehande or string $input does not exists"; | 
| 43 |  |  |  |  |  |  | } | 
| 44 | 9 |  |  |  |  | 38 | return ( bless $self, $class ); | 
| 45 |  |  |  |  |  |  | } | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | sub next { | 
| 49 | 30 |  |  | 30 | 1 | 1638 | my $self = shift; | 
| 50 | 30 | 100 |  |  |  | 1210 | if ( $self->{xml_reader}->nextElement('datensatz') ) { | 
| 51 | 29 |  |  |  |  | 300 | $self->{rec_number}++; | 
| 52 | 29 |  |  |  |  | 62 | my $record = _decode( $self->{xml_reader} ); | 
| 53 | 29 |  |  |  |  | 8785 | my ($id) = map { $_->[-1] } grep { $_->[0] =~ '001' } @{$record}; | 
|  | 29 |  |  |  |  | 60 |  | 
|  | 1408 |  |  |  |  | 1905 |  | 
|  | 29 |  |  |  |  | 57 |  | 
| 54 | 29 |  |  |  |  | 143 | return { _id => $id, record => $record }; | 
| 55 |  |  |  |  |  |  | } | 
| 56 | 1 |  |  |  |  | 4 | return; | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | sub _decode { | 
| 61 | 29 |  |  | 29 |  | 37 | my $reader = shift; | 
| 62 | 29 |  |  |  |  | 35 | my @record; | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | # get all field nodes from MAB2 XML record; | 
| 65 | 29 |  |  |  |  | 2964 | foreach my $field_node ( | 
| 66 |  |  |  |  |  |  | $reader->copyCurrentNode(1)->getChildrenByTagName('feld') ) | 
| 67 |  |  |  |  |  |  | { | 
| 68 | 1408 |  |  |  |  | 7513 | my @field; | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | # get field tag number | 
| 71 | 1408 |  |  |  |  | 2171 | my $tag = $field_node->getAttribute('nr'); | 
| 72 | 1408 |  | 50 |  |  | 9709 | my $ind = $field_node->getAttribute('ind') // ''; | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | # ToDo: textContent ignores </tf> and <ns> | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | # Check for data or subfields | 
| 77 | 1408 | 100 |  |  |  | 9793 | if ( my @subfields = $field_node->getChildrenByTagName('uf') ) { | 
| 78 | 71 |  |  |  |  | 1127 | push( @field, ( $tag, $ind ) ); | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | # get all subfield nodes | 
| 81 | 71 |  |  |  |  | 113 | foreach my $subfield_node (@subfields) { | 
| 82 | 115 |  |  |  |  | 190 | my $subfield_code = $subfield_node->getAttribute('code'); | 
| 83 | 115 |  |  |  |  | 887 | my $subfield_data = $subfield_node->textContent; | 
| 84 | 115 |  |  |  |  | 256 | push( @field, ( $subfield_code, $subfield_data ) ); | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  | else { | 
| 88 | 1337 |  |  |  |  | 24739 | my $data = $field_node->textContent(); | 
| 89 | 1337 |  |  |  |  | 2710 | push( @field, ( $tag, $ind, '_', $data ) ); | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 1408 |  |  |  |  | 3605 | push( @record, [@field] ); | 
| 93 |  |  |  |  |  |  | } | 
| 94 | 29 |  |  |  |  | 66 | return \@record; | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | 1; | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | __END__ | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | =pod | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =encoding UTF-8 | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | =head1 NAME | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | MAB2::Parser::XML - MAB2 XML parser | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | L<MAB2::Parser::XML> is a parser for MAB2 XML records. | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | use MAB2::Parser::XML; | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | my $parser = MAB2::Parser::XML->new( $filename ); | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | while ( my $record_hash = $parser->next() ) { | 
| 119 |  |  |  |  |  |  | # do something | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | =head1 Arguments | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | =over | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | =item C<file> | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | Path to file with MAB2 XML records. | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | =item C<fh> | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | Open filehandle for file with MAB2 XML records. | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | =item C<string> | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | XML string with MAB2 XML records. | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | =back | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | =head1 METHODS | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | =head2 new($filename | $filehandle | $string) | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | =head2 next() | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | Reads the next record from MAB2 XML input stream. Returns a Perl hash. | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | =head2 _decode($record) | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | Deserialize a MAB2 XML record to an an ARRAY of ARRAYs. | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | =head1 SEEALSO | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | L<Catmandu::Importer::MAB2>. | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | =head1 AUTHOR | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | Johann Rolschewski <jorol@cpan.org> | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | This software is copyright (c) 2013 by Johann Rolschewski. | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 165 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | =cut |