line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MARC::Parser::XML; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
173540
|
use strict; |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
59
|
|
4
|
2
|
|
|
2
|
|
12
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
45
|
|
5
|
2
|
|
|
2
|
|
45
|
use 5.008_005; |
|
2
|
|
|
|
|
8
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
7
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
11
|
use Carp qw(croak); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
108
|
|
9
|
2
|
|
|
2
|
|
948
|
use XML::LibXML::Reader; |
|
2
|
|
|
|
|
95787
|
|
|
2
|
|
|
|
|
1367
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub new { |
12
|
2
|
|
|
2
|
1
|
634
|
my ( $class, $input ) = @_; |
13
|
|
|
|
|
|
|
|
14
|
2
|
|
|
|
|
8
|
my $self = bless { input => $input, rec_number => 0, }, $class; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# check for file or filehandle |
17
|
2
|
|
|
|
|
4
|
my $ishandle = eval { fileno($input); }; |
|
2
|
|
|
|
|
14
|
|
18
|
2
|
50
|
33
|
|
|
70
|
if ( !$@ && defined $ishandle ) { |
|
|
50
|
33
|
|
|
|
|
|
|
0
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
19
|
0
|
|
|
|
|
0
|
binmode $input; # drop all PerlIO layers, as required by libxml2 |
20
|
0
|
0
|
|
|
|
0
|
my $reader = XML::LibXML::Reader->new( IO => $input ) |
21
|
|
|
|
|
|
|
or croak "cannot read from filehandle $input\n"; |
22
|
0
|
|
|
|
|
0
|
$self->{xml_reader} = $reader; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
elsif ( defined $input && $input !~ /\n/ && -e $input ) { |
25
|
2
|
50
|
|
|
|
11
|
my $reader = XML::LibXML::Reader->new( location => $input ) |
26
|
|
|
|
|
|
|
or croak "cannot read from file $input\n"; |
27
|
2
|
|
|
|
|
396
|
$self->{xml_reader} = $reader; |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
elsif ( defined $input && length $input > 0 ) { |
30
|
0
|
0
|
0
|
|
|
0
|
$input = ${$input} if ( ref($input) // '' eq 'SCALAR' ); |
|
0
|
|
|
|
|
0
|
|
31
|
0
|
0
|
|
|
|
0
|
my $reader = XML::LibXML::Reader->new( string => $input ) |
32
|
|
|
|
|
|
|
or croak "cannot read XML string $input\n"; |
33
|
0
|
|
|
|
|
0
|
$self->{xml_reader} = $reader; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
else { |
36
|
0
|
|
|
|
|
0
|
croak "file, filehande or string $input does not exists"; |
37
|
|
|
|
|
|
|
} |
38
|
2
|
|
|
|
|
17
|
return $self; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub next { |
42
|
22
|
|
|
22
|
1
|
13191
|
my ($self) = @_; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
return |
45
|
|
|
|
|
|
|
unless $self->{xml_reader} |
46
|
22
|
100
|
|
|
|
880
|
->nextElement( 'record', 'http://www.loc.gov/MARC21/slim' ); |
47
|
|
|
|
|
|
|
|
48
|
20
|
50
|
|
|
|
54
|
if ( my $record = $self->_decode() ) { |
49
|
20
|
|
|
|
|
3055
|
return $record; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
else { |
52
|
0
|
|
|
|
|
0
|
return $self->next; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
0
|
return; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub _decode { |
59
|
20
|
|
|
20
|
|
34
|
my ($self) = @_; |
60
|
20
|
|
|
|
|
125
|
my @record; |
61
|
|
|
|
|
|
|
|
62
|
20
|
|
|
|
|
2607
|
foreach my $field_node ( |
63
|
|
|
|
|
|
|
$self->{xml_reader}->copyCurrentNode(1)->getChildrenByTagName('*') ) |
64
|
|
|
|
|
|
|
{ |
65
|
|
|
|
|
|
|
|
66
|
366
|
100
|
|
|
|
10664
|
if ( $field_node->localName =~ m/leader/ ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
67
|
20
|
|
|
|
|
160
|
push @record, |
68
|
|
|
|
|
|
|
[ 'LDR', undef, undef, '_', $field_node->textContent ]; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
elsif ( $field_node->localName =~ m/controlfield/ ) { |
71
|
80
|
|
|
|
|
196
|
push @record, |
72
|
|
|
|
|
|
|
[ |
73
|
|
|
|
|
|
|
$field_node->getAttribute('tag'), undef, |
74
|
|
|
|
|
|
|
undef, '_', |
75
|
|
|
|
|
|
|
$field_node->textContent |
76
|
|
|
|
|
|
|
]; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
elsif ( $field_node->localName eq 'datafield' ) { |
79
|
|
|
|
|
|
|
push @record, |
80
|
|
|
|
|
|
|
[ |
81
|
|
|
|
|
|
|
$field_node->getAttribute('tag'), |
82
|
|
|
|
|
|
|
$field_node->getAttribute('ind1') // '', |
83
|
|
|
|
|
|
|
$field_node->getAttribute('ind2') // '', |
84
|
266
|
|
50
|
|
|
587
|
map { $_->getAttribute('code'), $_->textContent } |
|
426
|
|
50
|
|
|
13696
|
|
85
|
|
|
|
|
|
|
$field_node->getChildrenByTagName('*') |
86
|
|
|
|
|
|
|
]; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
} |
89
|
20
|
|
|
|
|
440
|
return \@record; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
1; |
93
|
|
|
|
|
|
|
__END__ |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=encoding utf-8 |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head1 NAME |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
MARC::Parser::XML - Parser for MARC XML records |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=begin markdown |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
[![Build Status](https://travis-ci.org/jorol/MARC-Parser-XML.png)](https://travis-ci.org/jorol/MARC-Parser-XML) |
104
|
|
|
|
|
|
|
[![Coverage Status](https://coveralls.io/repos/github/jorol/MARC-Parser-XML/badge.png?branch=devel)](https://coveralls.io/github/jorol/MARC-Parser-XML?branch=devel) |
105
|
|
|
|
|
|
|
[![Kwalitee Score](http://cpants.cpanauthors.org/dist/MARC-Parser-XML.png)](http://cpants.cpanauthors.org/dist/MARC-Parser-XML) |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=end markdown |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 SYNOPSIS |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
use MARC::Parser::XML; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
my $parser = MARC::Parser::XML->new( 't/marc.xml' ); |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
while ( my $record = $parser->next() ) { |
116
|
|
|
|
|
|
|
# do something ... |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head1 DESCRIPTION |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
MARC::Parser::XML is a lightweight, fault tolerant parser for MARC XML records. Tags, indicators and subfield codes are not validated against the MARC standard. The resulting data structure is optimized for usage with the Catmandu data tool kit. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head1 MARC |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
The MARC record is parsed into an ARRAY of ARRAYs: |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
$record = [ |
128
|
|
|
|
|
|
|
[ 'LDR', undef, undef, '_', '00661nam 22002538a 4500' ], |
129
|
|
|
|
|
|
|
[ '001', undef, undef, '_', 'fol05865967 ' ], |
130
|
|
|
|
|
|
|
... |
131
|
|
|
|
|
|
|
[ '245', '1', '0', 'a', 'Programming Perl /', |
132
|
|
|
|
|
|
|
'c', 'Larry Wall, Tom Christiansen & Jon Orwant.' |
133
|
|
|
|
|
|
|
], |
134
|
|
|
|
|
|
|
... |
135
|
|
|
|
|
|
|
]; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=head1 METHODS |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head2 new($file|$fh|$xml) |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head3 Configuration |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=over |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item C<file> |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Path to file with MARC XML records. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item C<fh> |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Open filehandle for MARC XML records. |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item C<xml> |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
XML string. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=back |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 next() |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Reads the next record from MARC input. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=head2 _decode($record) |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
Deserialize a raw MARC record to an ARRAY of ARRAYs. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 AUTHOR |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Johann Rolschewski E<lt>jorol@cpan.orgE<gt> |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head1 COPYRIGHT |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Copyright 2016- Johann Rolschewski |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head1 LICENSE |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
178
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head1 SEE ALSO |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=cut |