line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MARC::Parser::RAW; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
35207
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
65
|
|
4
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
69
|
|
5
|
2
|
|
|
2
|
|
1333
|
use utf8; |
|
2
|
|
|
|
|
23
|
|
|
2
|
|
|
|
|
10
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = "0.05"; |
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
1327
|
use charnames qw< :full >; |
|
2
|
|
|
|
|
49102
|
|
|
2
|
|
|
|
|
14
|
|
10
|
2
|
|
|
2
|
|
413
|
use Carp qw(croak carp); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
162
|
|
11
|
2
|
|
|
2
|
|
1291
|
use Encode qw(find_encoding); |
|
2
|
|
|
|
|
13747
|
|
|
2
|
|
|
|
|
127
|
|
12
|
2
|
|
|
2
|
|
990
|
use English; |
|
2
|
|
|
|
|
6214
|
|
|
2
|
|
|
|
|
9
|
|
13
|
2
|
|
|
2
|
|
1678
|
use Readonly; |
|
2
|
|
|
|
|
5058
|
|
|
2
|
|
|
|
|
105
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Readonly my $LEADER_LEN => 24; |
16
|
|
|
|
|
|
|
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; |
17
|
|
|
|
|
|
|
Readonly my $END_OF_FIELD => qq{\N{INFORMATION SEPARATOR TWO}}; |
18
|
|
|
|
|
|
|
Readonly my $END_OF_RECORD => qq{\N{INFORMATION SEPARATOR THREE}}; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 NAME |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
MARC::Parser::RAW - Parser for ISO 2709 encoded MARC records |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=begin markdown |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
[![Build Status](https://travis-ci.org/jorol/MARC-Parser-RAW.png)](https://travis-ci.org/jorol/MARC-Parser-RAW) |
27
|
|
|
|
|
|
|
[![Coverage Status](https://coveralls.io/repos/jorol/MARC-Parser-RAW/badge.png?branch=devel)](https://coveralls.io/r/jorol/MARC-Parser-RAW?branch=devel) |
28
|
|
|
|
|
|
|
[![Kwalitee Score](http://cpants.cpanauthors.org/dist/MARC-Parser-RAW.png)](http://cpants.cpanauthors.org/dist/MARC-Parser-RAW) |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=end markdown |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 SYNOPSIS |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
use MARC::Parser::RAW; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $parser = MARC::Parser::RAW->new( $file ); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
while ( my $record = $parser->next() ) { |
39
|
|
|
|
|
|
|
# do something |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 DESCRIPTION |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
L is a lightweight, fault tolerant parser for ISO 2709 |
45
|
|
|
|
|
|
|
encoded MARC records. Tags, indicators and subfield codes are not validated |
46
|
|
|
|
|
|
|
against the MARC standard. Record length from leader and field lengths from |
47
|
|
|
|
|
|
|
the directory are ignored. Records with a faulty structure will be skipped |
48
|
|
|
|
|
|
|
with a warning. The resulting data structure is optimized for usage with the |
49
|
|
|
|
|
|
|
L data tool kit. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
L expects UTF-8 encoded files as input. Otherwise provide |
52
|
|
|
|
|
|
|
a filehande with a specified I/O layer or specify encoding. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 MARC |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
The MARC record is parsed into an ARRAY of ARRAYs: |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
$record = [ |
59
|
|
|
|
|
|
|
[ 'LDR', undef, undef, '_', '00661nam 22002538a 4500' ], |
60
|
|
|
|
|
|
|
[ '001', undef, undef, '_', 'fol05865967 ' ], |
61
|
|
|
|
|
|
|
... |
62
|
|
|
|
|
|
|
[ '245', '1', '0', 'a', 'Programming Perl /', |
63
|
|
|
|
|
|
|
'c', 'Larry Wall, Tom Christiansen & Jon Orwant.' |
64
|
|
|
|
|
|
|
], |
65
|
|
|
|
|
|
|
... |
66
|
|
|
|
|
|
|
]; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 METHODS |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 new($file|$fh|$scalarref [, $encoding]) |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head3 Configuration |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=over |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=item C |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Path to file with raw MARC records. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=item C |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Open filehandle for raw MARC records. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=item C |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Reference to scalar with raw MARC records. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=item C |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Set encoding. Default: UTF-8. Optional. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=back |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub new { |
98
|
8
|
|
|
8
|
1
|
5201
|
my ( $class, $file, $encoding ) = @_; |
99
|
|
|
|
|
|
|
|
100
|
8
|
100
|
|
|
|
164
|
$file or croak "first argument must be a file or filehandle"; |
101
|
|
|
|
|
|
|
|
102
|
7
|
100
|
|
|
|
12
|
if ($encoding) { |
103
|
2
|
100
|
|
|
|
6
|
find_encoding($encoding) or croak "encoding \"$_[0]\" not found"; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
6
|
100
|
|
|
|
38
|
my $self = { |
107
|
|
|
|
|
|
|
file => undef, |
108
|
|
|
|
|
|
|
fh => undef, |
109
|
|
|
|
|
|
|
encoding => $encoding ? $encoding : 'UTF-8', |
110
|
|
|
|
|
|
|
rec_number => 0, |
111
|
|
|
|
|
|
|
}; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# check for file or filehandle |
114
|
|
|
|
|
|
|
# ToDo: check for scalar ref |
115
|
6
|
|
|
|
|
5
|
my $ishandle = eval { fileno($file); }; |
|
6
|
|
|
|
|
25
|
|
116
|
6
|
100
|
100
|
|
|
112
|
if ( !$@ && defined $ishandle ) { |
|
|
100
|
100
|
|
|
|
|
117
|
1
|
|
|
|
|
3
|
$self->{file} = scalar $file; |
118
|
1
|
|
|
|
|
2
|
$self->{fh} = $file; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
elsif ( -e $file || ref($file) eq 'SCALAR' ) { |
121
|
4
|
50
|
|
1
|
|
92
|
open $self->{fh}, "<:encoding($self->{encoding})", $file |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
4
|
|
122
|
|
|
|
|
|
|
or croak "cannot read from file $file\n"; |
123
|
4
|
|
|
|
|
1030
|
$self->{file} = $file; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
else { |
126
|
1
|
|
|
|
|
77
|
croak "file or filehande $file does not exists"; |
127
|
|
|
|
|
|
|
} |
128
|
5
|
|
|
|
|
15
|
return ( bless $self, $class ); |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 next() |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Reads the next record from MARC input stream. Returns a Perl hash. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=cut |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub next { |
138
|
6
|
|
|
6
|
1
|
1472
|
my $self = shift; |
139
|
6
|
|
|
|
|
9
|
my $fh = $self->{fh}; |
140
|
6
|
|
|
|
|
24
|
local $INPUT_RECORD_SEPARATOR = $END_OF_RECORD; |
141
|
6
|
50
|
|
|
|
75
|
if ( defined( my $raw = <$fh> ) ) { |
142
|
6
|
|
|
|
|
99
|
$self->{rec_number}++; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# remove illegal garbage that sometimes occurs between records |
145
|
6
|
|
|
|
|
28
|
$raw |
146
|
|
|
|
|
|
|
=~ s/^[\N{SPACE}\N{NUL}\N{LINE FEED}\N{CARRIAGE RETURN}\N{SUB}]+//; |
147
|
6
|
50
|
|
|
|
9
|
return unless $raw; |
148
|
|
|
|
|
|
|
|
149
|
6
|
100
|
|
|
|
10
|
if ( my $marc = $self->_decode($raw) ) { |
150
|
2
|
|
|
|
|
9
|
return $marc; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
else { |
153
|
4
|
|
|
|
|
16
|
return $self->next(); |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
} |
156
|
0
|
|
|
|
|
0
|
return; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 _decode($record) |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Deserialize a raw MARC record to an ARRAY of ARRAYs. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub _decode { |
166
|
6
|
|
|
6
|
|
7
|
my ( $self, $raw ) = @_; |
167
|
6
|
|
|
|
|
11
|
chop $raw; |
168
|
6
|
|
|
|
|
15
|
my ( $head, @fields ) = split $END_OF_FIELD, $raw; |
169
|
|
|
|
|
|
|
|
170
|
6
|
100
|
|
|
|
137
|
if ( !@fields ) { |
171
|
1
|
|
|
|
|
85
|
carp "no fields found in record " . $self->{rec_number}; |
172
|
1
|
|
|
|
|
33
|
return; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# ToDO: better RegEX for leader |
176
|
5
|
|
|
|
|
5
|
my $leader; |
177
|
5
|
100
|
|
|
|
11
|
if ( $head =~ /(.{$LEADER_LEN})/cg ) { |
178
|
4
|
|
|
|
|
34
|
$leader = $1; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
else { |
181
|
1
|
|
|
|
|
74
|
carp "no valid record leader found in record " . $self->{rec_number}; |
182
|
1
|
|
|
|
|
25
|
return; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
4
|
|
|
|
|
42
|
my @tags = $head =~ /\G(\d{3})\d{9}/cg; |
186
|
|
|
|
|
|
|
|
187
|
4
|
100
|
|
|
|
10
|
if ( scalar @tags != scalar @fields ) { |
188
|
|
|
|
|
|
|
carp "different number of tags and fields in record " |
189
|
1
|
|
|
|
|
86
|
. $self->{rec_number}; |
190
|
1
|
|
|
|
|
35
|
return; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
3
|
100
|
|
|
|
8
|
if ( $head !~ /\G$/cg ) { |
194
|
1
|
|
|
|
|
104
|
carp "incomplete directory entry in record " . $self->{rec_number}; |
195
|
1
|
|
|
|
|
27
|
return; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
return [ |
199
|
2
|
|
|
|
|
12
|
[ 'LDR', undef, undef, '_', $leader ], |
200
|
|
|
|
|
|
|
map [ shift(@tags), $self->_field($_) ], |
201
|
|
|
|
|
|
|
@fields |
202
|
|
|
|
|
|
|
]; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head2 _field($field) |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
Split MARC field string in individual components. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=cut |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub _field { |
212
|
35
|
|
|
35
|
|
29
|
my ( $self, $field ) = @_; |
213
|
35
|
|
|
|
|
50
|
my @chunks = split( /$SUBFIELD_INDICATOR(.)/, $field ); |
214
|
35
|
100
|
|
|
|
203
|
return ( undef, undef, '_', @chunks ) if @chunks == 1; |
215
|
27
|
|
|
|
|
20
|
my @subfields; |
216
|
27
|
|
|
|
|
39
|
my ( $indicator1, $indicator2 ) = ( split //, shift @chunks ); |
217
|
27
|
|
|
|
|
37
|
while (@chunks) { |
218
|
47
|
|
|
|
|
73
|
push @subfields, ( splice @chunks, 0, 2 ); |
219
|
|
|
|
|
|
|
} |
220
|
27
|
|
|
|
|
106
|
return ( $indicator1, $indicator2, @subfields ); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head1 AUTHOR |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
Johann Rolschewski Ejorol@cpan.orgE |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=head1 COPYRIGHT |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Copyright 2014- Johann Rolschewski |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head1 LICENSE |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
234
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head1 SEEALSO |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
L, L. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENT |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
The parser methods are adapted from Marc Chantreux's L module. |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=cut |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
1; # End of MARC::Parser::RAW |
247
|
|
|
|
|
|
|
|