line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MARC::Parser::RAW; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = "0.04"; |
4
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
26971
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
53
|
|
6
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
57
|
|
7
|
2
|
|
|
2
|
|
1009
|
use charnames qw< :full >; |
|
2
|
|
|
|
|
45889
|
|
|
2
|
|
|
|
|
9
|
|
8
|
2
|
|
|
2
|
|
296
|
use Carp qw(croak carp); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
113
|
|
9
|
2
|
|
|
2
|
|
955
|
use Encode qw(find_encoding); |
|
2
|
|
|
|
|
12525
|
|
|
2
|
|
|
|
|
119
|
|
10
|
2
|
|
|
2
|
|
944
|
use English; |
|
2
|
|
|
|
|
5838
|
|
|
2
|
|
|
|
|
9
|
|
11
|
2
|
|
|
2
|
|
1566
|
use Readonly; |
|
2
|
|
|
|
|
5078
|
|
|
2
|
|
|
|
|
97
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Readonly my $LEADER_LEN => 24; |
14
|
|
|
|
|
|
|
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; |
15
|
|
|
|
|
|
|
Readonly my $END_OF_FIELD => qq{\N{INFORMATION SEPARATOR TWO}}; |
16
|
|
|
|
|
|
|
Readonly my $END_OF_RECORD => qq{\N{INFORMATION SEPARATOR THREE}}; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
MARC::Parser::RAW - Parser for ISO 2709 encoded MARC records |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=begin markdown |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
[![Build Status](https://travis-ci.org/jorol/MARC-Parser-RAW.png)](https://travis-ci.org/jorol/MARC-Parser-RAW) |
25
|
|
|
|
|
|
|
[![Coverage Status](https://coveralls.io/repos/jorol/MARC-Parser-RAW/badge.png?branch=devel)](https://coveralls.io/r/jorol/MARC-Parser-RAW?branch=devel) |
26
|
|
|
|
|
|
|
[![Kwalitee Score](http://cpants.cpanauthors.org/dist/MARC-Parser-RAW.png)](http://cpants.cpanauthors.org/dist/MARC-Parser-RAW) |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=end markdown |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 SYNOPSIS |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
use MARC::Parser::RAW; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $parser = MARC::Parser::RAW->new( $file ); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
while ( my $record = $parser->next() ) { |
37
|
|
|
|
|
|
|
# do something |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
L is a lightweight, fault tolerant parser for ISO 2709 |
43
|
|
|
|
|
|
|
encoded MARC records. Tags, indicators and subfield codes are not validated |
44
|
|
|
|
|
|
|
against the MARC standard. Record length from leader and field lengths from |
45
|
|
|
|
|
|
|
the directory are ignored. Records with a faulty structure will be skipped |
46
|
|
|
|
|
|
|
with a warning. The resulting data structure is optimized for usage with the |
47
|
|
|
|
|
|
|
L data tool kit. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
L expects UTF-8 encoded files as input. Otherwise provide |
50
|
|
|
|
|
|
|
a filehande with a specified I/O layer or specify encoding. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 MARC |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
The MARC record is parsed into an ARRAY of ARRAYs: |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
$record = [ |
57
|
|
|
|
|
|
|
[ 'LDR', undef, undef, '_', '00661nam 22002538a 4500' ], |
58
|
|
|
|
|
|
|
[ '001', undef, undef, '_', 'fol05865967 ' ], |
59
|
|
|
|
|
|
|
... |
60
|
|
|
|
|
|
|
[ '245', '1', '0', 'a', 'Programming Perl /', |
61
|
|
|
|
|
|
|
'c', 'Larry Wall, Tom Christiansen & Jon Orwant.' |
62
|
|
|
|
|
|
|
], |
63
|
|
|
|
|
|
|
... |
64
|
|
|
|
|
|
|
]; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 METHODS |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 new($file|$fh [, $encoding]) |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head3 Configuration |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item C |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Path to file with raw MARC records. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item C |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Open filehandle for raw MARC records. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item C |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Set encoding. Default: UTF-8. Optional. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=back |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=cut |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub new { |
92
|
6
|
|
|
6
|
1
|
2417
|
my ( $class, $file, $encoding ) = @_; |
93
|
|
|
|
|
|
|
|
94
|
6
|
100
|
|
|
|
157
|
$file or croak "first argument must be a file or filehandle"; |
95
|
|
|
|
|
|
|
|
96
|
5
|
100
|
|
|
|
9
|
if ($encoding) { |
97
|
2
|
100
|
|
|
|
5
|
find_encoding($encoding) or croak "encoding \"$_[0]\" not found"; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
4
|
100
|
|
|
|
29
|
my $self = { |
101
|
|
|
|
|
|
|
file => undef, |
102
|
|
|
|
|
|
|
fh => undef, |
103
|
|
|
|
|
|
|
encoding => $encoding ? $encoding : 'UTF-8', |
104
|
|
|
|
|
|
|
rec_number => 0, |
105
|
|
|
|
|
|
|
}; |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# check for file or filehandle |
108
|
4
|
|
|
|
|
4
|
my $ishandle = eval { fileno($file); }; |
|
4
|
|
|
|
|
32
|
|
109
|
4
|
50
|
33
|
|
|
80
|
if ( !$@ && defined $ishandle ) { |
|
|
100
|
|
|
|
|
|
110
|
0
|
|
|
|
|
0
|
$self->{file} = scalar $file; |
111
|
0
|
|
|
|
|
0
|
$self->{fh} = $file; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
elsif ( -e $file ) { |
114
|
3
|
50
|
|
1
|
|
75
|
open $self->{fh}, "<:encoding($self->{encoding})", $file |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
5
|
|
115
|
|
|
|
|
|
|
or croak "cannot read from file $file\n"; |
116
|
3
|
|
|
|
|
1040
|
$self->{file} = $file; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
else { |
119
|
1
|
|
|
|
|
83
|
croak "file or filehande $file does not exists"; |
120
|
|
|
|
|
|
|
} |
121
|
3
|
|
|
|
|
13
|
return ( bless $self, $class ); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 next() |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Reads the next record from MARC input stream. Returns a Perl hash. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub next { |
131
|
6
|
|
|
6
|
1
|
1126
|
my $self = shift; |
132
|
6
|
|
|
|
|
15
|
my $fh = $self->{fh}; |
133
|
6
|
|
|
|
|
22
|
local $INPUT_RECORD_SEPARATOR = $END_OF_RECORD; |
134
|
6
|
50
|
|
|
|
71
|
if ( defined (my $raw = <$fh>) ) { |
135
|
6
|
|
|
|
|
101
|
$self->{rec_number}++; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# remove illegal garbage that sometimes occurs between records |
138
|
6
|
|
|
|
|
28
|
$raw |
139
|
|
|
|
|
|
|
=~ s/^[\N{SPACE}\N{NUL}\N{LINE FEED}\N{CARRIAGE RETURN}\N{SUB}]+//; |
140
|
6
|
50
|
|
|
|
8
|
return unless $raw; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
143
|
6
|
100
|
|
|
|
9
|
if ( my $marc = $self->_decode($raw) ) { |
144
|
2
|
|
|
|
|
9
|
return $marc; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
else { |
147
|
4
|
|
|
|
|
12
|
return $self->next(); |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
} |
150
|
0
|
|
|
|
|
0
|
return; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 _decode($record) |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Deserialize a raw MARC record to an ARRAY of ARRAYs. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub _decode { |
160
|
6
|
|
|
6
|
|
8
|
my ( $self, $raw ) = @_; |
161
|
6
|
|
|
|
|
9
|
chop $raw; |
162
|
6
|
|
|
|
|
27
|
my ( $head, @fields ) = split $END_OF_FIELD, $raw; |
163
|
|
|
|
|
|
|
|
164
|
6
|
100
|
|
|
|
124
|
if ( !@fields ) { |
165
|
1
|
|
|
|
|
105
|
carp "no fields found in record " . $self->{rec_number}; |
166
|
1
|
|
|
|
|
31
|
return; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# ToDO: better RegEX for leader |
170
|
5
|
|
|
|
|
3
|
my $leader; |
171
|
5
|
100
|
|
|
|
15
|
if ( $head =~ /(.{$LEADER_LEN})/cg ) { |
172
|
4
|
|
|
|
|
36
|
$leader = $1; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
else { |
175
|
1
|
|
|
|
|
72
|
carp "no valid record leader found in record " . $self->{rec_number}; |
176
|
1
|
|
|
|
|
25
|
return; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
4
|
|
|
|
|
55
|
my @tags = $head =~ /\G(\d{3})\d{9}/cg; |
180
|
|
|
|
|
|
|
|
181
|
4
|
100
|
|
|
|
25
|
if ( scalar @tags != scalar @fields ) { |
182
|
|
|
|
|
|
|
carp "different number of tags and fields in record " |
183
|
1
|
|
|
|
|
73
|
. $self->{rec_number}; |
184
|
1
|
|
|
|
|
24
|
return; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
3
|
100
|
|
|
|
12
|
if ( $head !~ /\G$/cg ) { |
188
|
1
|
|
|
|
|
81
|
carp "incomplete directory entry in record " . $self->{rec_number}; |
189
|
1
|
|
|
|
|
24
|
return; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
return [ |
193
|
2
|
|
|
|
|
12
|
[ 'LDR', undef, undef, '_', $leader ], |
194
|
|
|
|
|
|
|
map [ shift(@tags), $self->_field($_) ], |
195
|
|
|
|
|
|
|
@fields |
196
|
|
|
|
|
|
|
]; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 _field($field) |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Split MARC field string in individual components. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub _field { |
206
|
35
|
|
|
35
|
|
30
|
my ( $self, $field ) = @_; |
207
|
35
|
|
|
|
|
52
|
my @chunks = split( /$SUBFIELD_INDICATOR(.)/, $field ); |
208
|
35
|
100
|
|
|
|
207
|
return ( undef, undef, '_', @chunks ) if @chunks == 1; |
209
|
27
|
|
|
|
|
25
|
my @subfields; |
210
|
27
|
|
|
|
|
35
|
my ( $indicator1, $indicator2 ) = ( split //, shift @chunks ); |
211
|
27
|
|
|
|
|
38
|
while (@chunks) { |
212
|
47
|
|
|
|
|
92
|
push @subfields, ( splice @chunks, 0, 2 ); |
213
|
|
|
|
|
|
|
} |
214
|
27
|
|
|
|
|
102
|
return ( $indicator1, $indicator2, @subfields ); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head1 AUTHOR |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Johann Rolschewski Ejorol@cpan.orgE |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=head1 COPYRIGHT |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Copyright 2014- Johann Rolschewski |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head1 LICENSE |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
228
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head1 SEEALSO |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
L, L. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENT |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
The parser methods are adapted from Marc Chantreux's L module. |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=cut |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
1; # End of MARC::Parser::RAW |
241
|
|
|
|
|
|
|
|