line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MAB2::Parser::RAW; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.24'; |
4
|
|
|
|
|
|
|
|
5
|
6
|
|
|
6
|
|
589
|
use strict; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
212
|
|
6
|
6
|
|
|
6
|
|
31
|
use warnings; |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
265
|
|
7
|
6
|
|
|
6
|
|
36
|
use charnames qw< :full >; |
|
6
|
|
|
|
|
49
|
|
|
6
|
|
|
|
|
40
|
|
8
|
6
|
|
|
6
|
|
1155
|
use Carp qw(carp croak); |
|
6
|
|
|
|
|
18
|
|
|
6
|
|
|
|
|
341
|
|
9
|
6
|
|
|
6
|
|
34
|
use Readonly; |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
323
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Readonly my $LEADER_LEN => 24; |
12
|
|
|
|
|
|
|
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; |
13
|
|
|
|
|
|
|
Readonly my $END_OF_FIELD => qq{\N{INFORMATION SEPARATOR TWO}}; |
14
|
|
|
|
|
|
|
Readonly my $END_OF_RECORD => qq{\N{INFORMATION SEPARATOR THREE}}; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub new { |
17
|
5
|
|
|
5
|
1
|
4006
|
my $class = shift; |
18
|
5
|
|
|
|
|
11
|
my $file = shift; |
19
|
|
|
|
|
|
|
|
20
|
5
|
|
|
|
|
24
|
my $self = { |
21
|
|
|
|
|
|
|
filename => undef, |
22
|
|
|
|
|
|
|
rec_number => 0, |
23
|
|
|
|
|
|
|
reader => undef, |
24
|
|
|
|
|
|
|
}; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# check for file or filehandle |
27
|
5
|
|
|
|
|
11
|
my $ishandle = eval { fileno($file); }; |
|
5
|
|
|
|
|
24
|
|
28
|
5
|
100
|
66
|
|
|
128
|
if ( !$@ && defined $ishandle ) { |
|
|
100
|
|
|
|
|
|
29
|
2
|
|
|
|
|
7
|
$self->{filename} = scalar $file; |
30
|
2
|
|
|
|
|
4
|
$self->{reader} = $file; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
elsif ( -e $file ) { |
33
|
2
|
50
|
|
1
|
|
151
|
open $self->{reader}, '<:encoding(UTF-8)', $file |
|
1
|
|
|
|
|
9
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
7
|
|
34
|
|
|
|
|
|
|
or croak "cannot read from file $file\n"; |
35
|
2
|
|
|
|
|
1516
|
$self->{filename} = $file; |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
else { |
38
|
1
|
|
|
|
|
21
|
croak "file or filehande $file does not exists"; |
39
|
|
|
|
|
|
|
} |
40
|
4
|
|
|
|
|
32
|
return ( bless $self, $class ); |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub next { |
44
|
48
|
|
|
48
|
1
|
3119
|
my $self = shift; |
45
|
48
|
100
|
|
|
|
1110
|
if ( my $line = $self->{reader}->getline() ) { |
46
|
46
|
|
|
|
|
1840
|
$self->{rec_number}++; |
47
|
46
|
|
|
|
|
96
|
my $record = _decode($line); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# get last subfield from 001 as id |
50
|
46
|
|
|
|
|
79
|
my ($id) = map { $_->[-1] } grep { $_->[0] =~ '001' } @{$record}; |
|
45
|
|
|
|
|
104
|
|
|
2082
|
|
|
|
|
3406
|
|
|
46
|
|
|
|
|
101
|
|
51
|
46
|
|
|
|
|
224
|
return { _id => $id, record => $record }; |
52
|
|
|
|
|
|
|
} |
53
|
2
|
|
|
|
|
52
|
return; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub _decode { |
57
|
46
|
|
|
46
|
|
84
|
my $reader = shift; |
58
|
46
|
|
|
|
|
118
|
chomp $reader; |
59
|
|
|
|
|
|
|
|
60
|
46
|
100
|
|
|
|
510
|
if ( substr( $reader, -1, 1 ) ne $END_OF_RECORD ) { |
61
|
1
|
|
|
|
|
21
|
carp "record terminator not found"; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
46
|
|
|
|
|
915
|
my @record; |
65
|
46
|
|
|
|
|
132
|
my $leader = substr $reader, 0, $LEADER_LEN; |
66
|
46
|
100
|
|
|
|
511
|
if ( $leader =~ m/(\d{5}\wM2.0\d*\s*\w)/ ) { |
67
|
45
|
|
|
|
|
142
|
push @record, [ 'LDR', '', '_', $leader ]; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
else { |
70
|
1
|
|
|
|
|
30
|
carp "faulty record leader: \"$leader\""; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
46
|
|
|
|
|
1000
|
my @fields = split $END_OF_FIELD, substr( $reader, $LEADER_LEN, -1 ); |
74
|
|
|
|
|
|
|
|
75
|
46
|
|
|
|
|
1729
|
for my $field (@fields) { |
76
|
|
|
|
|
|
|
|
77
|
2039
|
100
|
|
|
|
5041
|
if ( length $field <= 4 ) { |
78
|
1
|
|
|
|
|
15
|
carp "faulty field: \"$field\""; |
79
|
1
|
|
|
|
|
525
|
next; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
2038
|
100
|
|
|
|
8199
|
if ( my ( $tag, $ind, $data ) |
83
|
|
|
|
|
|
|
= $field =~ m/^(\d{3})([A-Za-z0-9\s])(.*)/ ) |
84
|
|
|
|
|
|
|
{ |
85
|
2037
|
100
|
|
|
|
5466
|
if ( $data =~ m/\s*$SUBFIELD_INDICATOR(.*)/ ) { |
86
|
|
|
|
|
|
|
push( |
87
|
|
|
|
|
|
|
@record, |
88
|
|
|
|
|
|
|
[ $tag, |
89
|
|
|
|
|
|
|
$ind, |
90
|
109
|
|
|
|
|
938
|
map { ( substr( $_, 0, 1 ), substr( $_, 1 ) ) } |
|
169
|
|
|
|
|
1525
|
|
91
|
|
|
|
|
|
|
split /$SUBFIELD_INDICATOR/, |
92
|
|
|
|
|
|
|
$1 |
93
|
|
|
|
|
|
|
] |
94
|
|
|
|
|
|
|
); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
else { |
97
|
1928
|
|
|
|
|
14766
|
push @record, [ $tag, $ind, '_', $data ]; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
else { |
101
|
1
|
|
|
|
|
17
|
carp "faulty field structure: \"$field\""; |
102
|
1
|
|
|
|
|
544
|
next; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
46
|
|
|
|
|
338
|
return \@record; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
1; # End of MAB2::Parser::RAW |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
__END__ |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=pod |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=encoding UTF-8 |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head1 NAME |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
MAB2::Parser::RAW - MAB2 RAW format parser |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 SYNOPSIS |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
L<MAB2::Parser::RAW> is a parser for raw MAB2 records. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
L<MAB2::Parser::RAW> expects UTF-8 encoded files as input. Otherwise provide a |
127
|
|
|
|
|
|
|
filehande with a specified I/O layer. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
use MAB2::Parser::RAW; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
my $parser = MAB2::Parser::RAW->new( $filename ); |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
while ( my $record_hash = $parser->next() ) { |
134
|
|
|
|
|
|
|
# do something |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=head1 Arguments |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=over |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item C<file> |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Path to file with MAB2 Band records. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item C<fh> |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Open filehandle for file with MAB2 Band records. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=back |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 METHODS |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 new($filename | $filehandle) |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 next() |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Reads the next record from MAB2 input stream. Returns a Perl hash. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 _decode($record) |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Deserialize a raw MAB2 record to an ARRAY of ARRAYs. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=head1 SEEALSO |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
L<Catmandu::Importer::MAB2>. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 AUTHOR |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Johann Rolschewski <jorol@cpan.org> |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
This software is copyright (c) 2013 by Johann Rolschewski. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
176
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |