line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MAB2::Parser::Disk; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.24'; |
4
|
|
|
|
|
|
|
|
5
|
6
|
|
|
6
|
|
116965
|
use strict; |
|
6
|
|
|
|
|
22
|
|
|
6
|
|
|
|
|
184
|
|
6
|
6
|
|
|
6
|
|
86
|
use warnings; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
173
|
|
7
|
6
|
|
|
6
|
|
3125
|
use charnames qw< :full >; |
|
6
|
|
|
|
|
188247
|
|
|
6
|
|
|
|
|
42
|
|
8
|
6
|
|
|
6
|
|
1400
|
use Carp qw(carp croak); |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
428
|
|
9
|
6
|
|
|
6
|
|
3189
|
use Readonly; |
|
6
|
|
|
|
|
19065
|
|
|
6
|
|
|
|
|
302
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; |
12
|
|
|
|
|
|
|
Readonly my $END_OF_FIELD => qq{\N{LINE FEED}}; |
13
|
|
|
|
|
|
|
Readonly my $END_OF_RECORD => q{}; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub new { |
16
|
4
|
|
|
4
|
1
|
4607
|
my $class = shift; |
17
|
4
|
|
|
|
|
9
|
my $file = shift; |
18
|
|
|
|
|
|
|
|
19
|
4
|
|
|
|
|
20
|
my $self = { |
20
|
|
|
|
|
|
|
filename => undef, |
21
|
|
|
|
|
|
|
rec_number => 0, |
22
|
|
|
|
|
|
|
reader => undef, |
23
|
|
|
|
|
|
|
}; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# check for file or filehandle |
26
|
4
|
|
|
|
|
9
|
my $ishandle = eval { fileno($file); }; |
|
4
|
|
|
|
|
23
|
|
27
|
4
|
100
|
66
|
|
|
106
|
if ( !$@ && defined $ishandle ) { |
|
|
100
|
|
|
|
|
|
28
|
1
|
|
|
|
|
3
|
$self->{filename} = scalar $file; |
29
|
1
|
|
|
|
|
3
|
$self->{reader} = $file; |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
elsif ( -e $file ) { |
32
|
2
|
50
|
|
|
|
91
|
open $self->{reader}, '<:encoding(UTF-8)', $file |
33
|
|
|
|
|
|
|
or croak "cannot read from file $file\n"; |
34
|
2
|
|
|
|
|
191
|
$self->{filename} = $file; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
else { |
37
|
1
|
|
|
|
|
25
|
croak "file or filehande $file does not exists"; |
38
|
|
|
|
|
|
|
} |
39
|
3
|
|
|
|
|
18
|
return ( bless $self, $class ); |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub next { |
43
|
27
|
|
|
27
|
1
|
2692
|
my $self = shift; |
44
|
27
|
|
|
|
|
123
|
local $/ = $END_OF_RECORD; |
45
|
27
|
100
|
|
|
|
766
|
if ( my $data = $self->{reader}->getline() ) { |
46
|
26
|
|
|
|
|
1005
|
$self->{rec_number}++; |
47
|
26
|
|
|
|
|
76
|
my $record = _decode($data); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# get last subfield from 001 as id |
50
|
26
|
|
|
|
|
49
|
my ($id) = map { $_->[-1] } grep { $_->[0] =~ '001' } @{$record}; |
|
25
|
|
|
|
|
62
|
|
|
1078
|
|
|
|
|
1892
|
|
|
26
|
|
|
|
|
55
|
|
51
|
26
|
|
|
|
|
173
|
return { _id => $id, record => $record }; |
52
|
|
|
|
|
|
|
} |
53
|
1
|
|
|
|
|
26
|
return; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub _decode { |
57
|
26
|
|
|
26
|
|
43
|
my $reader = shift; |
58
|
26
|
|
|
|
|
51
|
chomp($reader); |
59
|
|
|
|
|
|
|
|
60
|
26
|
|
|
|
|
39
|
my @record; |
61
|
|
|
|
|
|
|
|
62
|
26
|
|
|
|
|
86
|
my @fields = split( $END_OF_FIELD, $reader ); |
63
|
|
|
|
|
|
|
|
64
|
26
|
|
|
|
|
771
|
my $leader = shift @fields; |
65
|
26
|
100
|
|
|
|
146
|
if ($leader =~ m/^\N{NUMBER SIGN}{3}\s(\d{5}[cdnpu]M2.0\d{7}\s{6}\w)/xms ) |
66
|
|
|
|
|
|
|
{ |
67
|
25
|
|
|
|
|
104
|
push( @record, [ 'LDR', '', '_', $1 ] ); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
else { |
70
|
1
|
|
|
|
|
18
|
carp "faulty record leader: $leader"; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
26
|
|
|
|
|
670
|
foreach my $field (@fields) { |
74
|
|
|
|
|
|
|
|
75
|
1055
|
100
|
|
|
|
2433
|
if ( length $field <= 4 ) { |
76
|
1
|
|
|
|
|
16
|
carp "faulty field: \"$field\""; |
77
|
1
|
|
|
|
|
579
|
next; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
1054
|
100
|
|
|
|
4083
|
if ( my ( $tag, $ind, $data ) |
81
|
|
|
|
|
|
|
= $field =~ m/^(\d{3})([A-Za-z0-9\s])(.*)/ ) |
82
|
|
|
|
|
|
|
{ |
83
|
|
|
|
|
|
|
# check if data contains subfield indicators |
84
|
1053
|
100
|
|
|
|
2841
|
if ( $data =~ m/\s*($SUBFIELD_INDICATOR|\$)(.*)/ ) { |
85
|
61
|
100
|
|
|
|
514
|
my $subfield_indicator = $1 eq '$' ? '\$' : $1; |
86
|
|
|
|
|
|
|
push |
87
|
|
|
|
|
|
|
@record, |
88
|
|
|
|
|
|
|
[ |
89
|
|
|
|
|
|
|
$tag, |
90
|
|
|
|
|
|
|
$ind, |
91
|
61
|
|
|
|
|
290
|
map { ( substr( $_, 0, 1 ), substr( $_, 1 ) ) } |
|
97
|
|
|
|
|
470
|
|
92
|
|
|
|
|
|
|
split /$subfield_indicator/, |
93
|
|
|
|
|
|
|
$2 |
94
|
|
|
|
|
|
|
]; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
else { |
97
|
992
|
|
|
|
|
10779
|
push @record, [ $tag, $ind, '_', $data ]; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
else { |
101
|
1
|
|
|
|
|
15
|
carp "faulty field structure: \"$field\""; |
102
|
1
|
|
|
|
|
541
|
next; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
} |
105
|
26
|
|
|
|
|
175
|
return \@record; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
1; # End of MAB2::Parser::Disk |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
__END__ |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=pod |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=encoding UTF-8 |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head1 NAME |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
MAB2::Parser::Disk - MAB2 Diskette format parser |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head1 SYNOPSIS |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
L<MAB2::Parser::Disk> is a parser for MAB2 Diskette records. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
L<MAB2::Parser::Disk> expects UTF-8 encoded files as input. Otherwise |
125
|
|
|
|
|
|
|
provide a filehande with a specified I/O layer. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
use MAB2::Parser::Disk; |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
my $parser = MAB2::Parser::Disk->new( $filename ); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
while ( my $record_hash = $parser->next() ) { |
132
|
|
|
|
|
|
|
# do something |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 Arguments |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=over |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=item C<file> |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Path to file with MAB2 Diskette records. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item C<fh> |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Open filehandle for file with MAB2 Diskette records. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=back |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head1 METHODS |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 new($filename | $filehandle) |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 next() |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Reads the next record from MAB2 input stream. Returns a Perl hash. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head2 _decode($record) |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Deserialize a raw MAB2 record to an ARRAY of ARRAYs. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 SEE ALSO |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
L<Catmandu::Importer::MAB2>. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head1 AUTHOR |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Johann Rolschewski <jorol@cpan.org> |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
This software is copyright (c) 2013 by Johann Rolschewski. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
174
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=cut |