| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package MAB2::Parser::RAW; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | our $VERSION = '0.24'; | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 6 |  |  | 6 |  | 589 | use strict; | 
|  | 6 |  |  |  |  | 14 |  | 
|  | 6 |  |  |  |  | 212 |  | 
| 6 | 6 |  |  | 6 |  | 31 | use warnings; | 
|  | 6 |  |  |  |  | 13 |  | 
|  | 6 |  |  |  |  | 265 |  | 
| 7 | 6 |  |  | 6 |  | 36 | use charnames qw< :full >; | 
|  | 6 |  |  |  |  | 49 |  | 
|  | 6 |  |  |  |  | 40 |  | 
| 8 | 6 |  |  | 6 |  | 1155 | use Carp qw(carp croak); | 
|  | 6 |  |  |  |  | 18 |  | 
|  | 6 |  |  |  |  | 341 |  | 
| 9 | 6 |  |  | 6 |  | 34 | use Readonly; | 
|  | 6 |  |  |  |  | 13 |  | 
|  | 6 |  |  |  |  | 323 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | Readonly my $LEADER_LEN         => 24; | 
| 12 |  |  |  |  |  |  | Readonly my $SUBFIELD_INDICATOR => qq{\N{INFORMATION SEPARATOR ONE}}; | 
| 13 |  |  |  |  |  |  | Readonly my $END_OF_FIELD       => qq{\N{INFORMATION SEPARATOR TWO}}; | 
| 14 |  |  |  |  |  |  | Readonly my $END_OF_RECORD      => qq{\N{INFORMATION SEPARATOR THREE}}; | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | sub new { | 
| 17 | 5 |  |  | 5 | 1 | 4006 | my $class = shift; | 
| 18 | 5 |  |  |  |  | 11 | my $file  = shift; | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 5 |  |  |  |  | 24 | my $self = { | 
| 21 |  |  |  |  |  |  | filename   => undef, | 
| 22 |  |  |  |  |  |  | rec_number => 0, | 
| 23 |  |  |  |  |  |  | reader     => undef, | 
| 24 |  |  |  |  |  |  | }; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | # check for file or filehandle | 
| 27 | 5 |  |  |  |  | 11 | my $ishandle = eval { fileno($file); }; | 
|  | 5 |  |  |  |  | 24 |  | 
| 28 | 5 | 100 | 66 |  |  | 128 | if ( !$@ && defined $ishandle ) { | 
|  |  | 100 |  |  |  |  |  | 
| 29 | 2 |  |  |  |  | 7 | $self->{filename} = scalar $file; | 
| 30 | 2 |  |  |  |  | 4 | $self->{reader}   = $file; | 
| 31 |  |  |  |  |  |  | } | 
| 32 |  |  |  |  |  |  | elsif ( -e $file ) { | 
| 33 | 2 | 50 |  | 1 |  | 151 | open $self->{reader}, '<:encoding(UTF-8)', $file | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 34 |  |  |  |  |  |  | or croak "cannot read from file $file\n"; | 
| 35 | 2 |  |  |  |  | 1516 | $self->{filename} = $file; | 
| 36 |  |  |  |  |  |  | } | 
| 37 |  |  |  |  |  |  | else { | 
| 38 | 1 |  |  |  |  | 21 | croak "file or filehande $file does not exists"; | 
| 39 |  |  |  |  |  |  | } | 
| 40 | 4 |  |  |  |  | 32 | return ( bless $self, $class ); | 
| 41 |  |  |  |  |  |  | } | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | sub next { | 
| 44 | 48 |  |  | 48 | 1 | 3119 | my $self = shift; | 
| 45 | 48 | 100 |  |  |  | 1110 | if ( my $line = $self->{reader}->getline() ) { | 
| 46 | 46 |  |  |  |  | 1840 | $self->{rec_number}++; | 
| 47 | 46 |  |  |  |  | 96 | my $record = _decode($line); | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | # get last subfield from 001 as id | 
| 50 | 46 |  |  |  |  | 79 | my ($id) = map { $_->[-1] } grep { $_->[0] =~ '001' } @{$record}; | 
|  | 45 |  |  |  |  | 104 |  | 
|  | 2082 |  |  |  |  | 3406 |  | 
|  | 46 |  |  |  |  | 101 |  | 
| 51 | 46 |  |  |  |  | 224 | return { _id => $id, record => $record }; | 
| 52 |  |  |  |  |  |  | } | 
| 53 | 2 |  |  |  |  | 52 | return; | 
| 54 |  |  |  |  |  |  | } | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | sub _decode { | 
| 57 | 46 |  |  | 46 |  | 84 | my $reader = shift; | 
| 58 | 46 |  |  |  |  | 118 | chomp $reader; | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 46 | 100 |  |  |  | 510 | if ( substr( $reader, -1, 1 ) ne $END_OF_RECORD ) { | 
| 61 | 1 |  |  |  |  | 21 | carp "record terminator not found"; | 
| 62 |  |  |  |  |  |  | } | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 46 |  |  |  |  | 915 | my @record; | 
| 65 | 46 |  |  |  |  | 132 | my $leader = substr $reader, 0, $LEADER_LEN; | 
| 66 | 46 | 100 |  |  |  | 511 | if ( $leader =~ m/(\d{5}\wM2.0\d*\s*\w)/ ) { | 
| 67 | 45 |  |  |  |  | 142 | push @record, [ 'LDR', '', '_', $leader ]; | 
| 68 |  |  |  |  |  |  | } | 
| 69 |  |  |  |  |  |  | else { | 
| 70 | 1 |  |  |  |  | 30 | carp "faulty record leader: \"$leader\""; | 
| 71 |  |  |  |  |  |  | } | 
| 72 |  |  |  |  |  |  |  | 
| 73 | 46 |  |  |  |  | 1000 | my @fields = split $END_OF_FIELD, substr( $reader, $LEADER_LEN, -1 ); | 
| 74 |  |  |  |  |  |  |  | 
| 75 | 46 |  |  |  |  | 1729 | for my $field (@fields) { | 
| 76 |  |  |  |  |  |  |  | 
| 77 | 2039 | 100 |  |  |  | 5041 | if ( length $field <= 4 ) { | 
| 78 | 1 |  |  |  |  | 15 | carp "faulty field: \"$field\""; | 
| 79 | 1 |  |  |  |  | 525 | next; | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 | 2038 | 100 |  |  |  | 8199 | if ( my ( $tag, $ind, $data ) | 
| 83 |  |  |  |  |  |  | = $field =~ m/^(\d{3})([A-Za-z0-9\s])(.*)/ ) | 
| 84 |  |  |  |  |  |  | { | 
| 85 | 2037 | 100 |  |  |  | 5466 | if ( $data =~ m/\s*$SUBFIELD_INDICATOR(.*)/ ) { | 
| 86 |  |  |  |  |  |  | push( | 
| 87 |  |  |  |  |  |  | @record, | 
| 88 |  |  |  |  |  |  | [   $tag, | 
| 89 |  |  |  |  |  |  | $ind, | 
| 90 | 109 |  |  |  |  | 938 | map { ( substr( $_, 0, 1 ), substr( $_, 1 ) ) } | 
|  | 169 |  |  |  |  | 1525 |  | 
| 91 |  |  |  |  |  |  | split /$SUBFIELD_INDICATOR/, | 
| 92 |  |  |  |  |  |  | $1 | 
| 93 |  |  |  |  |  |  | ] | 
| 94 |  |  |  |  |  |  | ); | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  | else { | 
| 97 | 1928 |  |  |  |  | 14766 | push @record, [ $tag, $ind, '_', $data ]; | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  | else { | 
| 101 | 1 |  |  |  |  | 17 | carp "faulty field structure: \"$field\""; | 
| 102 | 1 |  |  |  |  | 544 | next; | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  |  | 
| 107 | 46 |  |  |  |  | 338 | return \@record; | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | 1;    # End of MAB2::Parser::RAW | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | __END__ | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | =pod | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | =encoding UTF-8 | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | =head1 NAME | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | MAB2::Parser::RAW - MAB2 RAW format parser | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | L<MAB2::Parser::RAW> is a parser for raw MAB2 records. | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | L<MAB2::Parser::RAW> expects UTF-8 encoded files as input. Otherwise provide a | 
| 127 |  |  |  |  |  |  | filehandle with a specified I/O layer. | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | use MAB2::Parser::RAW; | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | my $parser = MAB2::Parser::RAW->new( $filename ); | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | while ( my $record_hash = $parser->next() ) { | 
| 134 |  |  |  |  |  |  | # do something | 
| 135 |  |  |  |  |  |  | } | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | =head1 ARGUMENTS | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | =over | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =item C<file> | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | Path to file with MAB2 Band records. | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | =item C<fh> | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | Open filehandle for file with MAB2 Band records. | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | =back | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | =head1 METHODS | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | =head2 new($filename | $filehandle) | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | =head2 next() | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | Reads the next record from the MAB2 input stream. Returns a hash reference with the keys C<_id> and C<record>, or nothing when no further record can be read. | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | =head2 _decode($record) | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | Deserialize a raw MAB2 record to an ARRAY of ARRAYs. | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | L<Catmandu::Importer::MAB2>. | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | =head1 AUTHOR | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | Johann Rolschewski <jorol@cpan.org> | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | This software is copyright (c) 2013 by Johann Rolschewski. | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 176 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | =cut |