| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package MARC::Field::Normalize::NACO; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 51626 | use strict; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 38 |  | 
| 4 | 1 |  |  | 1 |  | 5 | use warnings; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 29 |  | 
| 5 | 1 |  |  | 1 |  | 5 | use utf8; | 
|  | 1 |  |  |  |  | 7 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 6 | 1 |  |  | 1 |  | 2237 | use Unicode::Normalize qw(NFD); | 
|  | 1 |  |  |  |  | 3278 |  | 
|  | 1 |  |  |  |  | 168 |  | 
| 7 | 1 |  |  | 1 |  | 3034 | use List::MoreUtils qw(natatime); | 
|  | 1 |  |  |  |  | 2301 |  | 
|  | 1 |  |  |  |  | 119 |  | 
| 8 | 1 |  |  | 1 |  | 9 | use MARC::Field; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 21 |  | 
| 9 | 1 |  |  | 1 |  | 1509 | use Method::Signatures; | 
|  | 1 |  |  |  |  | 150683 |  | 
|  | 1 |  |  |  |  | 9 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | our $VERSION = '0.04'; | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 1 |  |  | 1 |  | 634 | use vars qw( @EXPORT_OK ); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 70 |  | 
| 14 | 1 |  |  | 1 |  | 6 | use Exporter 'import'; | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 1 |  |  |  |  | 82 |  | 
| 15 |  |  |  |  |  |  | @EXPORT_OK = qw( | 
| 16 |  |  |  |  |  |  | naco_from_string naco_from_array | 
| 17 |  |  |  |  |  |  | naco_from_field naco_from_authority | 
| 18 |  |  |  |  |  |  | ); | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 1 | 50 | 66 | 1 |  | 188100 | func naco_from_string( Str $s, Bool :$keep_first_comma ) { | 
|  | 35 | 50 | 66 | 35 |  | 2227 |  | 
|  | 35 | 50 | 33 |  |  | 304 |  | 
|  | 35 | 50 |  |  |  | 3155 |  | 
|  | 35 | 50 |  |  |  | 135 |  | 
|  | 35 |  |  |  |  | 375 |  | 
|  | 35 |  |  |  |  | 104 |  | 
|  | 35 |  |  |  |  | 69 |  | 
|  | 35 |  |  |  |  | 83 |  | 
|  | 35 |  |  |  |  | 86 |  | 
| 21 |  |  |  |  |  |  | # decompose and uppercase | 
| 22 | 35 |  |  |  |  | 281 | $s = uc( NFD($s) ); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | # strip out combining diacritics | 
| 25 | 1 |  |  | 1 |  | 345 | $s =~ s/\p{M}//g; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 15 |  | 
|  | 35 |  |  |  |  | 6305 |  | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | # transpose diagraphs and related characters | 
| 28 | 35 |  |  |  |  | 238 | $s =~ s/Ã/AE/g; | 
| 29 | 35 |  |  |  |  | 57 | $s =~ s/Å/OE/g; | 
| 30 | 35 |  |  |  |  | 100 | $s =~ s/Ã|Ò¨/O/g; | 
| 31 | 35 |  |  |  |  | 56 | $s =~ s/Ã/TH/g; | 
| 32 | 35 |  |  |  |  | 50 | $s =~ s/Ã/D/g; | 
| 33 | 35 |  |  |  |  | 208 | $s =~ s/Ã/SS/g; | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | # transpose sub- and super-script with numerals | 
| 36 | 35 |  |  |  |  | 105 | $s =~ tr/â°Â¹Â²Â³â´âµâ¶â·â¸â¹/0123456789/; | 
| 37 | 35 |  |  |  |  | 84 | $s =~ tr/ââââââ
ââââ/0123456789/; | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | # delete or blank out punctuation | 
| 40 | 35 |  |  |  |  | 138 | $s =~ s/[!"()\-{}<>;:.?¿¡\/\\*\|%=±âºâ»â¢â©°^_`~]/ /g; | 
| 41 | 35 |  |  |  |  | 99 | $s =~ s/['\[\]ЪЬ·]//g; | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | # blank out commas | 
| 44 | 35 | 100 |  |  |  | 72 | if ($keep_first_comma) { | 
| 45 | 17 |  |  |  |  | 61 | my $i = index $s, ','; | 
| 46 | 17 |  |  |  |  | 88 | $s =~ s/,/ /g; | 
| 47 | 17 |  |  |  |  | 247 | $s =~ s/^((?:.){$i})\s/$1,/; | 
| 48 |  |  |  |  |  |  | } | 
| 49 |  |  |  |  |  |  | else { | 
| 50 | 18 |  |  |  |  | 45 | $s =~ s/,/ /g; | 
| 51 |  |  |  |  |  |  | } | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | # lastly, trim and deduplicate whitespace | 
| 54 | 35 |  |  |  |  | 121 | $s =~ s/\s\s+/ /g; | 
| 55 | 35 |  |  |  |  | 188 | $s =~ s/^\s+|\s+$//g; | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 35 |  |  |  |  | 136 | return $s; | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 1 | 50 | 66 | 1 |  | 23981 | func naco_from_array( ArrayRef $subfs ) { | 
|  | 11 | 50 |  | 11 |  | 1127 |  | 
|  | 11 | 50 |  |  |  | 111 |  | 
|  | 11 |  |  |  |  | 58 |  | 
|  | 11 |  |  |  |  | 33 |  | 
| 61 |  |  |  |  |  |  | # Expects $subfs == [ 'a', 'Thurber, James', 'd', '1914-', ... ] | 
| 62 | 11 |  |  |  |  | 98 | my $itr = natatime 2, @$subfs; | 
| 63 | 11 |  |  |  |  | 23 | my $out = ''; | 
| 64 | 11 |  |  |  |  | 68 | while (my ($subf, $val) = $itr->()) { | 
| 65 | 29 |  |  |  |  | 73 | my $norm = naco_from_string( $val, keep_first_comma => $subf eq 'a' ); | 
| 66 | 29 |  |  |  |  | 186 | $out .= '$'. $subf . $norm; | 
| 67 |  |  |  |  |  |  | } | 
| 68 | 11 |  |  |  |  | 112 | return $out; | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 1 | 50 | 66 | 1 |  | 5074 | func naco_from_field( MARC::Field $f, :$subfields = 'a-df-hj-vx-z') { | 
|  | 8 | 50 |  | 8 |  | 1845 |  | 
|  | 8 | 50 |  |  |  | 82 |  | 
|  | 8 | 50 |  |  |  | 563 |  | 
|  | 8 | 50 |  |  |  | 40 |  | 
|  | 8 |  |  |  |  | 29 |  | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 93 |  | 
|  | 8 |  |  |  |  | 23 |  | 
| 72 | 8 |  |  |  |  | 32 | my @flat = map {@$_} grep {$_->[0] =~ /[$subfields]/} $f->subfields; | 
|  | 19 |  |  |  |  | 54 |  | 
|  | 25 |  |  |  |  | 483 |  | 
| 73 | 8 |  |  |  |  | 37 | return naco_from_array( \@flat ); | 
| 74 |  |  |  |  |  |  | } | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 1 | 50 | 66 | 1 |  | 2075 | func naco_from_authority( MARC::Record $r ) { | 
|  | 2 | 50 |  | 2 |  | 1376 |  | 
|  | 2 | 50 |  |  |  | 26 |  | 
|  | 2 |  |  |  |  | 299 |  | 
|  | 2 |  |  |  |  | 10 |  | 
| 77 | 2 |  |  |  |  | 12 | return naco_from_field( scalar $r->field('1..'), subfields => 'a-z' ); | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | { | 
| 81 | 1 |  |  | 1 |  | 183 | no warnings qw(once); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 100 |  | 
| 82 |  |  |  |  |  |  | *MARC::Field::as_naco = \&naco_from_field; | 
| 83 |  |  |  |  |  |  | } | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | 1; | 
| 86 |  |  |  |  |  |  | __END__ | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | =encoding utf-8 | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | =head1 NAME | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | MARC::Field::Normalize::NACO - Matching normalization for MARC::Field | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | use MARC::Field; | 
| 97 |  |  |  |  |  |  | use MARC::Field::Normalize::NACO; | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | my $field = MARC::Field->new( | 
| 100 |  |  |  |  |  |  | '100', ' ', ' ', a => 'Stephenson, Neal,', d => '1953-'); | 
| 101 |  |  |  |  |  |  | my $normalized = $field->as_naco; | 
| 102 |  |  |  |  |  |  | my $custom = $field->as_naco(subfields => 'a'); | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | MARC::Field::Normalize::NACO turns MARC::Field objects into | 
| 107 |  |  |  |  |  |  | strings canonicalized into NACO format. This makes them | 
| 108 |  |  |  |  |  |  | suitable for matching against an index of similarly normalized | 
| 109 |  |  |  |  |  |  | fields. | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | The principal means of invoking is through the as_naco() method | 
| 112 |  |  |  |  |  |  | that the module injects into MARC::Field when loaded. A string | 
| 113 |  |  |  |  |  |  | is returned. | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | This method takes an optional named parameter, subfields. The | 
| 116 |  |  |  |  |  |  | value of this parameter should be something that fits nicely | 
| 117 |  |  |  |  |  |  | into the regex qr/[$subfields]/, typically a range of letters. | 
| 118 |  |  |  |  |  |  | The default value is "a-z68". | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | =head1 AUTHOR | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | Clay Fouts E<lt>cfouts@khephera.netE<gt> | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | =head1 COPYRIGHT | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | Copyright 2013 PTFS, Inc. | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | =head1 LICENSE | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 131 |  |  |  |  |  |  | it under the same terms as Perl itself. | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | =over | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | =item * | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | http://www.loc.gov/aba/pcc/naco/normrule-2.html | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =item * | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | MARC::Record | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | =back | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | =cut |