|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package MARC::Field::Normalize::NACO;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
75119
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
    | 
| 
4
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
6
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
    | 
| 
5
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
6
 | 
 use utf8;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
6
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
607
 | 
 use Unicode::Normalize qw(NFD);  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2115
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
88
 | 
    | 
| 
7
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
569
 | 
 use List::MoreUtils qw(natatime);  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12107
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
8
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
999
 | 
 use MARC::Field;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.06';  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use vars qw( @EXPORT_OK );  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
    | 
| 
13
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
7
 | 
 use Exporter 'import';  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
130
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 @EXPORT_OK = qw(  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     naco_from_string naco_from_array  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     naco_from_field naco_from_authority  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #func naco_from_string( Str $s, Bool :$keep_first_comma ) {  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub naco_from_string {  | 
| 
21
 | 
44
 | 
 
 | 
 
 | 
  
44
  
 | 
  
0
  
 | 
3751
 | 
     my $s = shift;  | 
| 
22
 | 
44
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
97
 | 
     my $keep_first_comma = (@_ == 2) ? $_[1] : undef;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # decompose and uppercase  | 
| 
24
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
278
 | 
     $s = uc( NFD($s) );  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # strip out combining diacritics  | 
| 
27
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
6
 | 
     $s =~ s/\p{M}//g;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
    | 
| 
 
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6967
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # transpose diagraphs and related characters  | 
| 
30
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
88
 | 
     $s =~ s/Æ/AE/g;  | 
| 
31
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
     $s =~ s/Œ/OE/g;  | 
| 
32
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
159
 | 
     $s =~ s/Ø|Ҩ/O/g;  | 
| 
33
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
     $s =~ s/Þ/TH/g;  | 
| 
34
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
     $s =~ s/Ð/D/g;  | 
| 
35
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
     $s =~ s/ß/SS/g;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # transpose sub- and super-script with numerals  | 
| 
38
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
135
 | 
     $s =~ tr/⁰¹²³⁴⁵⁶⁷⁸⁹/0123456789/;  | 
| 
39
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
100
 | 
     $s =~ tr/₀₁₂₃₄₅₆₇₈₉/0123456789/;  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # delete or blank out punctuation  | 
| 
42
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
119
 | 
     $s =~ s/[!"()\-{}<>;:.?¿¡\/\\*\|%=±⁺⁻™℗©°^_`~]/ /g;  | 
| 
43
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
86
 | 
     $s =~ s/['\[\]ЪЬ·]//g;  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # blank out commas  | 
| 
46
 | 
44
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
170
 | 
     if ($keep_first_comma && $s =~ /,/) {  | 
| 
47
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
         my $i = index $s, ',';  | 
| 
48
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
         $s =~ s/,/ /g;  | 
| 
49
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
         substr $s, $i, 1, ',';  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # always strip off a trailing comma, even if it's the only one  | 
| 
51
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
         $s =~ s/,$//;  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
54
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
         $s =~ s/,/ /g;  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # lastly, trim and deduplicate whitespace  | 
| 
58
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
129
 | 
     $s =~ s/\s\s+/ /g;  | 
| 
59
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
189
 | 
     $s =~ s/^\s+|\s+$//g;  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
61
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
     return $s;  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #func naco_from_array( ArrayRef $subfs ) {  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub naco_from_array {  | 
| 
66
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
  
0
  
 | 
1646
 | 
     my $subfs = shift;  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Expects $subfs == [ 'a', 'Thurber, James', 'd', '1914-', ... ]  | 
| 
68
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
     my $itr = natatime 2, @$subfs;  | 
| 
69
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     my $out = '';  | 
| 
70
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
85
 | 
     while (my ($subf, $val) = $itr->()) {  | 
| 
71
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
87
 | 
         my $norm = naco_from_string( $val, keep_first_comma => $subf eq 'a' );  | 
| 
72
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
163
 | 
         $out .= '$'. $subf . $norm;  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
74
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
107
 | 
     return $out;  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #func naco_from_field( MARC::Field $f, :$subfields = 'a-df-hj-vx-z') {  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub naco_from_field {  | 
| 
79
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
  
0
  
 | 
3036
 | 
     my $f = shift;  | 
| 
80
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     my $subfields = (@_ == 2) ? $_[1] : 'a-df-hj-vx-z';  | 
| 
81
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
     my @flat = map {@$_} grep {$_->[0] =~ /[$subfields]/} $f->subfields;  | 
| 
 
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
    | 
| 
 
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
538
 | 
    | 
| 
82
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
     return naco_from_array( \@flat );  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #func naco_from_authority( MARC::Record $r ) {  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub naco_from_authority {  | 
| 
87
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
0
  
 | 
1126
 | 
     my $r = shift;  | 
| 
88
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     return naco_from_field( scalar $r->field('1..'), subfields => 'a-z' );  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
92
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
22808
 | 
     no warnings qw(once);  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91
 | 
    | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     *MARC::Field::as_naco = \&naco_from_field;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |