line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MARC::Lint; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
327188
|
use strict; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
187
|
|
4
|
5
|
|
|
5
|
|
32
|
use warnings; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
136
|
|
5
|
5
|
|
|
5
|
|
31
|
use integer; |
|
5
|
|
|
|
|
24
|
|
|
5
|
|
|
|
|
30
|
|
6
|
5
|
|
|
5
|
|
1142
|
use MARC::Record; |
|
5
|
|
|
|
|
7942
|
|
|
5
|
|
|
|
|
191
|
|
7
|
5
|
|
|
5
|
|
34
|
use MARC::Field; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
133
|
|
8
|
|
|
|
|
|
|
|
9
|
5
|
|
|
5
|
|
1691
|
use MARC::Lint::CodeData qw(%GeogAreaCodes %ObsoleteGeogAreaCodes %LanguageCodes %ObsoleteLanguageCodes); |
|
5
|
|
|
|
|
17
|
|
|
5
|
|
|
|
|
4679
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $VERSION = 1.52; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
MARC::Lint - Perl extension for checking validity of MARC records |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use MARC::File::USMARC; |
20
|
|
|
|
|
|
|
use MARC::Lint; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $lint = new MARC::Lint; |
23
|
|
|
|
|
|
|
my $filename = shift; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $file = MARC::File::USMARC->in( $filename ); |
26
|
|
|
|
|
|
|
while ( my $marc = $file->next() ) { |
27
|
|
|
|
|
|
|
$lint->check_record( $marc ); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Print the title tag |
30
|
|
|
|
|
|
|
print $marc->title, "\n"; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Print the errors that were found |
33
|
|
|
|
|
|
|
print join( "\n", $lint->warnings ), "\n"; |
34
|
|
|
|
|
|
|
} # while |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Given the following MARC record: |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
LDR 00000nam 22002538a 4500 |
39
|
|
|
|
|
|
|
040 _aMdSSJTT |
40
|
|
|
|
|
|
|
_cMdSSJTT |
41
|
|
|
|
|
|
|
040 _aMdSSJTT |
42
|
|
|
|
|
|
|
_beng |
43
|
|
|
|
|
|
|
_cMdSSJTT |
44
|
|
|
|
|
|
|
100 14 _aWall, Larry. |
45
|
|
|
|
|
|
|
110 1 _aO'Reilly & Associates. |
46
|
|
|
|
|
|
|
245 90 _aProgramming Perl / |
47
|
|
|
|
|
|
|
_aBig Book of Perl / |
48
|
|
|
|
|
|
|
_cLarry Wall, Tom Christiansen & Jon Orwant. |
49
|
|
|
|
|
|
|
250 _a3rd ed. |
50
|
|
|
|
|
|
|
250 _a3rd ed. |
51
|
|
|
|
|
|
|
260 _aCambridge, Mass. : |
52
|
|
|
|
|
|
|
_bO'Reilly, |
53
|
|
|
|
|
|
|
_r2000. |
54
|
|
|
|
|
|
|
590 4 _aPersonally signed by Larry. |
55
|
|
|
|
|
|
|
856 43 _uhttp://www.perl.com/ |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
the following errors are generated: |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
1XX: Only one 1XX tag is allowed, but I found 2 of them. |
60
|
|
|
|
|
|
|
100: Indicator 2 must be blank but it's "4" |
61
|
|
|
|
|
|
|
245: Indicator 1 must be 0 or 1 but it's "9" |
62
|
|
|
|
|
|
|
245: Subfield _a is not repeatable. |
63
|
|
|
|
|
|
|
040: Field is not repeatable. |
64
|
|
|
|
|
|
|
260: Subfield _r is not allowed. |
65
|
|
|
|
|
|
|
856: Indicator 2 must be blank, 0, 1, 2 or 8 but it's "3" |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 DESCRIPTION |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Module for checking validity of MARC records. 99% of the users will want to do |
70
|
|
|
|
|
|
|
something like is shown in the synopsis. The other intrepid 1% will overload the |
71
|
|
|
|
|
|
|
C module's methods and provide their own special field-level checking. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
What this means is that if you have certain requirements, such as making sure that |
74
|
|
|
|
|
|
|
all 952 tags have a certain call number in them, you can write a function that |
75
|
|
|
|
|
|
|
checks for that, and still get all the benefits of the MARC::Lint framework. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 EXPORT |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
None. Everything is done through objects. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 METHODS |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 new() |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
No parms needed. The C object is little more than a list of warnings |
86
|
|
|
|
|
|
|
and a bunch of rules. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=cut |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub new { |
91
|
6
|
|
|
6
|
1
|
5548
|
my $class = shift; |
92
|
|
|
|
|
|
|
|
93
|
6
|
|
|
|
|
62
|
my $self = { |
94
|
|
|
|
|
|
|
_warnings => [], |
95
|
|
|
|
|
|
|
}; |
96
|
6
|
|
|
|
|
49
|
bless $self, $class; |
97
|
|
|
|
|
|
|
|
98
|
6
|
|
|
|
|
55
|
$self->_read_rules(); |
99
|
|
|
|
|
|
|
|
100
|
6
|
|
|
|
|
39
|
return $self; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 warnings() |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Returns a list of warnings found by C and its brethren. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub warnings { |
110
|
64
|
|
|
64
|
1
|
1677
|
my $self = shift; |
111
|
|
|
|
|
|
|
|
112
|
64
|
50
|
|
|
|
131
|
return wantarray ? @{$self->{_warnings}} : scalar @{$self->{_warnings}}; |
|
64
|
|
|
|
|
198
|
|
|
0
|
|
|
|
|
0
|
|
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 clear_warnings() |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Clear the list of warnings for this linter object. It's automatically called |
118
|
|
|
|
|
|
|
when you call C. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub clear_warnings { |
123
|
64
|
|
|
64
|
1
|
183
|
my $self = shift; |
124
|
|
|
|
|
|
|
|
125
|
64
|
|
|
|
|
166
|
$self->{_warnings} = []; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head2 warn( $str [, $str...] ) |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Create a warning message, built from strings passed, like a C |
131
|
|
|
|
|
|
|
statement. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Typically, you'll leave this to C, but industrious |
134
|
|
|
|
|
|
|
programmers may want to do their own checking as well. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub warn { |
139
|
56
|
|
|
56
|
1
|
2038
|
my $self = shift; |
140
|
|
|
|
|
|
|
|
141
|
56
|
|
|
|
|
76
|
push( @{$self->{_warnings}}, join( "", @_ ) ); |
|
56
|
|
|
|
|
179
|
|
142
|
|
|
|
|
|
|
|
143
|
56
|
|
|
|
|
194
|
return; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 check_record( $marc ) |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Does all sorts of lint-like checks on the MARC record I<$marc>, |
149
|
|
|
|
|
|
|
both on the record as a whole, and on the individual fields & |
150
|
|
|
|
|
|
|
subfields. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub check_record { |
155
|
13
|
|
|
13
|
1
|
38369
|
my $self = shift; |
156
|
13
|
|
|
|
|
28
|
my $marc = shift; |
157
|
|
|
|
|
|
|
|
158
|
13
|
|
|
|
|
46
|
$self->clear_warnings(); |
159
|
|
|
|
|
|
|
|
160
|
13
|
50
|
33
|
|
|
109
|
( (ref $marc) && $marc->isa('MARC::Record') ) |
161
|
|
|
|
|
|
|
or return $self->warn( "Must pass a MARC::Record object to check_record" ); |
162
|
|
|
|
|
|
|
|
163
|
13
|
|
|
|
|
49
|
my @_1xx = $marc->field( "1.." ); |
164
|
13
|
|
|
|
|
1467
|
my $n1xx = scalar @_1xx; |
165
|
13
|
100
|
|
|
|
48
|
if ( $n1xx > 1 ) { |
166
|
1
|
|
|
|
|
7
|
$self->warn( "1XX: Only one 1XX tag is allowed, but I found $n1xx of them." ); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
13
|
50
|
|
|
|
35
|
if ( not $marc->field( 245 ) ) { |
170
|
0
|
|
|
|
|
0
|
$self->warn( "245: No 245 tag." ); |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
174
|
13
|
|
|
|
|
976
|
my %field_seen; |
175
|
13
|
|
|
|
|
27
|
my $rules = $self->{_rules}; |
176
|
13
|
|
|
|
|
43
|
for my $field ( $marc->fields ) { |
177
|
206
|
|
|
|
|
515
|
my $tagno = $field->tag; |
178
|
|
|
|
|
|
|
|
179
|
206
|
|
|
|
|
891
|
my $tagrules = ''; |
180
|
|
|
|
|
|
|
#if 880 field, inherit rules from tagno in subfield _6 |
181
|
206
|
|
|
|
|
517
|
my $is_880 = 0; |
182
|
206
|
100
|
|
|
|
347
|
if ($tagno eq '880') { |
183
|
1
|
|
|
|
|
1
|
$is_880 = 1; |
184
|
1
|
50
|
|
|
|
4
|
if ($field->subfield('6')) { |
185
|
1
|
|
|
|
|
32
|
my $sub6 = $field->subfield('6'); |
186
|
1
|
|
|
|
|
19
|
$tagno = substr($sub6, 0, 3); |
187
|
|
|
|
|
|
|
|
188
|
1
|
50
|
|
|
|
4
|
$tagrules = $rules->{$tagno} or next; |
189
|
|
|
|
|
|
|
#880 is repeatable, but its linked field may not be |
190
|
1
|
50
|
33
|
|
|
10
|
if ( ($tagrules->{'repeatable'} && ( $tagrules->{'repeatable'} eq 'NR' )) && $field_seen{'880.'.$tagno} ) { |
|
|
|
33
|
|
|
|
|
191
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Field is not repeatable." ); |
192
|
|
|
|
|
|
|
} #if repeatability |
193
|
|
|
|
|
|
|
} #if subfield 6 present |
194
|
|
|
|
|
|
|
else { |
195
|
0
|
|
|
|
|
0
|
$self->warn( "880: No subfield 6." ); |
196
|
|
|
|
|
|
|
} #else no subfield 6 in 880 field |
197
|
|
|
|
|
|
|
} #if this is 880 field |
198
|
|
|
|
|
|
|
else { |
199
|
205
|
100
|
|
|
|
535
|
$tagrules = $rules->{$tagno} or next; |
200
|
|
|
|
|
|
|
|
201
|
204
|
50
|
66
|
|
|
862
|
if ( ($tagrules->{'repeatable'} && ( $tagrules->{'repeatable'} eq 'NR' )) && $field_seen{$tagno} ) { |
|
|
|
66
|
|
|
|
|
202
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Field is not repeatable." ); |
203
|
|
|
|
|
|
|
} #if repeatability |
204
|
|
|
|
|
|
|
} #else not 880 |
205
|
|
|
|
|
|
|
|
206
|
205
|
100
|
|
|
|
453
|
if ( $tagno >= 10 ) { |
|
|
50
|
|
|
|
|
|
207
|
158
|
|
|
|
|
281
|
for my $ind ( 1..2 ) { |
208
|
316
|
|
|
|
|
683
|
my $indvalue = $field->indicator($ind); |
209
|
316
|
100
|
|
|
|
4741
|
if ( not ($indvalue =~ $tagrules->{"ind$ind" . "_regex"}) ) { |
210
|
|
|
|
|
|
|
$self->warn( |
211
|
|
|
|
|
|
|
"$tagno: Indicator $ind must be ", |
212
|
4
|
|
|
|
|
20
|
$tagrules->{"ind$ind" . "_desc"}, |
213
|
|
|
|
|
|
|
" but it's \"$indvalue\"" |
214
|
|
|
|
|
|
|
); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
} # for |
217
|
|
|
|
|
|
|
|
218
|
158
|
|
|
|
|
253
|
my %sub_seen; |
219
|
158
|
|
|
|
|
365
|
for my $subfield ( $field->subfields ) { |
220
|
262
|
|
|
|
|
3038
|
my ($code,$data) = @$subfield; |
221
|
|
|
|
|
|
|
|
222
|
262
|
|
|
|
|
438
|
my $rule = $tagrules->{$code}; |
223
|
262
|
100
|
66
|
|
|
772
|
if ( not defined $rule ) { |
|
|
100
|
|
|
|
|
|
224
|
2
|
|
|
|
|
7
|
$self->warn( "$tagno: Subfield _$code is not allowed." ); |
225
|
|
|
|
|
|
|
} elsif ( ($rule eq "NR") && $sub_seen{$code} ) { |
226
|
1
|
|
|
|
|
4
|
$self->warn( "$tagno: Subfield _$code is not repeatable." ); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
262
|
50
|
|
|
|
608
|
if ( $data =~ /[\t\r\n]/ ) { |
230
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Subfield _$code has an invalid control character" ); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
262
|
|
|
|
|
615
|
++$sub_seen{$code}; |
234
|
|
|
|
|
|
|
} # for $subfields |
235
|
|
|
|
|
|
|
} # if $tagno >= 10 |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
elsif ($tagno < 10) { |
238
|
|
|
|
|
|
|
#check for subfield characters |
239
|
47
|
100
|
|
|
|
101
|
if ($field->data() =~ /\x1F/) { |
240
|
1
|
|
|
|
|
13
|
$self->warn( "$tagno: Subfields are not allowed in fields lower than 010" ); |
241
|
|
|
|
|
|
|
} #if control field has subfield delimiter |
242
|
|
|
|
|
|
|
} #elsif $tagno < 10 |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# Check to see if a check_xxx() function exists, and call it on the field if it does |
245
|
205
|
|
|
|
|
822
|
my $checker = "check_$tagno"; |
246
|
205
|
100
|
|
|
|
1639
|
if ( $self->can( $checker ) ) { |
247
|
25
|
|
|
|
|
77
|
$self->$checker( $field ); |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
205
|
100
|
|
|
|
3434
|
if ($is_880) { |
251
|
1
|
|
|
|
|
3
|
++$field_seen{'880.'.$tagno}; |
252
|
|
|
|
|
|
|
} #if 880 field |
253
|
|
|
|
|
|
|
else { |
254
|
204
|
|
|
|
|
436
|
++$field_seen{$tagno}; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} # for my $fields |
257
|
|
|
|
|
|
|
|
258
|
13
|
|
|
|
|
57
|
return; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=head2 check_I( $field ) |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Various functions to check the different fields. If the function doesn't exist, |
264
|
|
|
|
|
|
|
then it doesn't get checked. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head2 check_020() |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Looks at 020$a and reports errors if the check digit is wrong. |
269
|
|
|
|
|
|
|
Looks at 020$z and validates number if hyphens are present. |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Uses Business::ISBN to do validation. Thirteen digit checking is currently done |
272
|
|
|
|
|
|
|
with the internal sub _isbn13_check_digit(), based on code from Business::ISBN. |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
TO DO (check_020): |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Fix 13-digit ISBN checking. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub check_020 { |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
5
|
|
|
5
|
|
2087
|
use Business::ISBN; |
|
5
|
|
|
|
|
206432
|
|
|
5
|
|
|
|
|
15106
|
|
284
|
|
|
|
|
|
|
|
285
|
22
|
|
|
22
|
1
|
19306
|
my $self = shift; |
286
|
22
|
|
|
|
|
53
|
my $field = shift; |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
################################################### |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# break subfields into code-data array and validate data |
291
|
|
|
|
|
|
|
|
292
|
22
|
|
|
|
|
69
|
my @subfields = $field->subfields(); |
293
|
|
|
|
|
|
|
|
294
|
22
|
|
|
|
|
510
|
while (my $subfield = pop(@subfields)) { |
295
|
22
|
|
|
|
|
58
|
my ($code, $data) = @$subfield; |
296
|
22
|
|
|
|
|
46
|
my $isbnno = $data; |
297
|
|
|
|
|
|
|
#remove any hyphens |
298
|
22
|
|
|
|
|
80
|
$isbnno =~ s/\-//g; |
299
|
|
|
|
|
|
|
#remove nondigits |
300
|
22
|
|
|
|
|
163
|
$isbnno =~ s/^\D*(\d{9,12}[X\d])\b.*$/$1/; |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
#report error if this is subfield 'a' |
303
|
|
|
|
|
|
|
#and the first 10 or 13 characters are not a match for $isbnno |
304
|
22
|
100
|
|
|
|
78
|
if ($code eq 'a') { |
|
|
50
|
|
|
|
|
|
305
|
21
|
100
|
|
|
|
82
|
if ((substr($data,0,length($isbnno)) ne $isbnno)) { |
306
|
2
|
|
|
|
|
12
|
$self->warn( "020: Subfield a may have invalid characters."); |
307
|
|
|
|
|
|
|
} #if first characters don't match |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
#report error if no space precedes a qualifier in subfield a |
310
|
21
|
100
|
|
|
|
79
|
if ($data =~ /\(/) { |
311
|
8
|
100
|
|
|
|
52
|
$self->warn( "020: Subfield a qualifier must be preceded by space, $data.") unless ($data =~ /[X0-9] \(/); |
312
|
|
|
|
|
|
|
} #if data has parenthetical qualifier |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
#report error if unable to find 10-13 digit string of digits in subfield 'a' |
315
|
21
|
100
|
|
|
|
98
|
if (($isbnno !~ /(?:^\d{10}$)|(?:^\d{13}$)|(?:^\d{9}X$)/)) { |
316
|
3
|
|
|
|
|
22
|
$self->warn( "020: Subfield a has the wrong number of digits, $data."); |
317
|
|
|
|
|
|
|
} # if subfield 'a' but not 10 or 13 digit isbn |
318
|
|
|
|
|
|
|
#otherwise, check 10 and 13 digit checksums for validity |
319
|
|
|
|
|
|
|
else { |
320
|
18
|
100
|
|
|
|
70
|
if ((length ($isbnno) == 10)) { |
|
|
50
|
|
|
|
|
|
321
|
|
|
|
|
|
|
|
322
|
16
|
50
|
33
|
|
|
67
|
if (($Business::ISBN::VERSION gt '2.02_01') || ($Business::ISBN::VERSION gt '2.009')) { |
|
|
0
|
|
|
|
|
|
323
|
16
|
100
|
|
|
|
82
|
$self->warn( "020: Subfield a has bad checksum, $data." ) if (Business::ISBN::valid_isbn_checksum($isbnno) != 1); |
324
|
|
|
|
|
|
|
} #if Business::ISBN version higher than 2.02_01 or 2.009 |
325
|
|
|
|
|
|
|
elsif ($Business::ISBN::VERSION lt '2') { |
326
|
0
|
0
|
|
|
|
0
|
$self->warn( "020: Subfield a has bad checksum, $data." ) if (Business::ISBN::is_valid_checksum($isbnno) != 1); |
327
|
|
|
|
|
|
|
} #elsif Business::ISBN version lower than 2 |
328
|
|
|
|
|
|
|
else { |
329
|
0
|
|
|
|
|
0
|
$self->warn( "Business::ISBN version must be below 2 or above 2.02_02 or 2.009." ); |
330
|
|
|
|
|
|
|
} #else Business::ISBN version between 2 and 2.02_02 |
331
|
|
|
|
|
|
|
} #if 10 digit ISBN has invalid check digit |
332
|
|
|
|
|
|
|
# do validation check for 13 digit isbn |
333
|
|
|
|
|
|
|
######################################### |
334
|
|
|
|
|
|
|
### Not yet fully implemented ########### |
335
|
|
|
|
|
|
|
######################################### |
336
|
|
|
|
|
|
|
elsif (length($isbnno) == 13){ |
337
|
|
|
|
|
|
|
#change line below once Business::ISBN handles 13-digit ISBNs |
338
|
2
|
|
|
|
|
9
|
my $is_valid_13 = _isbn13_check_digit($isbnno); |
339
|
2
|
100
|
|
|
|
20
|
$self->warn( "020: Subfield a has bad checksum (13 digit), $data.") unless ($is_valid_13 == 1); |
340
|
|
|
|
|
|
|
} #elsif 13 digit ISBN has invalid check digit |
341
|
|
|
|
|
|
|
################################################### |
342
|
|
|
|
|
|
|
} #else subfield 'a' has 10 or 13 digits |
343
|
|
|
|
|
|
|
} #if subfield 'a' |
344
|
|
|
|
|
|
|
#look for valid isbn in 020$z |
345
|
|
|
|
|
|
|
elsif ($code eq 'z') { |
346
|
1
|
50
|
33
|
|
|
17
|
if (($data =~ /^ISBN/) || ($data =~ /^\d*\-\d+/)){ |
347
|
|
|
|
|
|
|
################################################## |
348
|
|
|
|
|
|
|
## Turned on for now--Comment to unimplement #### |
349
|
|
|
|
|
|
|
################################################## |
350
|
0
|
0
|
0
|
|
|
0
|
$self->warn( "020: Subfield z is numerically valid.") if ((length ($isbnno) == 10) && (Business::ISBN::is_valid_checksum($isbnno) == 1)); |
351
|
|
|
|
|
|
|
} #if 10 digit ISBN has invalid check digit |
352
|
|
|
|
|
|
|
} #elsif subfield 'z' |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
} # while @subfields |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
} #check_020 |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=head2 _isbn13_check_digit($ean) |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
Internal sub to determine if 13-digit ISBN has a valid checksum. The code is |
361
|
|
|
|
|
|
|
taken from Business::ISBN::as_ean. It is expected to be temporary until |
362
|
|
|
|
|
|
|
Business::ISBN is updated to check 13-digit ISBNs itself. |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=cut |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
sub _isbn13_check_digit { |
367
|
|
|
|
|
|
|
|
368
|
2
|
|
|
2
|
|
6
|
my $ean = shift; |
369
|
|
|
|
|
|
|
#remove and store current check digit |
370
|
2
|
|
|
|
|
8
|
my $check_digit = chop($ean); |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
#calculate valid checksum |
373
|
2
|
|
|
|
|
7
|
my $sum = 0; |
374
|
2
|
|
|
|
|
8
|
foreach my $index ( 0, 2, 4, 6, 8, 10 ) |
375
|
|
|
|
|
|
|
{ |
376
|
12
|
|
|
|
|
37
|
$sum += substr($ean, $index, 1); |
377
|
12
|
|
|
|
|
31
|
$sum += 3 * substr($ean, $index + 1, 1); |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
#take the next higher multiple of 10 and subtract the sum. |
381
|
|
|
|
|
|
|
#if $sum is 37, the next highest multiple of ten is 40. the |
382
|
|
|
|
|
|
|
#check digit would be 40 - 37 => 3. |
383
|
2
|
|
|
|
|
9
|
my $valid_check_digit = ( 10 * ( int( $sum / 10 ) + 1 ) - $sum ) % 10; |
384
|
|
|
|
|
|
|
|
385
|
2
|
100
|
|
|
|
10
|
return $check_digit == $valid_check_digit ? 1 : 0; |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
} # _isbn13_check_digit |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
######################################### |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head2 check_041( $field ) |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Warns if subfields are not evenly divisible by 3 unless second indicator is 7 |
394
|
|
|
|
|
|
|
(future implementation would ensure that each subfield is exactly 3 characters |
395
|
|
|
|
|
|
|
unless ind2 is 7--since subfields are now repeatable. This is not implemented |
396
|
|
|
|
|
|
|
here due to the large number of records needing to be corrected.). Validates |
397
|
|
|
|
|
|
|
against the MARC Code List for Languages (L) using the |
398
|
|
|
|
|
|
|
MARC::Lint::CodeData data pack to MARC::Lint (%LanguageCodes, |
399
|
|
|
|
|
|
|
%ObsoleteLanguageCodes). |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=cut |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub check_041 { |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
|
406
|
3
|
|
|
3
|
1
|
2983
|
my $self = shift; |
407
|
3
|
|
|
|
|
11
|
my $field = shift; |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
410
|
|
|
|
|
|
|
|
411
|
3
|
|
|
|
|
11
|
my @subfields = $field->subfields(); |
412
|
3
|
|
|
|
|
59
|
my @newsubfields = (); |
413
|
|
|
|
|
|
|
|
414
|
3
|
|
|
|
|
10
|
while (my $subfield = pop(@subfields)) { |
415
|
7
|
|
|
|
|
30
|
my ($code, $data) = @$subfield; |
416
|
7
|
|
|
|
|
28
|
unshift (@newsubfields, $code, $data); |
417
|
|
|
|
|
|
|
} # while |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
#warn if length of each subfield is not divisible by 3 unless ind2 is 7 |
420
|
3
|
50
|
|
|
|
11
|
unless ($field->indicator(2) eq '7') { |
421
|
3
|
|
|
|
|
45
|
for (my $index = 0; $index <=$#newsubfields; $index+=2) { |
422
|
7
|
100
|
|
|
|
22
|
if (length ($newsubfields[$index+1]) %3 != 0) { |
423
|
3
|
|
|
|
|
14
|
$self->warn( "041: Subfield _$newsubfields[$index] must be evenly divisible by 3 or exactly three characters if ind2 is not 7, ($newsubfields[$index+1])." ); |
424
|
|
|
|
|
|
|
} #if field length not divisible evenly by 3 |
425
|
|
|
|
|
|
|
############################################## |
426
|
|
|
|
|
|
|
# validation against code list data |
427
|
|
|
|
|
|
|
## each subfield has a multiple of 3 chars |
428
|
|
|
|
|
|
|
# need to look at each group of 3 characters |
429
|
|
|
|
|
|
|
else { |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
#break each character of the subfield into an array position |
432
|
4
|
|
|
|
|
17
|
my @codechars = split '', $newsubfields[$index+1]; |
433
|
|
|
|
|
|
|
|
434
|
4
|
|
|
|
|
6
|
my $pos = 0; |
435
|
|
|
|
|
|
|
#store each 3 char code in a slot of @codes041 |
436
|
4
|
|
|
|
|
8
|
my @codes041 = (); |
437
|
4
|
|
|
|
|
11
|
while ($pos <= $#codechars) { |
438
|
6
|
|
|
|
|
21
|
push @codes041, (join '', @codechars[$pos..$pos+2]); |
439
|
6
|
|
|
|
|
16
|
$pos += 3; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
|
443
|
4
|
|
|
|
|
10
|
foreach my $code041 (@codes041) { |
444
|
|
|
|
|
|
|
#see if language code matches valid code |
445
|
6
|
50
|
|
|
|
15
|
my $validlang = $LanguageCodes{$code041} ? 1 : 0; |
446
|
|
|
|
|
|
|
#look for invalid code match if valid code was not matched |
447
|
6
|
100
|
|
|
|
14
|
my $obsoletelang = $ObsoleteLanguageCodes{$code041} ? 1 : 0; |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# skip valid subfields |
450
|
6
|
50
|
|
|
|
14
|
unless ($validlang) { |
451
|
|
|
|
|
|
|
#report invalid matches as possible obsolete codes |
452
|
6
|
100
|
|
|
|
12
|
if ($obsoletelang) { |
453
|
1
|
|
|
|
|
7
|
$self->warn( "041: Subfield _$newsubfields[$index], $newsubfields[$index+1], may be obsolete."); |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
else { |
456
|
5
|
|
|
|
|
22
|
$self->warn( "041: Subfield _$newsubfields[$index], $newsubfields[$index+1] ($code041), is not valid."); |
457
|
|
|
|
|
|
|
} #else code not found |
458
|
|
|
|
|
|
|
} # unless found valid code |
459
|
|
|
|
|
|
|
} #foreach code in 041 |
460
|
|
|
|
|
|
|
} # else subfield has multiple of 3 chars |
461
|
|
|
|
|
|
|
############################################## |
462
|
|
|
|
|
|
|
} # foreach subfield |
463
|
|
|
|
|
|
|
} #unless ind2 is 7 |
464
|
|
|
|
|
|
|
} #check_041 |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head2 check_043( $field ) |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Warns if each subfield a is not exactly 7 characters. Validates each code |
469
|
|
|
|
|
|
|
against the MARC code list for Geographic Areas (L) |
470
|
|
|
|
|
|
|
using the MARC::Lint::CodeData data pack to MARC::Lint (%GeogAreaCodes, |
471
|
|
|
|
|
|
|
%ObsoleteGeogAreaCodes). |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=cut |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
sub check_043 { |
476
|
|
|
|
|
|
|
|
477
|
2
|
|
|
2
|
1
|
2369
|
my $self = shift; |
478
|
2
|
|
|
|
|
5
|
my $field = shift; |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
481
|
|
|
|
|
|
|
|
482
|
2
|
|
|
|
|
6
|
my @subfields = $field->subfields(); |
483
|
2
|
|
|
|
|
36
|
my @newsubfields = (); |
484
|
|
|
|
|
|
|
|
485
|
2
|
|
|
|
|
9
|
while (my $subfield = pop(@subfields)) { |
486
|
5
|
|
|
|
|
10
|
my ($code, $data) = @$subfield; |
487
|
5
|
|
|
|
|
18
|
unshift (@newsubfields, $code, $data); |
488
|
|
|
|
|
|
|
} # while |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
#warn if length of subfield a is not exactly 7 |
491
|
2
|
|
|
|
|
11
|
for (my $index = 0; $index <=$#newsubfields; $index+=2) { |
492
|
5
|
100
|
66
|
|
|
38
|
if (($newsubfields[$index] eq 'a') && (length ($newsubfields[$index+1]) != 7)) { |
|
|
50
|
|
|
|
|
|
493
|
2
|
|
|
|
|
7
|
$self->warn( "043: Subfield _a must be exactly 7 characters, $newsubfields[$index+1]" ); |
494
|
|
|
|
|
|
|
} # if suba and length is not 7 |
495
|
|
|
|
|
|
|
#check against code list for geographic areas. |
496
|
|
|
|
|
|
|
elsif ($newsubfields[$index] eq 'a') { |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
#see if geog area code matches valid code |
499
|
3
|
50
|
|
|
|
17
|
my $validgac = $GeogAreaCodes{$newsubfields[$index+1]} ? 1 : 0; |
500
|
|
|
|
|
|
|
#look for obsolete code match if valid code was not matched |
501
|
3
|
100
|
|
|
|
10
|
my $obsoletegac = $ObsoleteGeogAreaCodes{$newsubfields[$index+1]} ? 1 : 0; |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# skip valid subfields |
504
|
3
|
50
|
|
|
|
9
|
unless ($validgac) { |
505
|
|
|
|
|
|
|
#report invalid matches as possible obsolete codes |
506
|
3
|
100
|
|
|
|
7
|
if ($obsoletegac) { |
507
|
1
|
|
|
|
|
4
|
$self->warn( "043: Subfield _a, $newsubfields[$index+1], may be obsolete."); |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
else { |
510
|
2
|
|
|
|
|
11
|
$self->warn( "043: Subfield _a, $newsubfields[$index+1], is not valid."); |
511
|
|
|
|
|
|
|
} #else code not found |
512
|
|
|
|
|
|
|
} # unless found valid code |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
} #elsif suba |
515
|
|
|
|
|
|
|
} #foreach subfield |
516
|
|
|
|
|
|
|
} #check_043 |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=head2 check_245( $field ) |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
-Makes sure $a exists (and is first subfield). |
521
|
|
|
|
|
|
|
-Warns if last character of field is not a period |
522
|
|
|
|
|
|
|
--Follows LCRI 1.0C, Nov. 2003 rather than MARC21 rule |
523
|
|
|
|
|
|
|
-Verifies that $c is preceded by / (space-/) |
524
|
|
|
|
|
|
|
-Verifies that initials in $c are not spaced |
525
|
|
|
|
|
|
|
-Verifies that $b is preceded by :;= (space-colon, space-semicolon, space-equals) |
526
|
|
|
|
|
|
|
-Verifies that $h is not preceded by space unless it is dash-space |
527
|
|
|
|
|
|
|
-Verifies that data of $h is enclosed in square brackets |
528
|
|
|
|
|
|
|
-Verifies that $n is preceded by . (period) |
529
|
|
|
|
|
|
|
--As part of that, looks for no-space period, or dash-space-period (for replaced elipses) |
530
|
|
|
|
|
|
|
-Verifies that $p is preceded by , (no-space-comma) when following $n and . (period) when following other subfields. |
531
|
|
|
|
|
|
|
-Performs rudimentary article check of 245 2nd indicator vs. 1st word of 245$a (for manual verification). |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
Article checking is done by internal _check_article method, which should work for 130, 240, 245, 440, 630, 730, and 830. |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=cut |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
sub check_245 { |
538
|
|
|
|
|
|
|
|
539
|
49
|
|
|
49
|
1
|
22624
|
my $self = shift; |
540
|
49
|
|
|
|
|
69
|
my $field = shift; |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
#set tagno for reporting |
543
|
49
|
|
|
|
|
68
|
my $tagno = '245'; |
544
|
|
|
|
|
|
|
|
545
|
49
|
100
|
|
|
|
111
|
if ( not $field->subfield( "a" ) ) { |
546
|
1
|
|
|
|
|
22
|
$self->warn( "245: Must have a subfield _a." ); |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
550
|
|
|
|
|
|
|
|
551
|
49
|
|
|
|
|
1065
|
my @subfields = $field->subfields(); |
552
|
49
|
|
|
|
|
770
|
my @newsubfields = (); |
553
|
49
|
|
|
|
|
65
|
my $has_sub_6 = 0; |
554
|
|
|
|
|
|
|
|
555
|
49
|
|
|
|
|
118
|
while (my $subfield = pop(@subfields)) { |
556
|
90
|
|
|
|
|
147
|
my ($code, $data) = @$subfield; |
557
|
|
|
|
|
|
|
#check for subfield 6 being present |
558
|
90
|
100
|
|
|
|
161
|
$has_sub_6 = 1 if ($code eq '6'); |
559
|
90
|
|
|
|
|
285
|
unshift (@newsubfields, $code, $data); |
560
|
|
|
|
|
|
|
} # while |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# 245 must end in period (may want to make this less restrictive by allowing trailing spaces) |
563
|
|
|
|
|
|
|
#do 2 checks--for final punctuation (MARC21 rule), and for period (LCRI 1.0C, Nov. 2003; LCPS 1.7.1) |
564
|
49
|
100
|
|
|
|
271
|
if ($newsubfields[$#newsubfields] !~ /[.?!]$/) { |
|
|
100
|
|
|
|
|
|
565
|
1
|
|
|
|
|
4
|
$self->warn ( "245: Must end with . (period)."); |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
elsif($newsubfields[$#newsubfields] =~ /[?!]$/) { |
568
|
2
|
|
|
|
|
5
|
$self->warn ( "245: MARC21 allows ? or ! as final punctuation but LCRI 1.0C, Nov. 2003 (LCPS 1.7.1 for RDA records), requires period."); |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
##Check for first subfield |
572
|
|
|
|
|
|
|
#subfield a should be first subfield (or 2nd if subfield '6' is present) |
573
|
49
|
100
|
|
|
|
93
|
if ($has_sub_6) { |
574
|
|
|
|
|
|
|
#make sure there are at least 2 subfields |
575
|
2
|
50
|
|
|
|
6
|
if ($#newsubfields < 3) { |
576
|
0
|
|
|
|
|
0
|
$self->warn ("$tagno: May have too few subfields."); |
577
|
|
|
|
|
|
|
} #if fewer than 2 subfields |
578
|
|
|
|
|
|
|
else { |
579
|
2
|
50
|
|
|
|
5
|
if ($newsubfields[0] ne '6') { |
580
|
0
|
|
|
|
|
0
|
$self->warn ( "$tagno: First subfield must be _6, but it is $newsubfields[0]"); |
581
|
|
|
|
|
|
|
} #if 1st subfield not '6' |
582
|
2
|
50
|
|
|
|
7
|
if ($newsubfields[2] ne 'a') { |
583
|
0
|
|
|
|
|
0
|
$self->warn ( "$tagno: First subfield after subfield _6 must be _a, but it is _$newsubfields[2]"); |
584
|
|
|
|
|
|
|
} #if 2nd subfield not 'a' |
585
|
|
|
|
|
|
|
} #else at least 2 subfields |
586
|
|
|
|
|
|
|
} #if has subfield 6 |
587
|
|
|
|
|
|
|
else { |
588
|
|
|
|
|
|
|
#1st subfield must be 'a' |
589
|
47
|
100
|
|
|
|
97
|
if ($newsubfields[0] ne 'a') { |
590
|
1
|
|
|
|
|
4
|
$self->warn ( "$tagno: First subfield must be _a, but it is _$newsubfields[0]"); |
591
|
|
|
|
|
|
|
} #if 2nd subfield not 'a' |
592
|
|
|
|
|
|
|
} #else no subfield _6 |
593
|
|
|
|
|
|
|
##End check for first subfield |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
#subfield c, if present, must be preceded by / |
596
|
|
|
|
|
|
|
#also look for space between initials |
597
|
49
|
100
|
|
|
|
988
|
if ($field->subfield("c")) { |
598
|
|
|
|
|
|
|
|
599
|
14
|
|
|
|
|
368
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
600
|
|
|
|
|
|
|
# 245 subfield c must be preceded by / (space-/) |
601
|
17
|
100
|
|
|
|
45
|
if ($newsubfields[$index] eq 'c') { |
602
|
14
|
100
|
|
|
|
71
|
$self->warn ( "245: Subfield _c must be preceded by /") if ($newsubfields[$index-1] !~ /\s\/$/); |
603
|
|
|
|
|
|
|
# 245 subfield c initials should not have space |
604
|
14
|
100
|
66
|
|
|
62
|
$self->warn ( "245: Subfield _c initials should not have a space.") if (($newsubfields[$index+1] =~ /\b\w\. \b\w\./) && ($newsubfields[$index+1] !~ /\[\bi\.e\. \b\w\..*\]/)); |
605
|
14
|
|
|
|
|
31
|
last; |
606
|
|
|
|
|
|
|
} #if |
607
|
|
|
|
|
|
|
} #for |
608
|
|
|
|
|
|
|
} # subfield c exists |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
#each subfield b, if present, should be preceded by :;= (colon, semicolon, or equals sign) |
611
|
|
|
|
|
|
|
### Are there others? ### |
612
|
49
|
100
|
|
|
|
657
|
if ($field->subfield("b")) { |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# 245 subfield b should be preceded by space-:;= (colon, semicolon, or equals sign) |
615
|
13
|
|
|
|
|
267
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
616
|
|
|
|
|
|
|
#report error if subfield 'b' is not preceded by space-:;= (colon, semicolon, or equals sign) |
617
|
16
|
100
|
100
|
|
|
91
|
if (($newsubfields[$index] eq 'b') && ($newsubfields[$index-1] !~ / [:;=]$/)) { |
618
|
4
|
|
|
|
|
9
|
$self->warn ( "245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign."); |
619
|
|
|
|
|
|
|
} #if |
620
|
|
|
|
|
|
|
} #for |
621
|
|
|
|
|
|
|
} # subfield b exists |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
#each subfield h, if present, should be preceded by non-space |
625
|
49
|
100
|
|
|
|
685
|
if ($field->subfield("h")) { |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
# 245 subfield h should not be preceded by space |
628
|
4
|
|
|
|
|
83
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
629
|
|
|
|
|
|
|
#report error if subfield 'h' is preceded by space (unless dash-space) |
630
|
6
|
100
|
100
|
|
|
40
|
if (($newsubfields[$index] eq 'h') && ($newsubfields[$index-1] !~ /(\S$)|(\-\- $)/)) { |
631
|
1
|
|
|
|
|
4
|
$self->warn ( "245: Subfield _h should not be preceded by space."); |
632
|
|
|
|
|
|
|
} #if h and not preceded by no-space (unless dash) |
633
|
|
|
|
|
|
|
#report error if subfield 'h' does not start with open square bracket with a matching close bracket |
634
|
|
|
|
|
|
|
##could have check against list of valid values here |
635
|
6
|
100
|
100
|
|
|
35
|
if (($newsubfields[$index] eq 'h') && ($newsubfields[$index+1] !~ /^\[\w*\s*\w*\]/)) { |
636
|
1
|
|
|
|
|
6
|
$self->warn ( "245: Subfield _h must have matching square brackets, $newsubfields[$index]."); |
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
} #for |
639
|
|
|
|
|
|
|
} # subfield h exists |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
#each subfield n, if present, must be preceded by . (period) |
642
|
49
|
100
|
|
|
|
782
|
if ($field->subfield("n")) { |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# 245 subfield n must be preceded by . (period) |
645
|
4
|
|
|
|
|
74
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
646
|
|
|
|
|
|
|
#report error if subfield 'n' is not preceded by non-space-period or dash-space-period |
647
|
6
|
100
|
100
|
|
|
40
|
if (($newsubfields[$index] eq 'n') && ($newsubfields[$index-1] !~ /(\S\.$)|(\-\- \.$)/)) { |
648
|
1
|
|
|
|
|
3
|
$self->warn ( "245: Subfield _n must be preceded by . (period)."); |
649
|
|
|
|
|
|
|
} #if |
650
|
|
|
|
|
|
|
} #for |
651
|
|
|
|
|
|
|
} # subfield n exists |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
#each subfield p, if present, must be preceded by a , (no-space-comma) if it follows subfield n, or by . (no-space-period or dash-space-period) following other subfields |
654
|
49
|
100
|
|
|
|
814
|
if ($field->subfield("p")) { |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# 245 subfield p must be preceded by . (period) or , (comma) |
657
|
4
|
|
|
|
|
74
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
658
|
|
|
|
|
|
|
#only looking for subfield p |
659
|
6
|
100
|
|
|
|
14
|
if ($newsubfields[$index] eq 'p') { |
660
|
|
|
|
|
|
|
# case for subfield 'n' being field before this one (allows dash-space-comma) |
661
|
4
|
100
|
100
|
|
|
53
|
if (($newsubfields[$index-2] eq 'n') && ($newsubfields[$index-1] !~ /(\S,$)|(\-\- ,$)/)) { |
|
|
100
|
100
|
|
|
|
|
662
|
1
|
|
|
|
|
5
|
$self->warn ( "245: Subfield _p must be preceded by , (comma) when it follows subfield _n."); |
663
|
|
|
|
|
|
|
} #if subfield n precedes this one |
664
|
|
|
|
|
|
|
# elsif case for subfield before this one is not n |
665
|
|
|
|
|
|
|
elsif (($newsubfields[$index-2] ne 'n') && ($newsubfields[$index-1] !~ /(\S\.$)|(\-\- \.$)/)) { |
666
|
1
|
|
|
|
|
3
|
$self->warn ( "245: Subfield _p must be preceded by . (period) when it follows a subfield other than _n."); |
667
|
|
|
|
|
|
|
} #elsif subfield p preceded by non-period when following a non-subfield 'n' |
668
|
|
|
|
|
|
|
} #if index is looking at subfield p |
669
|
|
|
|
|
|
|
} #for |
670
|
|
|
|
|
|
|
} # subfield p exists |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
###################################### |
673
|
|
|
|
|
|
|
#check for invalid 2nd indicator |
674
|
49
|
|
|
|
|
795
|
$self->_check_article($field); |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
} # check_245 |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
############ |
682
|
|
|
|
|
|
|
# Internal # |
683
|
|
|
|
|
|
|
############ |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=head2 _check_article |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Check of articles is based on code from Ian Hamilton. This version is more |
688
|
|
|
|
|
|
|
limited in that it focuses on English, Spanish, French, Italian and German |
689
|
|
|
|
|
|
|
articles. Certain possible articles have been removed if they are valid English |
690
|
|
|
|
|
|
|
non-articles. This version also disregards 008_language/041 codes and just uses |
691
|
|
|
|
|
|
|
the list of articles to provide warnings/suggestions. |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
source for articles = L |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
Should work with fields 130, 240, 245, 440, 630, 730, and 830. Reports error if |
696
|
|
|
|
|
|
|
another field is passed in. |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=cut |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
sub _check_article { |
701
|
|
|
|
|
|
|
|
702
|
49
|
|
|
49
|
|
60
|
my $self = shift; |
703
|
49
|
|
|
|
|
60
|
my $field = shift; |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
#add articles here as needed |
706
|
|
|
|
|
|
|
##Some omitted due to similarity with valid words (e.g. the German 'die'). |
707
|
49
|
|
|
|
|
636
|
my %article = ( |
708
|
|
|
|
|
|
|
'a' => 'eng glg hun por', |
709
|
|
|
|
|
|
|
'an' => 'eng', |
710
|
|
|
|
|
|
|
'das' => 'ger', |
711
|
|
|
|
|
|
|
'dem' => 'ger', |
712
|
|
|
|
|
|
|
'der' => 'ger', |
713
|
|
|
|
|
|
|
'ein' => 'ger', |
714
|
|
|
|
|
|
|
'eine' => 'ger', |
715
|
|
|
|
|
|
|
'einem' => 'ger', |
716
|
|
|
|
|
|
|
'einen' => 'ger', |
717
|
|
|
|
|
|
|
'einer' => 'ger', |
718
|
|
|
|
|
|
|
'eines' => 'ger', |
719
|
|
|
|
|
|
|
'el' => 'spa', |
720
|
|
|
|
|
|
|
'en' => 'cat dan nor swe', |
721
|
|
|
|
|
|
|
'gl' => 'ita', |
722
|
|
|
|
|
|
|
'gli' => 'ita', |
723
|
|
|
|
|
|
|
'il' => 'ita mlt', |
724
|
|
|
|
|
|
|
'l' => 'cat fre ita mlt', |
725
|
|
|
|
|
|
|
'la' => 'cat fre ita spa', |
726
|
|
|
|
|
|
|
'las' => 'spa', |
727
|
|
|
|
|
|
|
'le' => 'fre ita', |
728
|
|
|
|
|
|
|
'les' => 'cat fre', |
729
|
|
|
|
|
|
|
'lo' => 'ita spa', |
730
|
|
|
|
|
|
|
'los' => 'spa', |
731
|
|
|
|
|
|
|
'os' => 'por', |
732
|
|
|
|
|
|
|
'the' => 'eng', |
733
|
|
|
|
|
|
|
'um' => 'por', |
734
|
|
|
|
|
|
|
'uma' => 'por', |
735
|
|
|
|
|
|
|
'un' => 'cat spa fre ita', |
736
|
|
|
|
|
|
|
'una' => 'cat spa ita', |
737
|
|
|
|
|
|
|
'une' => 'fre', |
738
|
|
|
|
|
|
|
'uno' => 'ita', |
739
|
|
|
|
|
|
|
); |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
#add exceptions here as needed |
742
|
|
|
|
|
|
|
# may want to make keys lowercase |
743
|
49
|
|
|
|
|
451
|
my %exceptions = ( |
744
|
|
|
|
|
|
|
'A & E' => 1, |
745
|
|
|
|
|
|
|
'A & ' => 1, |
746
|
|
|
|
|
|
|
'A-' => 1, |
747
|
|
|
|
|
|
|
'A+' => 1, |
748
|
|
|
|
|
|
|
'A is ' => 1, |
749
|
|
|
|
|
|
|
'A isn\'t ' => 1, |
750
|
|
|
|
|
|
|
'A l\'' => 1, |
751
|
|
|
|
|
|
|
'A la ' => 1, |
752
|
|
|
|
|
|
|
'A posteriori' => 1, |
753
|
|
|
|
|
|
|
'A priori' => 1, |
754
|
|
|
|
|
|
|
'A to ' => 1, |
755
|
|
|
|
|
|
|
'El Nino' => 1, |
756
|
|
|
|
|
|
|
'El Salvador' => 1, |
757
|
|
|
|
|
|
|
'L is ' => 1, |
758
|
|
|
|
|
|
|
'L-' => 1, |
759
|
|
|
|
|
|
|
'La Salle' => 1, |
760
|
|
|
|
|
|
|
'Las Vegas' => 1, |
761
|
|
|
|
|
|
|
'Lo cual' => 1, |
762
|
|
|
|
|
|
|
'Lo mein' => 1, |
763
|
|
|
|
|
|
|
'Lo que' => 1, |
764
|
|
|
|
|
|
|
'Los Alamos' => 1, |
765
|
|
|
|
|
|
|
'Los Angeles' => 1, |
766
|
|
|
|
|
|
|
); |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
#get tagno to determine which indicator to check and for reporting |
769
|
49
|
|
|
|
|
108
|
my $tagno = $field->tag(); |
770
|
|
|
|
|
|
|
#retrieve tagno from subfield 6 if 880 field |
771
|
49
|
100
|
|
|
|
240
|
if ($tagno eq '880') { |
772
|
1
|
50
|
|
|
|
3
|
if ($field->subfield('6')) { |
773
|
1
|
|
|
|
|
19
|
my $sub6 = $field->subfield('6'); |
774
|
1
|
|
|
|
|
17
|
$tagno = substr($sub6, 0, 3); |
775
|
|
|
|
|
|
|
} #if subfield 6 |
776
|
|
|
|
|
|
|
} #if 880 field |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
#$ind holds nonfiling character indicator value |
779
|
49
|
|
|
|
|
62
|
my $ind = ''; |
780
|
|
|
|
|
|
|
#$first_or_second holds which indicator is for nonfiling char value |
781
|
49
|
|
|
|
|
59
|
my $first_or_second = ''; |
782
|
49
|
50
|
|
|
|
274
|
if ($tagno !~ /^(?:130|240|245|440|630|730|830)$/) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
783
|
0
|
|
|
|
|
0
|
print $tagno, " is not a valid field for article checking\n"; |
784
|
0
|
|
|
|
|
0
|
return; |
785
|
|
|
|
|
|
|
} #if field is not one of those checked for articles |
786
|
|
|
|
|
|
|
#130, 630, 730 => ind1 |
787
|
|
|
|
|
|
|
elsif ($tagno =~ /^(?:130|630|730)$/) { |
788
|
0
|
|
|
|
|
0
|
$ind = $field->indicator(1); |
789
|
0
|
|
|
|
|
0
|
$first_or_second = '1st'; |
790
|
|
|
|
|
|
|
} #if field is 130, 630, or 730 |
791
|
|
|
|
|
|
|
#240, 245, 440, 830 => ind2 |
792
|
|
|
|
|
|
|
elsif ($tagno =~ /^(?:240|245|440|830)$/) { |
793
|
49
|
|
|
|
|
121
|
$ind = $field->indicator(2); |
794
|
49
|
|
|
|
|
485
|
$first_or_second = '2nd'; |
795
|
|
|
|
|
|
|
} #if field is 240, 245, 440, or 830 |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
#report non-numeric non-filing indicators as invalid |
799
|
49
|
50
|
|
|
|
114
|
$self->warn ( $tagno, ": Non-filing indicator is non-numeric" ) unless ($ind =~ /^[0-9]$/); |
800
|
|
|
|
|
|
|
#get subfield 'a' of the title field |
801
|
49
|
|
100
|
|
|
102
|
my $title = $field->subfield('a') || ''; |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
|
804
|
49
|
|
|
|
|
940
|
my $char1_notalphanum = 0; |
805
|
|
|
|
|
|
|
#check for apostrophe, quote, bracket, or parenthesis, before first word |
806
|
|
|
|
|
|
|
#remove if found and add to non-word counter |
807
|
49
|
|
|
|
|
125
|
while ($title =~ /^["'\[\(*]/){ |
808
|
4
|
|
|
|
|
5
|
$char1_notalphanum++; |
809
|
4
|
|
|
|
|
14
|
$title =~ s/^["'\[\(*]//; |
810
|
|
|
|
|
|
|
} |
811
|
|
|
|
|
|
|
# split title into first word + rest on space, parens, bracket, apostrophe, quote, or hyphen |
812
|
49
|
|
|
|
|
227
|
my ($firstword, $separator, $etc) = $title =~ /^([^ \(\)\[\]'"\-]+)([ \(\)\[\]'"\-])?(.*)/i; |
813
|
49
|
100
|
|
|
|
113
|
$firstword = '' if ! defined( $firstword ); |
814
|
49
|
100
|
|
|
|
78
|
$separator = '' if ! defined( $separator ); |
815
|
49
|
100
|
|
|
|
74
|
$etc = '' if ! defined( $etc ); |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
#get length of first word plus the number of chars removed above plus one for the separator |
818
|
49
|
|
|
|
|
84
|
my $nonfilingchars = length($firstword) + $char1_notalphanum + 1; |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
#check to see if first word is an exception |
821
|
49
|
|
|
|
|
59
|
my $isan_exception = 0; |
822
|
49
|
|
|
|
|
210
|
$isan_exception = grep {$title =~ /^\Q$_\E/i} (keys %exceptions); |
|
1078
|
|
|
|
|
5726
|
|
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
#lowercase chars of $firstword for comparison with article list |
825
|
49
|
|
|
|
|
131
|
$firstword = lc($firstword); |
826
|
|
|
|
|
|
|
|
827
|
49
|
|
|
|
|
66
|
my $isan_article = 0; |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
#see if first word is in the list of articles and not an exception |
830
|
49
|
100
|
100
|
|
|
135
|
$isan_article = 1 if (($article{$firstword}) && !($isan_exception)); |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
#if article then $nonfilingchars should match $ind |
833
|
49
|
100
|
|
|
|
94
|
if ($isan_article) { |
834
|
|
|
|
|
|
|
#account for quotes, apostrophes, parens, or brackets before 2nd word |
835
|
|
|
|
|
|
|
# if (($separator eq ' ') && ($etc =~ /^['"]/)) { |
836
|
9
|
100
|
66
|
|
|
45
|
if (($separator) && ($etc =~ /^[ \(\)\[\]'"\-]+/)) { |
837
|
4
|
|
|
|
|
10
|
while ($etc =~ /^[ "'\[\]\(\)*]/){ |
838
|
6
|
|
|
|
|
8
|
$nonfilingchars++; |
839
|
6
|
|
|
|
|
20
|
$etc =~ s/^[ "'\[\]\(\)*]//; |
840
|
|
|
|
|
|
|
} #while etc starts with nonfiling chars |
841
|
|
|
|
|
|
|
} #if separator defined and etc starts with nonfiling chars |
842
|
|
|
|
|
|
|
#special case for 'en' (unsure why) |
843
|
9
|
50
|
|
|
|
60
|
if ($firstword eq 'en') { |
|
|
100
|
|
|
|
|
|
844
|
0
|
0
|
0
|
|
|
0
|
$self->warn ( $tagno, ": First word, , $firstword, may be an article, check $first_or_second indicator ($ind)." ) unless (($ind eq '3') || ($ind eq '0')); |
845
|
|
|
|
|
|
|
} |
846
|
|
|
|
|
|
|
elsif ($nonfilingchars ne $ind) { |
847
|
3
|
|
|
|
|
12
|
$self->warn ( $tagno, ": First word, $firstword, may be an article, check $first_or_second indicator ($ind)." ); |
848
|
|
|
|
|
|
|
} #unless ind is same as length of first word and nonfiling characters |
849
|
|
|
|
|
|
|
} #if first word is in article list |
850
|
|
|
|
|
|
|
#not an article so warn if $ind is not 0 |
851
|
|
|
|
|
|
|
else { |
852
|
40
|
100
|
|
|
|
366
|
unless ($ind eq '0') { |
853
|
1
|
|
|
|
|
7
|
$self->warn ( $tagno, ": First word, $firstword, does not appear to be an article, check $first_or_second indicator ($ind)." ); |
854
|
|
|
|
|
|
|
} #unless ind is 0 |
855
|
|
|
|
|
|
|
} #else not in article list |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
####################################### |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
} #_check_article |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
############ |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=head1 SEE ALSO |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
Check the docs for L. All software links are there. |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
=head1 TODO |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
=over 4 |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
=item * Subfield 6 |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
For subfield 6, it should always be the 1st subfield according to MARC 21 specifications. Perhaps a generic check should be added that warns if subfield 6 is not the 1st subfield. |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
=item * Subfield 8. |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
This subfield could be the 1st or 2nd subfield, so the code that checks for the 1st few subfields (check_245, check_250) should take that into account. |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
=item * Subfield 9 |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
This subfield is not officially allowed in MARC, since it is locally defined. Some way needs to be made to allow messages/warnings about this subfield to be turned off (or otherwise deal with records using/allowing locally defined subfield 9). |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
=item * 008 length and presence check |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
Currently, 008 validation is not implemented in MARC::Lint, but is left to MARC::Errorchecks. It might be useful if MARC::Lint's basic validation checks included a verification that the 008 exists and is exactly 40 characters long. Additional 008-related checking and byte validation would remain in MARC::Errorchecks. |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
=item * ISBN and ISSN checking |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
020 and 022 fields are validated with the C and |
891
|
|
|
|
|
|
|
C modules, respectively. Business::ISBN versions between 2 and |
892
|
|
|
|
|
|
|
2.02_01 are incompatible with MARC::Lint. |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
=item * check_041 cleanup |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
Splitting subfield code strings every 3 chars could probably be written more efficiently. |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
=item * check_245 cleanup |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
The article checking in particular. |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
=item * Method for turning off checks |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
Provide a way for users to skip checks more easily when using check_record, or a |
905
|
|
|
|
|
|
|
specific check_xxx method (e.g. skip article checking). |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
=back |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
=head1 LICENSE |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
This code may be distributed under the same terms as Perl itself. |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
Please note that these modules are not products of or supported by the |
914
|
|
|
|
|
|
|
employers of the various contributors to the code. |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
=cut |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
# Used only to read the stuff from __DATA__ |
919
|
|
|
|
|
|
|
sub _read_rules { |
920
|
6
|
|
|
6
|
|
30
|
my $self = shift; |
921
|
|
|
|
|
|
|
|
922
|
6
|
|
|
|
|
22
|
my $tell = tell(DATA); # Stash the position so we can reset it for next time |
923
|
|
|
|
|
|
|
|
924
|
6
|
|
|
|
|
25
|
local $/ = ""; |
925
|
6
|
|
|
|
|
97
|
while ( my $tagblock = ) { |
926
|
1440
|
|
|
|
|
6085
|
my @lines = split( /\n/, $tagblock ); |
927
|
1440
|
|
|
|
|
24309
|
s/\s+$// for @lines; |
928
|
|
|
|
|
|
|
|
929
|
1440
|
100
|
|
|
|
3151
|
next unless @lines >= 4; # Some of our entries are tag-only |
930
|
|
|
|
|
|
|
|
931
|
1338
|
|
|
|
|
2126
|
my $tagline = shift @lines; |
932
|
1338
|
|
|
|
|
3804
|
my @keyvals = split( /\s+/, $tagline, 3 ); |
933
|
1338
|
|
|
|
|
1957
|
my $tagno = shift @keyvals; |
934
|
1338
|
|
|
|
|
1773
|
my $repeatable = shift @keyvals; |
935
|
|
|
|
|
|
|
|
936
|
1338
|
|
|
|
|
2807
|
$self->_parse_tag_rules( $tagno, $repeatable, @lines ); |
937
|
|
|
|
|
|
|
} # while |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
# Set the pointer back to where it was, in case we do this again |
940
|
6
|
|
|
|
|
57
|
seek( DATA, $tell, 0 ); |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
sub _parse_tag_rules { |
944
|
1338
|
|
|
1338
|
|
1990
|
my $self = shift; |
945
|
1338
|
|
|
|
|
1652
|
my $tagno = shift; |
946
|
1338
|
|
|
|
|
1568
|
my $repeatable = shift; |
947
|
1338
|
|
|
|
|
3722
|
my @lines = @_; |
948
|
|
|
|
|
|
|
|
949
|
1338
|
|
50
|
|
|
5385
|
my $rules = ($self->{_rules}->{$tagno} ||= {}); |
950
|
1338
|
|
|
|
|
2542
|
$rules->{'repeatable'} = $repeatable; |
951
|
|
|
|
|
|
|
|
952
|
1338
|
|
|
|
|
1915
|
for my $line ( @lines ) { |
953
|
16140
|
|
|
|
|
42715
|
my @keyvals = split( /\s+/, $line, 3 ); |
954
|
16140
|
|
|
|
|
22937
|
my $key = shift @keyvals; |
955
|
16140
|
|
|
|
|
20047
|
my $val = shift @keyvals; |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
# Do magic for indicators |
958
|
16140
|
100
|
|
|
|
25337
|
if ( $key =~ /^ind/ ) { |
959
|
2676
|
|
|
|
|
4677
|
$rules->{$key} = $val; |
960
|
|
|
|
|
|
|
|
961
|
2676
|
|
|
|
|
3397
|
my $desc; |
962
|
|
|
|
|
|
|
my $regex; |
963
|
|
|
|
|
|
|
|
964
|
2676
|
100
|
|
|
|
4088
|
if ( $val eq "blank" ) { |
965
|
1638
|
|
|
|
|
1929
|
$desc = "blank"; |
966
|
1638
|
|
|
|
|
4141
|
$regex = qr/^ $/; |
967
|
|
|
|
|
|
|
} else { |
968
|
1038
|
|
|
|
|
1610
|
$desc = _nice_list($val); |
969
|
1038
|
|
|
|
|
2133
|
$val =~ s/^b/ /; |
970
|
1038
|
|
|
|
|
10352
|
$regex = qr/^[$val]$/; |
971
|
|
|
|
|
|
|
} |
972
|
|
|
|
|
|
|
|
973
|
2676
|
|
|
|
|
5844
|
$rules->{$key."_desc"} = $desc; |
974
|
2676
|
|
|
|
|
5606
|
$rules->{$key."_regex"} = $regex; |
975
|
|
|
|
|
|
|
} # if indicator |
976
|
|
|
|
|
|
|
else { |
977
|
13464
|
100
|
|
|
|
17649
|
if ( $key =~ /(.)-(.)/ ) { |
978
|
18
|
|
|
|
|
53
|
my ($min,$max) = ($1,$2); |
979
|
18
|
|
|
|
|
255
|
$rules->{$_} = $val for ($min..$max); |
980
|
|
|
|
|
|
|
} else { |
981
|
13446
|
|
|
|
|
33682
|
$rules->{$key} = $val; |
982
|
|
|
|
|
|
|
} |
983
|
|
|
|
|
|
|
} # not an indicator |
984
|
|
|
|
|
|
|
} # for $line |
985
|
|
|
|
|
|
|
} |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
sub _nice_list { |
989
|
1038
|
|
|
1038
|
|
1314
|
my $str = shift; |
990
|
|
|
|
|
|
|
|
991
|
1038
|
100
|
|
|
|
2382
|
if ( $str =~ s/(\d)-(\d)/$1 thru $2/ ) { |
992
|
66
|
|
|
|
|
221
|
return $str; |
993
|
|
|
|
|
|
|
} |
994
|
|
|
|
|
|
|
|
995
|
972
|
|
|
|
|
2027
|
my @digits = split( //, $str ); |
996
|
972
|
100
|
|
|
|
1766
|
$digits[0] = "blank" if $digits[0] eq "b"; |
997
|
972
|
|
|
|
|
1356
|
my $last = pop @digits; |
998
|
972
|
|
|
|
|
2976
|
return join( ", ", @digits ) . " or $last"; |
999
|
|
|
|
|
|
|
} |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
sub _ind_regex { |
1002
|
0
|
|
|
0
|
|
|
my $str = shift; |
1003
|
|
|
|
|
|
|
|
1004
|
0
|
0
|
|
|
|
|
return qr/^ $/ if $str eq "blank"; |
1005
|
|
|
|
|
|
|
|
1006
|
0
|
|
|
|
|
|
return qr/^[$str]$/; |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
1; |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
__DATA__ |