line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MARC::Lint; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
425697
|
use strict; |
|
5
|
|
|
|
|
47
|
|
|
5
|
|
|
|
|
148
|
|
4
|
5
|
|
|
5
|
|
26
|
use warnings; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
130
|
|
5
|
5
|
|
|
5
|
|
23
|
use integer; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
30
|
|
6
|
5
|
|
|
5
|
|
1692
|
use MARC::Record; |
|
5
|
|
|
|
|
8989
|
|
|
5
|
|
|
|
|
216
|
|
7
|
5
|
|
|
5
|
|
30
|
use MARC::Field; |
|
5
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
146
|
|
8
|
|
|
|
|
|
|
|
9
|
5
|
|
|
5
|
|
2527
|
use MARC::Lint::CodeData qw(%GeogAreaCodes %ObsoleteGeogAreaCodes %LanguageCodes %ObsoleteLanguageCodes); |
|
5
|
|
|
|
|
15
|
|
|
5
|
|
|
|
|
4668
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $VERSION = 1.53; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
MARC::Lint - Perl extension for checking validity of MARC records |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use MARC::File::USMARC; |
20
|
|
|
|
|
|
|
use MARC::Lint; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $lint = new MARC::Lint; |
23
|
|
|
|
|
|
|
my $filename = shift; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $file = MARC::File::USMARC->in( $filename ); |
26
|
|
|
|
|
|
|
while ( my $marc = $file->next() ) { |
27
|
|
|
|
|
|
|
$lint->check_record( $marc ); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Print the title tag |
30
|
|
|
|
|
|
|
print $marc->title, "\n"; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Print the errors that were found |
33
|
|
|
|
|
|
|
print join( "\n", $lint->warnings ), "\n"; |
34
|
|
|
|
|
|
|
} # while |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Given the following MARC record: |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
LDR 00000nam 22002538a 4500 |
39
|
|
|
|
|
|
|
040 _aMdSSJTT |
40
|
|
|
|
|
|
|
_cMdSSJTT |
41
|
|
|
|
|
|
|
040 _aMdSSJTT |
42
|
|
|
|
|
|
|
_beng |
43
|
|
|
|
|
|
|
_cMdSSJTT |
44
|
|
|
|
|
|
|
100 14 _aWall, Larry. |
45
|
|
|
|
|
|
|
110 1 _aO'Reilly & Associates. |
46
|
|
|
|
|
|
|
245 90 _aProgramming Perl / |
47
|
|
|
|
|
|
|
_aBig Book of Perl / |
48
|
|
|
|
|
|
|
_cLarry Wall, Tom Christiansen & Jon Orwant. |
49
|
|
|
|
|
|
|
250 _a3rd ed. |
50
|
|
|
|
|
|
|
250 _a3rd ed. |
51
|
|
|
|
|
|
|
260 _aCambridge, Mass. : |
52
|
|
|
|
|
|
|
_bO'Reilly, |
53
|
|
|
|
|
|
|
_r2000. |
54
|
|
|
|
|
|
|
590 4 _aPersonally signed by Larry. |
55
|
|
|
|
|
|
|
856 43 _uhttp://www.perl.com/ |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
the following errors are generated: |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
1XX: Only one 1XX tag is allowed, but I found 2 of them. |
60
|
|
|
|
|
|
|
100: Indicator 2 must be blank but it's "4" |
61
|
|
|
|
|
|
|
245: Indicator 1 must be 0 or 1 but it's "9" |
62
|
|
|
|
|
|
|
245: Subfield _a is not repeatable. |
63
|
|
|
|
|
|
|
040: Field is not repeatable. |
64
|
|
|
|
|
|
|
260: Subfield _r is not allowed. |
65
|
|
|
|
|
|
|
856: Indicator 2 must be blank, 0, 1, 2 or 8 but it's "3" |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 DESCRIPTION |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Module for checking validity of MARC records. 99% of the users will want to do |
70
|
|
|
|
|
|
|
something like is shown in the synopsis. The other intrepid 1% will overload the |
71
|
|
|
|
|
|
|
C module's methods and provide their own special field-level checking. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
What this means is that if you have certain requirements, such as making sure that |
74
|
|
|
|
|
|
|
all 952 tags have a certain call number in them, you can write a function that |
75
|
|
|
|
|
|
|
checks for that, and still get all the benefits of the MARC::Lint framework. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 EXPORT |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
None. Everything is done through objects. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 METHODS |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 new() |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
No parms needed. The C object is little more than a list of warnings |
86
|
|
|
|
|
|
|
and a bunch of rules. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=cut |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub new { |
91
|
6
|
|
|
6
|
1
|
6580
|
my $class = shift; |
92
|
|
|
|
|
|
|
|
93
|
6
|
|
|
|
|
24
|
my $self = { |
94
|
|
|
|
|
|
|
_warnings => [], |
95
|
|
|
|
|
|
|
}; |
96
|
6
|
|
|
|
|
16
|
bless $self, $class; |
97
|
|
|
|
|
|
|
|
98
|
6
|
|
|
|
|
30
|
$self->_read_rules(); |
99
|
|
|
|
|
|
|
|
100
|
6
|
|
|
|
|
44
|
return $self; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 warnings() |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Returns a list of warnings found by C and its brethren. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub warnings { |
110
|
64
|
|
|
64
|
1
|
1114
|
my $self = shift; |
111
|
|
|
|
|
|
|
|
112
|
64
|
50
|
|
|
|
116
|
return wantarray ? @{$self->{_warnings}} : scalar @{$self->{_warnings}}; |
|
64
|
|
|
|
|
191
|
|
|
0
|
|
|
|
|
0
|
|
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 clear_warnings() |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Clear the list of warnings for this linter object. It's automatically called |
118
|
|
|
|
|
|
|
when you call C. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub clear_warnings { |
123
|
64
|
|
|
64
|
1
|
180
|
my $self = shift; |
124
|
|
|
|
|
|
|
|
125
|
64
|
|
|
|
|
214
|
$self->{_warnings} = []; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head2 warn( $str [, $str...] ) |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Create a warning message, built from strings passed, like a C |
131
|
|
|
|
|
|
|
statement. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Typically, you'll leave this to C, but industrious |
134
|
|
|
|
|
|
|
programmers may want to do their own checking as well. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub warn { |
139
|
59
|
|
|
59
|
1
|
1681
|
my $self = shift; |
140
|
|
|
|
|
|
|
|
141
|
59
|
|
|
|
|
76
|
push( @{$self->{_warnings}}, join( "", @_ ) ); |
|
59
|
|
|
|
|
197
|
|
142
|
|
|
|
|
|
|
|
143
|
59
|
|
|
|
|
225
|
return; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 check_record( $marc ) |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Does all sorts of lint-like checks on the MARC record I<$marc>, |
149
|
|
|
|
|
|
|
both on the record as a whole, and on the individual fields & |
150
|
|
|
|
|
|
|
subfields. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub check_record { |
155
|
13
|
|
|
13
|
1
|
36395
|
my $self = shift; |
156
|
13
|
|
|
|
|
24
|
my $marc = shift; |
157
|
|
|
|
|
|
|
|
158
|
13
|
|
|
|
|
33
|
$self->clear_warnings(); |
159
|
|
|
|
|
|
|
|
160
|
13
|
50
|
33
|
|
|
95
|
( (ref $marc) && $marc->isa('MARC::Record') ) |
161
|
|
|
|
|
|
|
or return $self->warn( "Must pass a MARC::Record object to check_record" ); |
162
|
|
|
|
|
|
|
|
163
|
13
|
|
|
|
|
42
|
my @_1xx = $marc->field( "1.." ); |
164
|
13
|
|
|
|
|
1384
|
my $n1xx = scalar @_1xx; |
165
|
13
|
100
|
|
|
|
38
|
if ( $n1xx > 1 ) { |
166
|
1
|
|
|
|
|
7
|
$self->warn( "1XX: Only one 1XX tag is allowed, but I found $n1xx of them." ); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
13
|
50
|
|
|
|
28
|
if ( not $marc->field( 245 ) ) { |
170
|
0
|
|
|
|
|
0
|
$self->warn( "245: No 245 tag." ); |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
174
|
13
|
|
|
|
|
972
|
my %field_seen; |
175
|
13
|
|
|
|
|
33
|
my $rules = $self->{_rules}; |
176
|
13
|
|
|
|
|
30
|
for my $field ( $marc->fields ) { |
177
|
208
|
|
|
|
|
467
|
my $tagno = $field->tag; |
178
|
|
|
|
|
|
|
|
179
|
208
|
|
|
|
|
848
|
my $tagrules = ''; |
180
|
|
|
|
|
|
|
#if 880 field, inherit rules from tagno in subfield _6 |
181
|
208
|
|
|
|
|
252
|
my $is_880 = 0; |
182
|
208
|
100
|
|
|
|
354
|
if ($tagno eq '880') { |
183
|
3
|
|
|
|
|
6
|
$is_880 = 1; |
184
|
3
|
100
|
|
|
|
7
|
if ($field->subfield('6')) { |
185
|
2
|
|
|
|
|
48
|
my $sub6 = $field->subfield('6'); |
186
|
2
|
|
|
|
|
44
|
$tagno = substr($sub6, 0, 3); |
187
|
|
|
|
|
|
|
|
188
|
2
|
50
|
|
|
|
7
|
$tagrules = $rules->{$tagno} or next; |
189
|
|
|
|
|
|
|
#880 is repeatable, but its linked field may not be |
190
|
2
|
50
|
66
|
|
|
15
|
if ( ($tagrules->{'repeatable'} && ( $tagrules->{'repeatable'} eq 'NR' )) && $field_seen{'880.'.$tagno} ) { |
|
|
|
66
|
|
|
|
|
191
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Field is not repeatable." ); |
192
|
|
|
|
|
|
|
} #if repeatability |
193
|
|
|
|
|
|
|
} #if subfield 6 present |
194
|
|
|
|
|
|
|
else { |
195
|
1
|
|
|
|
|
23
|
$self->warn( "880: No subfield 6." ); |
196
|
1
|
|
|
|
|
2
|
next; |
197
|
|
|
|
|
|
|
} #else no subfield 6 in 880 field |
198
|
|
|
|
|
|
|
} #if this is 880 field |
199
|
|
|
|
|
|
|
else { |
200
|
205
|
100
|
|
|
|
446
|
$tagrules = $rules->{$tagno} or next; |
201
|
|
|
|
|
|
|
|
202
|
204
|
50
|
66
|
|
|
851
|
if ( ($tagrules->{'repeatable'} && ( $tagrules->{'repeatable'} eq 'NR' )) && $field_seen{$tagno} ) { |
|
|
|
66
|
|
|
|
|
203
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Field is not repeatable." ); |
204
|
|
|
|
|
|
|
} #if repeatability |
205
|
|
|
|
|
|
|
} #else not 880 |
206
|
|
|
|
|
|
|
|
207
|
206
|
100
|
|
|
|
440
|
if ( $tagno >= 10 ) { |
|
|
50
|
|
|
|
|
|
208
|
159
|
|
|
|
|
274
|
for my $ind ( 1..2 ) { |
209
|
318
|
|
|
|
|
586
|
my $indvalue = $field->indicator($ind); |
210
|
318
|
100
|
|
|
|
4519
|
if ( not ($indvalue =~ $tagrules->{"ind$ind" . "_regex"}) ) { |
211
|
|
|
|
|
|
|
$self->warn( |
212
|
|
|
|
|
|
|
"$tagno: Indicator $ind must be ", |
213
|
5
|
|
|
|
|
43
|
$tagrules->{"ind$ind" . "_desc"}, |
214
|
|
|
|
|
|
|
" but it's \"$indvalue\"" |
215
|
|
|
|
|
|
|
); |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
} # for |
218
|
|
|
|
|
|
|
|
219
|
159
|
|
|
|
|
222
|
my %sub_seen; |
220
|
159
|
|
|
|
|
301
|
for my $subfield ( $field->subfields ) { |
221
|
264
|
|
|
|
|
3006
|
my ($code,$data) = @$subfield; |
222
|
|
|
|
|
|
|
|
223
|
264
|
|
|
|
|
393
|
my $rule = $tagrules->{$code}; |
224
|
264
|
100
|
66
|
|
|
734
|
if ( not defined $rule ) { |
|
|
100
|
|
|
|
|
|
225
|
3
|
|
|
|
|
31
|
$self->warn( "$tagno: Subfield _$code is not allowed." ); |
226
|
|
|
|
|
|
|
} elsif ( ($rule eq "NR") && $sub_seen{$code} ) { |
227
|
1
|
|
|
|
|
6
|
$self->warn( "$tagno: Subfield _$code is not repeatable." ); |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
264
|
50
|
|
|
|
556
|
if ( $data =~ /[\t\r\n]/ ) { |
231
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Subfield _$code has an invalid control character" ); |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
264
|
|
|
|
|
547
|
++$sub_seen{$code}; |
235
|
|
|
|
|
|
|
} # for $subfields |
236
|
|
|
|
|
|
|
} # if $tagno >= 10 |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
elsif ($tagno < 10) { |
239
|
|
|
|
|
|
|
#check for subfield characters |
240
|
47
|
100
|
|
|
|
93
|
if ($field->data() =~ /\x1F/) { |
241
|
1
|
|
|
|
|
36
|
$self->warn( "$tagno: Subfields are not allowed in fields lower than 010" ); |
242
|
|
|
|
|
|
|
} #if control field has subfield delimiter |
243
|
|
|
|
|
|
|
} #elsif $tagno < 10 |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Check to see if a check_xxx() function exists, and call it on the field if it does |
246
|
206
|
|
|
|
|
812
|
my $checker = "check_$tagno"; |
247
|
206
|
100
|
|
|
|
708
|
if ( $self->can( $checker ) ) { |
248
|
25
|
|
|
|
|
84
|
$self->$checker( $field ); |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
206
|
100
|
|
|
|
3290
|
if ($is_880) { |
252
|
2
|
|
|
|
|
7
|
++$field_seen{'880.'.$tagno}; |
253
|
|
|
|
|
|
|
} #if 880 field |
254
|
|
|
|
|
|
|
else { |
255
|
204
|
|
|
|
|
408
|
++$field_seen{$tagno}; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} # for my $fields |
258
|
|
|
|
|
|
|
|
259
|
13
|
|
|
|
|
54
|
return; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head2 check_I( $field ) |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
Various functions to check the different fields. If the function doesn't exist, |
265
|
|
|
|
|
|
|
then it doesn't get checked. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 check_020() |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Looks at 020$a and reports errors if the check digit is wrong. |
270
|
|
|
|
|
|
|
Looks at 020$z and validates number if hyphens are present. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Uses Business::ISBN to do validation. Thirteen digit checking is currently done |
273
|
|
|
|
|
|
|
with the internal sub _isbn13_check_digit(), based on code from Business::ISBN. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
TO DO (check_020): |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
Fix 13-digit ISBN checking. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub check_020 { |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
|
284
|
5
|
|
|
5
|
|
2869
|
use Business::ISBN; |
|
5
|
|
|
|
|
257226
|
|
|
5
|
|
|
|
|
18335
|
|
285
|
|
|
|
|
|
|
|
286
|
22
|
|
|
22
|
1
|
12920
|
my $self = shift; |
287
|
22
|
|
|
|
|
45
|
my $field = shift; |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
################################################### |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# break subfields into code-data array and validate data |
292
|
|
|
|
|
|
|
|
293
|
22
|
|
|
|
|
50
|
my @subfields = $field->subfields(); |
294
|
|
|
|
|
|
|
|
295
|
22
|
|
|
|
|
394
|
while (my $subfield = pop(@subfields)) { |
296
|
22
|
|
|
|
|
44
|
my ($code, $data) = @$subfield; |
297
|
22
|
|
|
|
|
37
|
my $isbnno = $data; |
298
|
|
|
|
|
|
|
#remove any hyphens |
299
|
22
|
|
|
|
|
62
|
$isbnno =~ s/\-//g; |
300
|
|
|
|
|
|
|
#remove nondigits |
301
|
22
|
|
|
|
|
138
|
$isbnno =~ s/^\D*(\d{9,12}[X\d])\b.*$/$1/; |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
#report error if this is subfield 'a' |
304
|
|
|
|
|
|
|
#and the first 10 or 13 characters are not a match for $isbnno |
305
|
22
|
100
|
|
|
|
61
|
if ($code eq 'a') { |
|
|
50
|
|
|
|
|
|
306
|
21
|
100
|
|
|
|
58
|
if ((substr($data,0,length($isbnno)) ne $isbnno)) { |
307
|
2
|
|
|
|
|
5
|
$self->warn( "020: Subfield a may have invalid characters."); |
308
|
|
|
|
|
|
|
} #if first characters don't match |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
#report error if no space precedes a qualifier in subfield a |
311
|
21
|
100
|
|
|
|
59
|
if ($data =~ /\(/) { |
312
|
8
|
100
|
|
|
|
36
|
$self->warn( "020: Subfield a qualifier must be preceded by space, $data.") unless ($data =~ /[X0-9] \(/); |
313
|
|
|
|
|
|
|
} #if data has parenthetical qualifier |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
#report error if unable to find 10-13 digit string of digits in subfield 'a' |
316
|
21
|
100
|
|
|
|
68
|
if (($isbnno !~ /(?:^\d{10}$)|(?:^\d{13}$)|(?:^\d{9}X$)/)) { |
317
|
3
|
|
|
|
|
12
|
$self->warn( "020: Subfield a has the wrong number of digits, $data."); |
318
|
|
|
|
|
|
|
} # if subfield 'a' but not 10 or 13 digit isbn |
319
|
|
|
|
|
|
|
#otherwise, check 10 and 13 digit checksums for validity |
320
|
|
|
|
|
|
|
else { |
321
|
18
|
100
|
|
|
|
47
|
if ((length ($isbnno) == 10)) { |
|
|
50
|
|
|
|
|
|
322
|
|
|
|
|
|
|
|
323
|
16
|
50
|
33
|
|
|
45
|
if (($Business::ISBN::VERSION gt '2.02_01') || ($Business::ISBN::VERSION gt '2.009')) { |
|
|
0
|
|
|
|
|
|
324
|
16
|
100
|
|
|
|
56
|
$self->warn( "020: Subfield a has bad checksum, $data." ) if (Business::ISBN::valid_isbn_checksum($isbnno) != 1); |
325
|
|
|
|
|
|
|
} #if Business::ISBN version higher than 2.02_01 or 2.009 |
326
|
|
|
|
|
|
|
elsif ($Business::ISBN::VERSION lt '2') { |
327
|
0
|
0
|
|
|
|
0
|
$self->warn( "020: Subfield a has bad checksum, $data." ) if (Business::ISBN::is_valid_checksum($isbnno) != 1); |
328
|
|
|
|
|
|
|
} #elsif Business::ISBN version lower than 2 |
329
|
|
|
|
|
|
|
else { |
330
|
0
|
|
|
|
|
0
|
$self->warn( "Business::ISBN version must be below 2 or above 2.02_02 or 2.009." ); |
331
|
|
|
|
|
|
|
} #else Business::ISBN version between 2 and 2.02_02 |
332
|
|
|
|
|
|
|
} #if 10 digit ISBN has invalid check digit |
333
|
|
|
|
|
|
|
# do validation check for 13 digit isbn |
334
|
|
|
|
|
|
|
######################################### |
335
|
|
|
|
|
|
|
### Not yet fully implemented ########### |
336
|
|
|
|
|
|
|
######################################### |
337
|
|
|
|
|
|
|
elsif (length($isbnno) == 13){ |
338
|
|
|
|
|
|
|
#change line below once Business::ISBN handles 13-digit ISBNs |
339
|
2
|
|
|
|
|
4
|
my $is_valid_13 = _isbn13_check_digit($isbnno); |
340
|
2
|
100
|
|
|
|
16
|
$self->warn( "020: Subfield a has bad checksum (13 digit), $data.") unless ($is_valid_13 == 1); |
341
|
|
|
|
|
|
|
} #elsif 13 digit ISBN has invalid check digit |
342
|
|
|
|
|
|
|
################################################### |
343
|
|
|
|
|
|
|
} #else subfield 'a' has 10 or 13 digits |
344
|
|
|
|
|
|
|
} #if subfield 'a' |
345
|
|
|
|
|
|
|
#look for valid isbn in 020$z |
346
|
|
|
|
|
|
|
elsif ($code eq 'z') { |
347
|
1
|
50
|
33
|
|
|
13
|
if (($data =~ /^ISBN/) || ($data =~ /^\d*\-\d+/)){ |
348
|
|
|
|
|
|
|
################################################## |
349
|
|
|
|
|
|
|
## Turned on for now--Comment to unimplement #### |
350
|
|
|
|
|
|
|
################################################## |
351
|
0
|
0
|
0
|
|
|
0
|
$self->warn( "020: Subfield z is numerically valid.") if ((length ($isbnno) == 10) && (Business::ISBN::is_valid_checksum($isbnno) == 1)); |
352
|
|
|
|
|
|
|
} #if 10 digit ISBN has invalid check digit |
353
|
|
|
|
|
|
|
} #elsif subfield 'z' |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
} # while @subfields |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
} #check_020 |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=head2 _isbn13_check_digit($ean) |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Internal sub to determine if 13-digit ISBN has a valid checksum. The code is |
362
|
|
|
|
|
|
|
taken from Business::ISBN::as_ean. It is expected to be temporary until |
363
|
|
|
|
|
|
|
Business::ISBN is updated to check 13-digit ISBNs itself. |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=cut |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub _isbn13_check_digit { |
368
|
|
|
|
|
|
|
|
369
|
2
|
|
|
2
|
|
4
|
my $ean = shift; |
370
|
|
|
|
|
|
|
#remove and store current check digit |
371
|
2
|
|
|
|
|
4
|
my $check_digit = chop($ean); |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
#calculate valid checksum |
374
|
2
|
|
|
|
|
4
|
my $sum = 0; |
375
|
2
|
|
|
|
|
4
|
foreach my $index ( 0, 2, 4, 6, 8, 10 ) |
376
|
|
|
|
|
|
|
{ |
377
|
12
|
|
|
|
|
16
|
$sum += substr($ean, $index, 1); |
378
|
12
|
|
|
|
|
24
|
$sum += 3 * substr($ean, $index + 1, 1); |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
#take the next higher multiple of 10 and subtract the sum. |
382
|
|
|
|
|
|
|
#if $sum is 37, the next highest multiple of ten is 40. the |
383
|
|
|
|
|
|
|
#check digit would be 40 - 37 => 3. |
384
|
2
|
|
|
|
|
10
|
my $valid_check_digit = ( 10 * ( int( $sum / 10 ) + 1 ) - $sum ) % 10; |
385
|
|
|
|
|
|
|
|
386
|
2
|
100
|
|
|
|
6
|
return $check_digit == $valid_check_digit ? 1 : 0; |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
} # _isbn13_check_digit |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
######################################### |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=head2 check_041( $field ) |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Warns if subfields are not evenly divisible by 3 unless second indicator is 7 |
395
|
|
|
|
|
|
|
(future implementation would ensure that each subfield is exactly 3 characters |
396
|
|
|
|
|
|
|
unless ind2 is 7--since subfields are now repeatable. This is not implemented |
397
|
|
|
|
|
|
|
here due to the large number of records needing to be corrected.). Validates |
398
|
|
|
|
|
|
|
against the MARC Code List for Languages (L) using the |
399
|
|
|
|
|
|
|
MARC::Lint::CodeData data pack to MARC::Lint (%LanguageCodes, |
400
|
|
|
|
|
|
|
%ObsoleteLanguageCodes). |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=cut |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
sub check_041 { |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
|
407
|
3
|
|
|
3
|
1
|
3506
|
my $self = shift; |
408
|
3
|
|
|
|
|
7
|
my $field = shift; |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
411
|
|
|
|
|
|
|
|
412
|
3
|
|
|
|
|
11
|
my @subfields = $field->subfields(); |
413
|
3
|
|
|
|
|
66
|
my @newsubfields = (); |
414
|
|
|
|
|
|
|
|
415
|
3
|
|
|
|
|
11
|
while (my $subfield = pop(@subfields)) { |
416
|
7
|
|
|
|
|
14
|
my ($code, $data) = @$subfield; |
417
|
7
|
|
|
|
|
28
|
unshift (@newsubfields, $code, $data); |
418
|
|
|
|
|
|
|
} # while |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
#warn if length of each subfield is not divisible by 3 unless ind2 is 7 |
421
|
3
|
50
|
|
|
|
14
|
unless ($field->indicator(2) eq '7') { |
422
|
3
|
|
|
|
|
55
|
for (my $index = 0; $index <=$#newsubfields; $index+=2) { |
423
|
7
|
100
|
|
|
|
23
|
if (length ($newsubfields[$index+1]) %3 != 0) { |
424
|
3
|
|
|
|
|
18
|
$self->warn( "041: Subfield _$newsubfields[$index] must be evenly divisible by 3 or exactly three characters if ind2 is not 7, ($newsubfields[$index+1])." ); |
425
|
|
|
|
|
|
|
} #if field length not divisible evenly by 3 |
426
|
|
|
|
|
|
|
############################################## |
427
|
|
|
|
|
|
|
# validation against code list data |
428
|
|
|
|
|
|
|
## each subfield has a multiple of 3 chars |
429
|
|
|
|
|
|
|
# need to look at each group of 3 characters |
430
|
|
|
|
|
|
|
else { |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
#break each character of the subfield into an array position |
433
|
4
|
|
|
|
|
19
|
my @codechars = split '', $newsubfields[$index+1]; |
434
|
|
|
|
|
|
|
|
435
|
4
|
|
|
|
|
8
|
my $pos = 0; |
436
|
|
|
|
|
|
|
#store each 3 char code in a slot of @codes041 |
437
|
4
|
|
|
|
|
7
|
my @codes041 = (); |
438
|
4
|
|
|
|
|
23
|
while ($pos <= $#codechars) { |
439
|
6
|
|
|
|
|
22
|
push @codes041, (join '', @codechars[$pos..$pos+2]); |
440
|
6
|
|
|
|
|
16
|
$pos += 3; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
|
444
|
4
|
|
|
|
|
8
|
foreach my $code041 (@codes041) { |
445
|
|
|
|
|
|
|
#see if language code matches valid code |
446
|
6
|
50
|
|
|
|
28
|
my $validlang = $LanguageCodes{$code041} ? 1 : 0; |
447
|
|
|
|
|
|
|
#look for invalid code match if valid code was not matched |
448
|
6
|
100
|
|
|
|
15
|
my $obsoletelang = $ObsoleteLanguageCodes{$code041} ? 1 : 0; |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# skip valid subfields |
451
|
6
|
50
|
|
|
|
25
|
unless ($validlang) { |
452
|
|
|
|
|
|
|
#report invalid matches as possible obsolete codes |
453
|
6
|
100
|
|
|
|
13
|
if ($obsoletelang) { |
454
|
1
|
|
|
|
|
5
|
$self->warn( "041: Subfield _$newsubfields[$index], $newsubfields[$index+1], may be obsolete."); |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
else { |
457
|
5
|
|
|
|
|
49
|
$self->warn( "041: Subfield _$newsubfields[$index], $newsubfields[$index+1] ($code041), is not valid."); |
458
|
|
|
|
|
|
|
} #else code not found |
459
|
|
|
|
|
|
|
} # unless found valid code |
460
|
|
|
|
|
|
|
} #foreach code in 041 |
461
|
|
|
|
|
|
|
} # else subfield has multiple of 3 chars |
462
|
|
|
|
|
|
|
############################################## |
463
|
|
|
|
|
|
|
} # foreach subfield |
464
|
|
|
|
|
|
|
} #unless ind2 is 7 |
465
|
|
|
|
|
|
|
} #check_041 |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head2 check_043( $field ) |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Warns if each subfield a is not exactly 7 characters. Validates each code |
470
|
|
|
|
|
|
|
against the MARC code list for Geographic Areas (L) |
471
|
|
|
|
|
|
|
using the MARC::Lint::CodeData data pack to MARC::Lint (%GeogAreaCodes, |
472
|
|
|
|
|
|
|
%ObsoleteGeogAreaCodes). |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=cut |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub check_043 { |
477
|
|
|
|
|
|
|
|
478
|
2
|
|
|
2
|
1
|
2485
|
my $self = shift; |
479
|
2
|
|
|
|
|
5
|
my $field = shift; |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
482
|
|
|
|
|
|
|
|
483
|
2
|
|
|
|
|
7
|
my @subfields = $field->subfields(); |
484
|
2
|
|
|
|
|
41
|
my @newsubfields = (); |
485
|
|
|
|
|
|
|
|
486
|
2
|
|
|
|
|
8
|
while (my $subfield = pop(@subfields)) { |
487
|
5
|
|
|
|
|
11
|
my ($code, $data) = @$subfield; |
488
|
5
|
|
|
|
|
17
|
unshift (@newsubfields, $code, $data); |
489
|
|
|
|
|
|
|
} # while |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
#warn if length of subfield a is not exactly 7 |
492
|
2
|
|
|
|
|
10
|
for (my $index = 0; $index <=$#newsubfields; $index+=2) { |
493
|
5
|
100
|
66
|
|
|
36
|
if (($newsubfields[$index] eq 'a') && (length ($newsubfields[$index+1]) != 7)) { |
|
|
50
|
|
|
|
|
|
494
|
2
|
|
|
|
|
7
|
$self->warn( "043: Subfield _a must be exactly 7 characters, $newsubfields[$index+1]" ); |
495
|
|
|
|
|
|
|
} # if suba and length is not 7 |
496
|
|
|
|
|
|
|
#check against code list for geographic areas. |
497
|
|
|
|
|
|
|
elsif ($newsubfields[$index] eq 'a') { |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
#see if geog area code matches valid code |
500
|
3
|
50
|
|
|
|
15
|
my $validgac = $GeogAreaCodes{$newsubfields[$index+1]} ? 1 : 0; |
501
|
|
|
|
|
|
|
#look for obsolete code match if valid code was not matched |
502
|
3
|
100
|
|
|
|
13
|
my $obsoletegac = $ObsoleteGeogAreaCodes{$newsubfields[$index+1]} ? 1 : 0; |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# skip valid subfields |
505
|
3
|
50
|
|
|
|
31
|
unless ($validgac) { |
506
|
|
|
|
|
|
|
#report invalid matches as possible obsolete codes |
507
|
3
|
100
|
|
|
|
11
|
if ($obsoletegac) { |
508
|
1
|
|
|
|
|
5
|
$self->warn( "043: Subfield _a, $newsubfields[$index+1], may be obsolete."); |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
else { |
511
|
2
|
|
|
|
|
11
|
$self->warn( "043: Subfield _a, $newsubfields[$index+1], is not valid."); |
512
|
|
|
|
|
|
|
} #else code not found |
513
|
|
|
|
|
|
|
} # unless found valid code |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
} #elsif suba |
516
|
|
|
|
|
|
|
} #foreach subfield |
517
|
|
|
|
|
|
|
} #check_043 |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=head2 check_245( $field ) |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
-Makes sure $a exists (and is first subfield). |
522
|
|
|
|
|
|
|
-Warns if last character of field is not a period |
523
|
|
|
|
|
|
|
--Follows LCRI 1.0C, Nov. 2003 rather than MARC21 rule |
524
|
|
|
|
|
|
|
-Verifies that $c is preceded by / (space-/) |
525
|
|
|
|
|
|
|
-Verifies that initials in $c are not spaced |
526
|
|
|
|
|
|
|
-Verifies that $b is preceded by :;= (space-colon, space-semicolon, space-equals) |
527
|
|
|
|
|
|
|
-Verifies that $h is not preceded by space unless it is dash-space |
528
|
|
|
|
|
|
|
-Verifies that data of $h is enclosed in square brackets |
529
|
|
|
|
|
|
|
-Verifies that $n is preceded by . (period) |
530
|
|
|
|
|
|
|
--As part of that, looks for no-space period, or dash-space-period (for replaced elipses) |
531
|
|
|
|
|
|
|
-Verifies that $p is preceded by , (no-space-comma) when following $n and . (period) when following other subfields. |
532
|
|
|
|
|
|
|
-Performs rudimentary article check of 245 2nd indicator vs. 1st word of 245$a (for manual verification). |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
Article checking is done by internal _check_article method, which should work for 130, 240, 245, 440, 630, 730, and 830. |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=cut |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
sub check_245 { |
539
|
|
|
|
|
|
|
|
540
|
49
|
|
|
49
|
1
|
27701
|
my $self = shift; |
541
|
49
|
|
|
|
|
71
|
my $field = shift; |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
#set tagno for reporting |
544
|
49
|
|
|
|
|
76
|
my $tagno = '245'; |
545
|
|
|
|
|
|
|
|
546
|
49
|
100
|
|
|
|
146
|
if ( not $field->subfield( "a" ) ) { |
547
|
1
|
|
|
|
|
25
|
$self->warn( "245: Must have a subfield _a." ); |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
551
|
|
|
|
|
|
|
|
552
|
49
|
|
|
|
|
1192
|
my @subfields = $field->subfields(); |
553
|
49
|
|
|
|
|
896
|
my @newsubfields = (); |
554
|
49
|
|
|
|
|
70
|
my $has_sub_6 = 0; |
555
|
|
|
|
|
|
|
|
556
|
49
|
|
|
|
|
162
|
while (my $subfield = pop(@subfields)) { |
557
|
90
|
|
|
|
|
154
|
my ($code, $data) = @$subfield; |
558
|
|
|
|
|
|
|
#check for subfield 6 being present |
559
|
90
|
100
|
|
|
|
161
|
$has_sub_6 = 1 if ($code eq '6'); |
560
|
90
|
|
|
|
|
303
|
unshift (@newsubfields, $code, $data); |
561
|
|
|
|
|
|
|
} # while |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
# 245 must end in period (may want to make this less restrictive by allowing trailing spaces) |
564
|
|
|
|
|
|
|
#do 2 checks--for final punctuation (MARC21 rule), and for period (LCRI 1.0C, Nov. 2003; LCPS 1.7.1) |
565
|
49
|
100
|
|
|
|
301
|
if ($newsubfields[$#newsubfields] !~ /[.?!]$/) { |
|
|
100
|
|
|
|
|
|
566
|
1
|
|
|
|
|
5
|
$self->warn ( "245: Must end with . (period)."); |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
elsif($newsubfields[$#newsubfields] =~ /[?!]$/) { |
569
|
2
|
|
|
|
|
7
|
$self->warn ( "245: MARC21 allows ? or ! as final punctuation but LCRI 1.0C, Nov. 2003 (LCPS 1.7.1 for RDA records), requires period."); |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
##Check for first subfield |
573
|
|
|
|
|
|
|
#subfield a should be first subfield (or 2nd if subfield '6' is present) |
574
|
49
|
100
|
|
|
|
96
|
if ($has_sub_6) { |
575
|
|
|
|
|
|
|
#make sure there are at least 2 subfields |
576
|
2
|
50
|
|
|
|
5
|
if ($#newsubfields < 3) { |
577
|
0
|
|
|
|
|
0
|
$self->warn ("$tagno: May have too few subfields."); |
578
|
|
|
|
|
|
|
} #if fewer than 2 subfields |
579
|
|
|
|
|
|
|
else { |
580
|
2
|
50
|
|
|
|
5
|
if ($newsubfields[0] ne '6') { |
581
|
0
|
|
|
|
|
0
|
$self->warn ( "$tagno: First subfield must be _6, but it is $newsubfields[0]"); |
582
|
|
|
|
|
|
|
} #if 1st subfield not '6' |
583
|
2
|
50
|
|
|
|
4
|
if ($newsubfields[2] ne 'a') { |
584
|
0
|
|
|
|
|
0
|
$self->warn ( "$tagno: First subfield after subfield _6 must be _a, but it is _$newsubfields[2]"); |
585
|
|
|
|
|
|
|
} #if 2nd subfield not 'a' |
586
|
|
|
|
|
|
|
} #else at least 2 subfields |
587
|
|
|
|
|
|
|
} #if has subfield 6 |
588
|
|
|
|
|
|
|
else { |
589
|
|
|
|
|
|
|
#1st subfield must be 'a' |
590
|
47
|
100
|
|
|
|
119
|
if ($newsubfields[0] ne 'a') { |
591
|
1
|
|
|
|
|
5
|
$self->warn ( "$tagno: First subfield must be _a, but it is _$newsubfields[0]"); |
592
|
|
|
|
|
|
|
} #if 2nd subfield not 'a' |
593
|
|
|
|
|
|
|
} #else no subfield _6 |
594
|
|
|
|
|
|
|
##End check for first subfield |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
#subfield c, if present, must be preceded by / |
597
|
|
|
|
|
|
|
#also look for space between initials |
598
|
49
|
100
|
|
|
|
134
|
if ($field->subfield("c")) { |
599
|
|
|
|
|
|
|
|
600
|
14
|
|
|
|
|
367
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
601
|
|
|
|
|
|
|
# 245 subfield c must be preceded by / (space-/) |
602
|
17
|
100
|
|
|
|
46
|
if ($newsubfields[$index] eq 'c') { |
603
|
14
|
100
|
|
|
|
65
|
$self->warn ( "245: Subfield _c must be preceded by /") if ($newsubfields[$index-1] !~ /\s\/$/); |
604
|
|
|
|
|
|
|
# 245 subfield c initials should not have space |
605
|
14
|
100
|
66
|
|
|
70
|
$self->warn ( "245: Subfield _c initials should not have a space.") if (($newsubfields[$index+1] =~ /\b\w\. \b\w\./) && ($newsubfields[$index+1] !~ /\[\bi\.e\. \b\w\..*\]/)); |
606
|
14
|
|
|
|
|
31
|
last; |
607
|
|
|
|
|
|
|
} #if |
608
|
|
|
|
|
|
|
} #for |
609
|
|
|
|
|
|
|
} # subfield c exists |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
#each subfield b, if present, should be preceded by :;= (colon, semicolon, or equals sign) |
612
|
|
|
|
|
|
|
### Are there others? ### |
613
|
49
|
100
|
|
|
|
761
|
if ($field->subfield("b")) { |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# 245 subfield b should be preceded by space-:;= (colon, semicolon, or equals sign) |
616
|
13
|
|
|
|
|
307
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
617
|
|
|
|
|
|
|
#report error if subfield 'b' is not preceded by space-:;= (colon, semicolon, or equals sign) |
618
|
16
|
100
|
100
|
|
|
108
|
if (($newsubfields[$index] eq 'b') && ($newsubfields[$index-1] !~ / [:;=]$/)) { |
619
|
4
|
|
|
|
|
10
|
$self->warn ( "245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign."); |
620
|
|
|
|
|
|
|
} #if |
621
|
|
|
|
|
|
|
} #for |
622
|
|
|
|
|
|
|
} # subfield b exists |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
#each subfield h, if present, should be preceded by non-space |
626
|
49
|
100
|
|
|
|
753
|
if ($field->subfield("h")) { |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
# 245 subfield h should not be preceded by space |
629
|
4
|
|
|
|
|
94
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
630
|
|
|
|
|
|
|
#report error if subfield 'h' is preceded by space (unless dash-space) |
631
|
6
|
100
|
100
|
|
|
59
|
if (($newsubfields[$index] eq 'h') && ($newsubfields[$index-1] !~ /(\S$)|(\-\- $)/)) { |
632
|
1
|
|
|
|
|
6
|
$self->warn ( "245: Subfield _h should not be preceded by space."); |
633
|
|
|
|
|
|
|
} #if h and not preceded by no-space (unless dash) |
634
|
|
|
|
|
|
|
#report error if subfield 'h' does not start with open square bracket with a matching close bracket |
635
|
|
|
|
|
|
|
##could have check against list of valid values here |
636
|
6
|
100
|
100
|
|
|
40
|
if (($newsubfields[$index] eq 'h') && ($newsubfields[$index+1] !~ /^\[\w*\s*\w*\]/)) { |
637
|
1
|
|
|
|
|
6
|
$self->warn ( "245: Subfield _h must have matching square brackets, $newsubfields[$index]."); |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
} #for |
640
|
|
|
|
|
|
|
} # subfield h exists |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
#each subfield n, if present, must be preceded by . (period) |
643
|
49
|
100
|
|
|
|
935
|
if ($field->subfield("n")) { |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
# 245 subfield n must be preceded by . (period) |
646
|
4
|
|
|
|
|
92
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
647
|
|
|
|
|
|
|
#report error if subfield 'n' is not preceded by non-space-period or dash-space-period |
648
|
6
|
100
|
100
|
|
|
47
|
if (($newsubfields[$index] eq 'n') && ($newsubfields[$index-1] !~ /(\S\.$)|(\-\- \.$)/)) { |
649
|
1
|
|
|
|
|
5
|
$self->warn ( "245: Subfield _n must be preceded by . (period)."); |
650
|
|
|
|
|
|
|
} #if |
651
|
|
|
|
|
|
|
} #for |
652
|
|
|
|
|
|
|
} # subfield n exists |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
#each subfield p, if present, must be preceded by a , (no-space-comma) if it follows subfield n, or by . (no-space-period or dash-space-period) following other subfields |
655
|
49
|
100
|
|
|
|
926
|
if ($field->subfield("p")) { |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
# 245 subfield p must be preceded by . (period) or , (comma) |
658
|
4
|
|
|
|
|
90
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
659
|
|
|
|
|
|
|
#only looking for subfield p |
660
|
6
|
100
|
|
|
|
16
|
if ($newsubfields[$index] eq 'p') { |
661
|
|
|
|
|
|
|
# case for subfield 'n' being field before this one (allows dash-space-comma) |
662
|
4
|
100
|
100
|
|
|
51
|
if (($newsubfields[$index-2] eq 'n') && ($newsubfields[$index-1] !~ /(\S,$)|(\-\- ,$)/)) { |
|
|
100
|
100
|
|
|
|
|
663
|
1
|
|
|
|
|
3
|
$self->warn ( "245: Subfield _p must be preceded by , (comma) when it follows subfield _n."); |
664
|
|
|
|
|
|
|
} #if subfield n precedes this one |
665
|
|
|
|
|
|
|
# elsif case for subfield before this one is not n |
666
|
|
|
|
|
|
|
elsif (($newsubfields[$index-2] ne 'n') && ($newsubfields[$index-1] !~ /(\S\.$)|(\-\- \.$)/)) { |
667
|
1
|
|
|
|
|
5
|
$self->warn ( "245: Subfield _p must be preceded by . (period) when it follows a subfield other than _n."); |
668
|
|
|
|
|
|
|
} #elsif subfield p preceded by non-period when following a non-subfield 'n' |
669
|
|
|
|
|
|
|
} #if index is looking at subfield p |
670
|
|
|
|
|
|
|
} #for |
671
|
|
|
|
|
|
|
} # subfield p exists |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
###################################### |
674
|
|
|
|
|
|
|
#check for invalid 2nd indicator |
675
|
49
|
|
|
|
|
971
|
$self->_check_article($field); |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
} # check_245 |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
############ |
683
|
|
|
|
|
|
|
# Internal # |
684
|
|
|
|
|
|
|
############ |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=head2 _check_article |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
Check of articles is based on code from Ian Hamilton. This version is more |
689
|
|
|
|
|
|
|
limited in that it focuses on English, Spanish, French, Italian and German |
690
|
|
|
|
|
|
|
articles. Certain possible articles have been removed if they are valid English |
691
|
|
|
|
|
|
|
non-articles. This version also disregards 008_language/041 codes and just uses |
692
|
|
|
|
|
|
|
the list of articles to provide warnings/suggestions. |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
source for articles = L |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
Should work with fields 130, 240, 245, 440, 630, 730, and 830. Reports error if |
697
|
|
|
|
|
|
|
another field is passed in. |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=cut |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
sub _check_article { |
702
|
|
|
|
|
|
|
|
703
|
49
|
|
|
49
|
|
76
|
my $self = shift; |
704
|
49
|
|
|
|
|
81
|
my $field = shift; |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
#add articles here as needed |
707
|
|
|
|
|
|
|
##Some omitted due to similarity with valid words (e.g. the German 'die'). |
708
|
49
|
|
|
|
|
744
|
my %article = ( |
709
|
|
|
|
|
|
|
'a' => 'eng glg hun por', |
710
|
|
|
|
|
|
|
'an' => 'eng', |
711
|
|
|
|
|
|
|
'das' => 'ger', |
712
|
|
|
|
|
|
|
'dem' => 'ger', |
713
|
|
|
|
|
|
|
'der' => 'ger', |
714
|
|
|
|
|
|
|
'ein' => 'ger', |
715
|
|
|
|
|
|
|
'eine' => 'ger', |
716
|
|
|
|
|
|
|
'einem' => 'ger', |
717
|
|
|
|
|
|
|
'einen' => 'ger', |
718
|
|
|
|
|
|
|
'einer' => 'ger', |
719
|
|
|
|
|
|
|
'eines' => 'ger', |
720
|
|
|
|
|
|
|
'el' => 'spa', |
721
|
|
|
|
|
|
|
'en' => 'cat dan nor swe', |
722
|
|
|
|
|
|
|
'gl' => 'ita', |
723
|
|
|
|
|
|
|
'gli' => 'ita', |
724
|
|
|
|
|
|
|
'il' => 'ita mlt', |
725
|
|
|
|
|
|
|
'l' => 'cat fre ita mlt', |
726
|
|
|
|
|
|
|
'la' => 'cat fre ita spa', |
727
|
|
|
|
|
|
|
'las' => 'spa', |
728
|
|
|
|
|
|
|
'le' => 'fre ita', |
729
|
|
|
|
|
|
|
'les' => 'cat fre', |
730
|
|
|
|
|
|
|
'lo' => 'ita spa', |
731
|
|
|
|
|
|
|
'los' => 'spa', |
732
|
|
|
|
|
|
|
'os' => 'por', |
733
|
|
|
|
|
|
|
'the' => 'eng', |
734
|
|
|
|
|
|
|
'um' => 'por', |
735
|
|
|
|
|
|
|
'uma' => 'por', |
736
|
|
|
|
|
|
|
'un' => 'cat spa fre ita', |
737
|
|
|
|
|
|
|
'una' => 'cat spa ita', |
738
|
|
|
|
|
|
|
'une' => 'fre', |
739
|
|
|
|
|
|
|
'uno' => 'ita', |
740
|
|
|
|
|
|
|
); |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
#add exceptions here as needed |
743
|
|
|
|
|
|
|
# may want to make keys lowercase |
744
|
49
|
|
|
|
|
531
|
my %exceptions = ( |
745
|
|
|
|
|
|
|
'A & E' => 1, |
746
|
|
|
|
|
|
|
'A & ' => 1, |
747
|
|
|
|
|
|
|
'A-' => 1, |
748
|
|
|
|
|
|
|
'A+' => 1, |
749
|
|
|
|
|
|
|
'A is ' => 1, |
750
|
|
|
|
|
|
|
'A isn\'t ' => 1, |
751
|
|
|
|
|
|
|
'A l\'' => 1, |
752
|
|
|
|
|
|
|
'A la ' => 1, |
753
|
|
|
|
|
|
|
'A posteriori' => 1, |
754
|
|
|
|
|
|
|
'A priori' => 1, |
755
|
|
|
|
|
|
|
'A to ' => 1, |
756
|
|
|
|
|
|
|
'El Nino' => 1, |
757
|
|
|
|
|
|
|
'El Salvador' => 1, |
758
|
|
|
|
|
|
|
'L is ' => 1, |
759
|
|
|
|
|
|
|
'L-' => 1, |
760
|
|
|
|
|
|
|
'La Salle' => 1, |
761
|
|
|
|
|
|
|
'Las Vegas' => 1, |
762
|
|
|
|
|
|
|
'Lo cual' => 1, |
763
|
|
|
|
|
|
|
'Lo mein' => 1, |
764
|
|
|
|
|
|
|
'Lo que' => 1, |
765
|
|
|
|
|
|
|
'Los Alamos' => 1, |
766
|
|
|
|
|
|
|
'Los Angeles' => 1, |
767
|
|
|
|
|
|
|
); |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
#get tagno to determine which indicator to check and for reporting |
770
|
49
|
|
|
|
|
126
|
my $tagno = $field->tag(); |
771
|
|
|
|
|
|
|
#retrieve tagno from subfield 6 if 880 field |
772
|
49
|
100
|
|
|
|
282
|
if ($tagno eq '880') { |
773
|
1
|
50
|
|
|
|
13
|
if ($field->subfield('6')) { |
774
|
1
|
|
|
|
|
31
|
my $sub6 = $field->subfield('6'); |
775
|
1
|
|
|
|
|
23
|
$tagno = substr($sub6, 0, 3); |
776
|
|
|
|
|
|
|
} #if subfield 6 |
777
|
|
|
|
|
|
|
} #if 880 field |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
#$ind holds nonfiling character indicator value |
780
|
49
|
|
|
|
|
91
|
my $ind = ''; |
781
|
|
|
|
|
|
|
#$first_or_second holds which indicator is for nonfiling char value |
782
|
49
|
|
|
|
|
71
|
my $first_or_second = ''; |
783
|
49
|
50
|
|
|
|
344
|
if ($tagno !~ /^(?:130|240|245|440|630|730|830)$/) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
784
|
0
|
|
|
|
|
0
|
print $tagno, " is not a valid field for article checking\n"; |
785
|
0
|
|
|
|
|
0
|
return; |
786
|
|
|
|
|
|
|
} #if field is not one of those checked for articles |
787
|
|
|
|
|
|
|
#130, 630, 730 => ind1 |
788
|
|
|
|
|
|
|
elsif ($tagno =~ /^(?:130|630|730)$/) { |
789
|
0
|
|
|
|
|
0
|
$ind = $field->indicator(1); |
790
|
0
|
|
|
|
|
0
|
$first_or_second = '1st'; |
791
|
|
|
|
|
|
|
} #if field is 130, 630, or 730 |
792
|
|
|
|
|
|
|
#240, 245, 440, 830 => ind2 |
793
|
|
|
|
|
|
|
elsif ($tagno =~ /^(?:240|245|440|830)$/) { |
794
|
49
|
|
|
|
|
107
|
$ind = $field->indicator(2); |
795
|
49
|
|
|
|
|
553
|
$first_or_second = '2nd'; |
796
|
|
|
|
|
|
|
} #if field is 240, 245, 440, or 830 |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
#report non-numeric non-filing indicators as invalid |
800
|
49
|
50
|
|
|
|
121
|
$self->warn ( $tagno, ": Non-filing indicator is non-numeric" ) unless ($ind =~ /^[0-9]$/); |
801
|
|
|
|
|
|
|
#get subfield 'a' of the title field |
802
|
49
|
|
100
|
|
|
116
|
my $title = $field->subfield('a') || ''; |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
|
805
|
49
|
|
|
|
|
1026
|
my $char1_notalphanum = 0; |
806
|
|
|
|
|
|
|
#check for apostrophe, quote, bracket, or parenthesis, before first word |
807
|
|
|
|
|
|
|
#remove if found and add to non-word counter |
808
|
49
|
|
|
|
|
155
|
while ($title =~ /^["'\[\(*]/){ |
809
|
4
|
|
|
|
|
11
|
$char1_notalphanum++; |
810
|
4
|
|
|
|
|
16
|
$title =~ s/^["'\[\(*]//; |
811
|
|
|
|
|
|
|
} |
812
|
|
|
|
|
|
|
# split title into first word + rest on space, parens, bracket, apostrophe, quote, or hyphen |
813
|
49
|
|
|
|
|
238
|
my ($firstword, $separator, $etc) = $title =~ /^([^ \(\)\[\]'"\-]+)([ \(\)\[\]'"\-])?(.*)/i; |
814
|
49
|
100
|
|
|
|
122
|
$firstword = '' if ! defined( $firstword ); |
815
|
49
|
100
|
|
|
|
82
|
$separator = '' if ! defined( $separator ); |
816
|
49
|
100
|
|
|
|
90
|
$etc = '' if ! defined( $etc ); |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
#get length of first word plus the number of chars removed above plus one for the separator |
819
|
49
|
|
|
|
|
83
|
my $nonfilingchars = length($firstword) + $char1_notalphanum + 1; |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
#check to see if first word is an exception |
822
|
49
|
|
|
|
|
59
|
my $isan_exception = 0; |
823
|
49
|
|
|
|
|
229
|
$isan_exception = grep {$title =~ /^\Q$_\E/i} (keys %exceptions); |
|
1078
|
|
|
|
|
6258
|
|
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
#lowercase chars of $firstword for comparison with article list |
826
|
49
|
|
|
|
|
146
|
$firstword = lc($firstword); |
827
|
|
|
|
|
|
|
|
828
|
49
|
|
|
|
|
66
|
my $isan_article = 0; |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
#see if first word is in the list of articles and not an exception |
831
|
49
|
100
|
100
|
|
|
166
|
$isan_article = 1 if (($article{$firstword}) && !($isan_exception)); |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
#if article then $nonfilingchars should match $ind |
834
|
49
|
100
|
|
|
|
101
|
if ($isan_article) { |
835
|
|
|
|
|
|
|
#account for quotes, apostrophes, parens, or brackets before 2nd word |
836
|
|
|
|
|
|
|
# if (($separator eq ' ') && ($etc =~ /^['"]/)) { |
837
|
9
|
100
|
66
|
|
|
38
|
if (($separator) && ($etc =~ /^[ \(\)\[\]'"\-]+/)) { |
838
|
4
|
|
|
|
|
11
|
while ($etc =~ /^[ "'\[\]\(\)*]/){ |
839
|
6
|
|
|
|
|
9
|
$nonfilingchars++; |
840
|
6
|
|
|
|
|
23
|
$etc =~ s/^[ "'\[\]\(\)*]//; |
841
|
|
|
|
|
|
|
} #while etc starts with nonfiling chars |
842
|
|
|
|
|
|
|
} #if separator defined and etc starts with nonfiling chars |
843
|
|
|
|
|
|
|
#special case for 'en' (unsure why) |
844
|
9
|
50
|
|
|
|
72
|
if ($firstword eq 'en') { |
|
|
100
|
|
|
|
|
|
845
|
0
|
0
|
0
|
|
|
0
|
$self->warn ( $tagno, ": First word, , $firstword, may be an article, check $first_or_second indicator ($ind)." ) unless (($ind eq '3') || ($ind eq '0')); |
846
|
|
|
|
|
|
|
} |
847
|
|
|
|
|
|
|
elsif ($nonfilingchars ne $ind) { |
848
|
3
|
|
|
|
|
14
|
$self->warn ( $tagno, ": First word, $firstword, may be an article, check $first_or_second indicator ($ind)." ); |
849
|
|
|
|
|
|
|
} #unless ind is same as length of first word and nonfiling characters |
850
|
|
|
|
|
|
|
} #if first word is in article list |
851
|
|
|
|
|
|
|
#not an article so warn if $ind is not 0 |
852
|
|
|
|
|
|
|
else { |
853
|
40
|
100
|
|
|
|
384
|
unless ($ind eq '0') { |
854
|
1
|
|
|
|
|
7
|
$self->warn ( $tagno, ": First word, $firstword, does not appear to be an article, check $first_or_second indicator ($ind)." ); |
855
|
|
|
|
|
|
|
} #unless ind is 0 |
856
|
|
|
|
|
|
|
} #else not in article list |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
####################################### |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
} #_check_article |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
############ |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=head1 SEE ALSO |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
Check the docs for L. All software links are there. |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
=head1 TODO |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
=over 4 |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=item * Subfield 6 |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
For subfield 6, it should always be the 1st subfield according to MARC 21 specifications. Perhaps a generic check should be added that warns if subfield 6 is not the 1st subfield. |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
=item * Subfield 8. |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
This subfield could be the 1st or 2nd subfield, so the code that checks for the 1st few subfields (check_245, check_250) should take that into account. |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
=item * Subfield 9 |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
This subfield is not officially allowed in MARC, since it is locally defined. Some way needs to be made to allow messages/warnings about this subfield to be turned off (or otherwise deal with records using/allowing locally defined subfield 9). |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
=item * 008 length and presence check |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
Currently, 008 validation is not implemented in MARC::Lint, but is left to MARC::Errorchecks. It might be useful if MARC::Lint's basic validation checks included a verification that the 008 exists and is exactly 40 characters long. Additional 008-related checking and byte validation would remain in MARC::Errorchecks. |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
=item * ISBN and ISSN checking |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
020 and 022 fields are validated with the C and |
892
|
|
|
|
|
|
|
C modules, respectively. Business::ISBN versions between 2 and |
893
|
|
|
|
|
|
|
2.02_01 are incompatible with MARC::Lint. |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
=item * check_041 cleanup |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
Splitting subfield code strings every 3 chars could probably be written more efficiently. |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
=item * check_245 cleanup |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
The article checking in particular. |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
=item * Method for turning off checks |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
Provide a way for users to skip checks more easily when using check_record, or a |
906
|
|
|
|
|
|
|
specific check_xxx method (e.g. skip article checking). |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
=back |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
=head1 LICENSE |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
This code may be distributed under the same terms as Perl itself. |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
Please note that these modules are not products of or supported by the |
915
|
|
|
|
|
|
|
employers of the various contributors to the code. |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
=cut |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
# Used only to read the stuff from __DATA__ |
920
|
|
|
|
|
|
|
sub _read_rules { |
921
|
6
|
|
|
6
|
|
14
|
my $self = shift; |
922
|
|
|
|
|
|
|
|
923
|
6
|
|
|
|
|
24
|
my $tell = tell(DATA); # Stash the position so we can reset it for next time |
924
|
|
|
|
|
|
|
|
925
|
6
|
|
|
|
|
31
|
local $/ = ""; |
926
|
6
|
|
|
|
|
129
|
while ( my $tagblock = ) { |
927
|
1464
|
|
|
|
|
6942
|
my @lines = split( /\n/, $tagblock ); |
928
|
1464
|
|
|
|
|
17880
|
s/\s+$// for @lines; |
929
|
|
|
|
|
|
|
|
930
|
1464
|
100
|
|
|
|
3270
|
next unless @lines >= 4; # Some of our entries are tag-only |
931
|
|
|
|
|
|
|
|
932
|
1362
|
|
|
|
|
2255
|
my $tagline = shift @lines; |
933
|
1362
|
|
|
|
|
4142
|
my @keyvals = split( /\s+/, $tagline, 3 ); |
934
|
1362
|
|
|
|
|
2265
|
my $tagno = shift @keyvals; |
935
|
1362
|
|
|
|
|
1870
|
my $repeatable = shift @keyvals; |
936
|
|
|
|
|
|
|
|
937
|
1362
|
|
|
|
|
2875
|
$self->_parse_tag_rules( $tagno, $repeatable, @lines ); |
938
|
|
|
|
|
|
|
} # while |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
# Set the pointer back to where it was, in case we do this again |
941
|
6
|
|
|
|
|
97
|
seek( DATA, $tell, 0 ); |
942
|
|
|
|
|
|
|
} |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
sub _parse_tag_rules { |
945
|
1362
|
|
|
1362
|
|
1834
|
my $self = shift; |
946
|
1362
|
|
|
|
|
1739
|
my $tagno = shift; |
947
|
1362
|
|
|
|
|
1635
|
my $repeatable = shift; |
948
|
1362
|
|
|
|
|
4234
|
my @lines = @_; |
949
|
|
|
|
|
|
|
|
950
|
1362
|
|
50
|
|
|
7992
|
my $rules = ($self->{_rules}->{$tagno} ||= {}); |
951
|
1362
|
|
|
|
|
2861
|
$rules->{'repeatable'} = $repeatable; |
952
|
|
|
|
|
|
|
|
953
|
1362
|
|
|
|
|
2062
|
for my $line ( @lines ) { |
954
|
17190
|
|
|
|
|
50461
|
my @keyvals = split( /\s+/, $line, 3 ); |
955
|
17190
|
|
|
|
|
26909
|
my $key = shift @keyvals; |
956
|
17190
|
|
|
|
|
23211
|
my $val = shift @keyvals; |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
# Do magic for indicators |
959
|
17190
|
100
|
|
|
|
28758
|
if ( $key =~ /^ind/ ) { |
960
|
2724
|
|
|
|
|
5017
|
$rules->{$key} = $val; |
961
|
|
|
|
|
|
|
|
962
|
2724
|
|
|
|
|
3535
|
my $desc; |
963
|
|
|
|
|
|
|
my $regex; |
964
|
|
|
|
|
|
|
|
965
|
2724
|
100
|
|
|
|
4256
|
if ( $val eq "blank" ) { |
966
|
1674
|
|
|
|
|
2039
|
$desc = "blank"; |
967
|
1674
|
|
|
|
|
5028
|
$regex = qr/^ $/; |
968
|
|
|
|
|
|
|
} else { |
969
|
1050
|
|
|
|
|
1717
|
$desc = _nice_list($val); |
970
|
1050
|
|
|
|
|
2419
|
$val =~ s/^b/ /; |
971
|
1050
|
|
|
|
|
12108
|
$regex = qr/^[$val]$/; |
972
|
|
|
|
|
|
|
} |
973
|
|
|
|
|
|
|
|
974
|
2724
|
|
|
|
|
6720
|
$rules->{$key."_desc"} = $desc; |
975
|
2724
|
|
|
|
|
6086
|
$rules->{$key."_regex"} = $regex; |
976
|
|
|
|
|
|
|
} # if indicator |
977
|
|
|
|
|
|
|
else { |
978
|
14466
|
50
|
|
|
|
19686
|
if ( $key =~ /(.)-(.)/ ) { |
979
|
0
|
|
|
|
|
0
|
my ($min,$max) = ($1,$2); |
980
|
0
|
|
|
|
|
0
|
$rules->{$_} = $val for ($min..$max); |
981
|
|
|
|
|
|
|
} else { |
982
|
14466
|
|
|
|
|
39826
|
$rules->{$key} = $val; |
983
|
|
|
|
|
|
|
} |
984
|
|
|
|
|
|
|
} # not an indicator |
985
|
|
|
|
|
|
|
} # for $line |
986
|
|
|
|
|
|
|
} |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
sub _nice_list { |
990
|
1050
|
|
|
1050
|
|
1355
|
my $str = shift; |
991
|
|
|
|
|
|
|
|
992
|
1050
|
100
|
|
|
|
2471
|
if ( $str =~ s/(\d)-(\d)/$1 thru $2/ ) { |
993
|
60
|
|
|
|
|
202
|
return $str; |
994
|
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
|
996
|
990
|
|
|
|
|
2380
|
my @digits = split( //, $str ); |
997
|
990
|
100
|
|
|
|
1899
|
$digits[0] = "blank" if $digits[0] eq "b"; |
998
|
990
|
|
|
|
|
1413
|
my $last = pop @digits; |
999
|
990
|
|
|
|
|
3149
|
return join( ", ", @digits ) . " or $last"; |
1000
|
|
|
|
|
|
|
} |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
sub _ind_regex { |
1003
|
0
|
|
|
0
|
|
|
my $str = shift; |
1004
|
|
|
|
|
|
|
|
1005
|
0
|
0
|
|
|
|
|
return qr/^ $/ if $str eq "blank"; |
1006
|
|
|
|
|
|
|
|
1007
|
0
|
|
|
|
|
|
return qr/^[$str]$/; |
1008
|
|
|
|
|
|
|
} |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
1; |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
__DATA__ |