File Coverage

blib/lib/MARC/Errorchecks.pm
Criterion Covered Total %
statement 863 1189 72.5
branch 423 878 48.1
condition 63 260 24.2
subroutine 43 45 95.5
pod 28 29 96.5
total 1420 2401 59.1


line stmt bran cond sub pod time code
1             #!perl
2              
3             package MARC::Errorchecks;
4              
5 5     5   171784 use strict;
  5         12  
  5         226  
6 5     5   31 use warnings;
  5         9  
  5         220  
7              
8 5     5   28 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
  5         13  
  5         675  
9              
10             require Exporter;
11              
12             @ISA = qw(Exporter);
13             # Items to export into callers namespace by default. @EXPORT = qw();
14              
15             $VERSION = 1.18;
16              
17             =head1 NAME
18              
19             MARC::Errorchecks -- Collection of MARC 21/AACR2 error checks
20              
21             =head1 DESCRIPTION
22              
23             Module for storing MARC error checking subroutines,
24             based on MARC 21, AACR2, and LCRIs.
25             These are used to find errors not easily checked by
26             the MARC::Lint and MARC::Lintadditions modules,
27             such as those that cross field boundaries.
28              
29             Each subroutine should generally be passed a MARC::Record object.
30              
31             Returned warnings/errors are generated as follows:
32             push @warningstoreturn, join '', ($field->tag(), ": [ERROR TEXT]\t");
33             return \@warningstoreturn;
34              
35             =head1 SYNOPSIS
36              
37             use MARC::Batch;
38             use MARC::Errorchecks;
39              
40             #See also MARC::Lintadditions for more checks
41             #use MARC::Lintadditions;
42              
43             #change file names as desired
44             my $inputfile = 'marcfile.mrc';
45             my $errorfilename = 'errors.txt';
46             my $errorcount = 0;
47             open (OUT, ">$errorfilename");
48             #initialize $infile as new MARC::Batch object
49             my $batch = MARC::Batch->new('USMARC', "$inputfile");
50             my $errorcount = 0;
51             #loop through batch file of records
52             while (my $record = $batch->next()) {
53             #if $record->field('001') #add this if some records in file do not contain an '001' field
54             my $controlno = $record->field('001')->as_string(); #call MARC::Errorchecks subroutines
55              
56             my @errorstoreturn = ();
57              
58             # check everything
59              
60             push @errorstoreturn, (@{MARC::Errorchecks::check_all_subs($record)});
61              
62             # or only a few
63             push @errorstoreturn, (@{MARC::Errorchecks::check_010($record)});
64             push @errorstoreturn, (@{MARC::Errorchecks::check_bk008_vs_bibrefandindex($record)});
65              
66             # report results
67             if (@errorstoreturn){
68             #########################################
69             print OUT join( "\t", "$controlno", @errorstoreturn, "\t\n");
70              
71             $errorcount++;
72             }
73              
74             } #while
75              
76             =head1 TO DO
77              
78             Maintain check-all subroutine, a wrapper that calls all the subroutines in Errorchecks, to simplify calling code in .pl.
79              
80             Determine whether extra tabs are being added to warnings.
81             Examine how warnings are returned and see if a better way is available.
82              
83             Add functionality.
84              
85             -Ending punctuation (in Lintadditions.pm, and 300 dealt with here, and now 5xx (some)).
86             -Matching brackets and parentheses in fields?
87             -Geographical headings miscoded as subjects.
88            
89             Possibly rewrite as object-oriented?
90             If not, optimize this and the Lintadditions.pm checks.
91             Example: reduce number of repeated breaking-out of fields into subfield parts.
92             So, subroutines that look for double spaces and double punctuation might be combined.
93              
94             Remove local practice code or facilitate its modification/customization.
95              
96             Deal with other TO DO items found below.
97             This includes fixing problem of "bibliographical references" being required if 008 contents has 'b'.
98              
99             =cut
100              
101             #########################################
102             ########## Initial includes #############
103             #########################################
104              
105 5     5   3251 use MARC::Record;
  5         29509  
  5         86422  
106              
107             #########################################
108             #########################################
109             #########################################
110              
111             #########################################
112              
113             =head2 check_all_subs
114              
115             Calls each error-checking subroutine in Errorchecks.
116             Gathers all errors and returns those errors in an array (reference).
117              
118             =head2 TO DO (check_all_subs)
119              
120             Make sure to update this subroutine as additional subroutines are added.
121              
122             =cut
123              
124             sub check_all_subs {
125              
126 1     1 1 3454 my $record = shift;
127 1         3 my @errorstoreturn = ();
128              
129             #call each subroutine and add its errors to @errorstoreturn
130              
131 1         3 push @errorstoreturn, (@{check_internal_spaces($record)});
  1         5  
132              
133 1         3 push @errorstoreturn, (@{check_trailing_spaces($record)});
  1         5  
134              
135 1         2 push @errorstoreturn, (@{check_double_periods($record)});
  1         4  
136              
137 1         2 push @errorstoreturn, (@{check_006($record)});
  1         5  
138              
139 1         3 push @errorstoreturn, (@{check_008($record)});
  1         15  
140              
141 1         2 push @errorstoreturn, (@{check_010($record)});
  1         5  
142              
143 1         6 push @errorstoreturn, (@{check_end_punct_300($record)});
  1         5  
144              
145 1         2 push @errorstoreturn, (@{check_bk008_vs_300($record)});
  1         4  
146              
147 1         2 push @errorstoreturn, (@{check_490vs8xx($record)});
  1         5  
148              
149 1         3 push @errorstoreturn, (@{check_240ind1vs1xx($record)});
  1         4  
150              
151 1         3 push @errorstoreturn, (@{check_245ind1vs1xx($record)});
  1         4  
152              
153 1         2 push @errorstoreturn, (@{matchpubdates($record)});
  1         4  
154              
155 1         2 push @errorstoreturn, (@{check_bk008_vs_bibrefandindex($record)});
  1         18  
156              
157 1         73 push @errorstoreturn, (@{check_041vs008lang($record)});
  1         5  
158              
159 1         3 push @errorstoreturn, (@{check_5xxendingpunctuation($record)});
  1         6  
160              
161 1         2 push @errorstoreturn, (@{findfloatinghyphens($record)});
  1         4  
162              
163 1         2 push @errorstoreturn, (@{check_floating_punctuation($record)});
  1         4  
164              
165 1         3 push @errorstoreturn, (@{video007vs300vs538($record)});
  1         4  
166              
167 1         2 push @errorstoreturn, (@{ldrvalidate($record)});
  1         5  
168              
169 1         2 push @errorstoreturn, (@{geogsubjvs043($record)});
  1         5  
170              
171 1         3 push @errorstoreturn, (@{findemptysubfields($record)});
  1         4  
172              
173 1         3 push @errorstoreturn, (@{check_040present($record)});
  1         4  
174              
175 1         2 push @errorstoreturn, (@{check_nonpunctendingfields($record)});
  1         5  
176              
177             #push @errorstoreturn, (@{check_fieldlength($record)});
178              
179              
180              
181             ## add more here ##
182             ##push @errorstoreturn, (@{});
183              
184 1         15 return \@errorstoreturn;
185              
186             } # check_all_subs
187              
188              
189             #########################################
190             #########################################
191             #########################################
192             #########################################
193              
194              
195              
196             #########################################
197             #########################################
198             #########################################
199             #########################################
200              
201             =head2 is_RDA($record)
202              
203             Checks to see if record is coded as an RDA record or not (based on 040$e).
204              
205             =cut
206              
207             sub is_RDA {
208              
209             #get passed MARC::Record object
210 61     61 1 82 my $record = shift;
211 61         86 my $is_RDA_record = 0;
212              
213             #declaration of return array
214 61 100       194 if ($record->field('040')) {
215 5         274 my $field040 = $record->field('040');
216 5 50       258 if ($field040->subfield('e')) {
217 0 0       0 if ($field040->subfield('e') =~ /^rda$/) {
218 0         0 $is_RDA_record = 1;
219             }#if 040 is rda
220             } #if 040 has subfield e
221             } #if 040
222              
223 61         2446 return $is_RDA_record;
224              
225             } #is_RDA($record)
226              
227             #########################################
228             #########################################
229             #########################################
230             #########################################
231              
232             =head2 check_double_periods($record)
233              
234             Looks for more than one period within subfields after 010.
235             Exception: Exactly 3 periods together are treated as ellipses.
236              
237             Looks for multiple commas.
238              
239             =head2 TO DO (check_double_periods)
240              
241             Find exceptions where double periods may be allowed.
242             Find exceptions where more than 3 periods can be next to each other.
243             Find exceptions where double commas are allowed (URI subfields, 856 field).
244              
245             Deal with the exceptions. Currently, skips 856 field completely. Needs to skip URI subfields.
246              
247             =cut
248              
249             sub check_double_periods {
250              
251             #get passed MARC::Record object
252 1     1 1 3 my $record = shift;
253             #declaration of return array
254 1         3 my @warningstoreturn = ();
255              
256              
257             #get all fields in record
258 1         3 my @fields = $record->fields();
259              
260 1         12 foreach my $field (@fields) {
261 34         92 my $tag = $field->tag();
262             #skip non-numeric tags
263 34 50       217 next unless ($tag =~ /^[0-9][0-9][0-9]$/);
264             #skip tags lower than 011
265 34 100       74 next if ($tag <= 10);
266             #skip 856
267 31 50       63 next if ($tag eq '856');
268 31         78 my @subfields = $field->subfields();
269 31         497 my @newsubfields = ();
270              
271             #break subfields into code-data array (so the entire field is in one array)
272 31         78 while (my $subfield = pop(@subfields)) {
273 39         58 my ($code, $data) = @$subfield;
274 39         157 unshift (@newsubfields, $code, $data);
275             } # while
276              
277             #examine data portion of each subfield
278 31         86 for (my $index = 1; $index <=$#newsubfields; $index+=2) {
279 39         62 my $subdata = $newsubfields[$index];
280             #report subfield data with more than one period but not exactly 3
281 39 100 100     108 if (($subdata =~ /\.\.+/) && ($subdata !~ /\.\.\.[^\.]*/)) {
282              
283 1         4 push @warningstoreturn, join '', ($tag, ": has multiple consecutive periods that do not appear to be ellipses.");
284              
285             } #if has multiple periods
286             #report subfield data with more than one comma
287 39 50       190 if ($subdata =~ /\,\,+/) {
288              
289 0         0 push @warningstoreturn, join '', ($tag, ": has multiple consecutive commas.");
290              
291             } #if has multiple commas
292             } #for each subfield
293             } #for each field
294              
295 1         7 return \@warningstoreturn;
296              
297              
298             } # check_double_periods
299              
300             #########################################
301             #########################################
302             #########################################
303             #########################################
304              
305             =head2 check_internal_spaces($record)
306              
307             Looks for more than one space within subfields after 010.
308             Ignores 035 field, since multiple spaces could be allowed.
309             Accounts for extra spaces between angle brackets for open date in 260c. Current version allows extra spaces in any 260 subfield containing angle brackets.
310              
311              
312             =head2 TO DO (check_internal_spaces)
313              
314             Account for non-numeric tags? Will likely complain for non-numeric tags in a record, since comparisons rely upon numeric tag checking.
315              
316             =cut
317              
318             sub check_internal_spaces {
319              
320             #get passed MARC::Record object
321 1     1 1 3 my $record = shift;
322             #declaration of return array
323 1         2 my @warningstoreturn = ();
324              
325             #get all fields in record
326 1         7 my @fields = $record->fields();
327              
328 1         14 foreach my $field (@fields) {
329 34         85 my $tag = $field->tag();
330             #skip non-numeric tags
331 34 50       219 next unless ($tag =~ /^[0-9][0-9][0-9]$/);
332             #skip tags lower than 011
333 34 100       71 next if ($tag <= 10);
334             #skip 035 field as well
335 31 50       62 next if ($tag eq '035');
336             #skip 787 field as well
337 31 50       59 next if ($tag eq '787');
338              
339 31         86 my @subfields = $field->subfields();
340 31         461 my @newsubfields = ();
341              
342             #break subfields into code-data array (so the entire field is in one array)
343 31         89 while (my $subfield = pop(@subfields)) {
344 39         59 my ($code, $data) = @$subfield;
345 39         154 unshift (@newsubfields, $code, $data);
346             } # while
347              
348             #examine data portion of each subfield
349 31         79 for (my $index = 1; $index <=$#newsubfields; $index+=2) {
350 39         57 my $subdata = $newsubfields[$index];
351              
352             #report subfield data with more than one space
353 39 100       128 if (my @internal_spaces = ($subdata =~ /(.{0,10} +?.{0,10})/g)) {
354             #warn, with exception for 260c with open date in angle brackets
355 2 50 33     13 push @warningstoreturn, join '', ($tag, ": has multiple internal spaces (", (join '_', @internal_spaces), ").") unless (($tag eq '260') && ($subdata =~ /\<.*?\>/));
356             } #if has multiple spaces
357              
358              
359             ########################################
360             ### added check for space at beginning of field
361             ########################################
362 39 100       184 if ($subdata =~ /^ /) {
363             #skip 016 field
364 1 50       4 return \@warningstoreturn if ($tag eq '016');
365 1         7 push @warningstoreturn, join '', ($tag, ": Subfield starts with a space.");
366             } #if has multiple spaces
367             ########################################
368             ########################################
369              
370             } #for each subfield
371             } #for each field
372              
373 1         6 return \@warningstoreturn;
374              
375             } # check_internal_spaces
376              
377             #########################################
378             #########################################
379             #########################################
380             #########################################
381              
382             =head2 check_trailing_spaces($record)
383              
384             Looks for extra spaces at the end of fields greater than 010.
385             Ignores 016 extra space at end.
386              
387             =head2 TO DO (check_trailing_spaces)
388              
389             Rewrite to incorporate 010 and 016 space checking.
390              
391             Consider allowing trailing spaces in 035 field.
392              
393             =cut
394              
395             sub check_trailing_spaces {
396              
397             #get passed MARC::Record object
398 1     1 1 2 my $record = shift;
399             #declaration of return array
400 1         2 my @warningstoreturn = ();
401              
402             #look at each field in record
403 1         5 foreach my $field ($record->fields()) {
404 34         91 my $tag = $field->tag();
405             #skip non-numeric tags
406 34 50       206 next unless ($tag =~ /^[0-9][0-9][0-9]$/);
407             #skip control fields and LCCN (010)
408 34 100       80 next if ($tag <= 10);
409             #skip 016 fields
410 31 50       61 next if ($tag eq '016');
411              
412             #create array holding arrayrefs for subfield code and data
413 31         72 my @subfields= $field->subfields();
414              
415             #look at data in last subfield
416 31         463 my $lastsubfield = pop (@subfields);
417              
418             #each $subfield is an array ref containing a subfield code character and subfield data
419 31         51 my ($code, $data) = @$lastsubfield;
420              
421             #look for one or more instances of spaces at end of subfield data
422 31 100       152 if ($data =~ /\s+$/) {
423             #field had extra spaces
424 2         9 push @warningstoreturn, join '', ($tag, ": has trailing spaces.");
425             } #if had extra spaces
426             } #foreach field
427              
428 1         6 return \@warningstoreturn;
429              
430             } # check_trailing_spaces
431              
432             #########################################
433             #########################################
434             #########################################
435             #########################################
436              
437             =head2 check_006($record)
438              
439             Code for validating 006s in MARC records.
440             Validates each byte of the 006, based on #MARC::Errorchecks::validate008($field008, $mattype, $biblvl)
441              
442             =head2 TO DO (check_006)
443              
444             Use validate008 subroutine:
445             -Break byte 18-34 checking into separate sub so it can be used for 006 validation as well.
446             -Optimize efficiency.
447              
448            
449             =cut
450              
451             sub check_006 {
452              
453             #get passed MARC::Record object
454 2     2 1 3159 my $record = shift;
455             #declaration of return array
456 2         6 my @warningstoreturn = ();
457              
458             #get 006 fields from record
459 2 100       13 my @fields006 = $record->field('006') if $record->field('006');
460             #done if no 006
461 2 100       718 return \@warningstoreturn unless (@fields006);
462              
463 1         3 FIELD: foreach my $field006 (@fields006) {
464 54         130 my $field006_string = $field006->as_string();
465 54 100       523 unless (length($field006_string) eq 18) {
466 2         14 my $length006 = length($field006_string);
467 2         7 push @warningstoreturn, "006: Must be 18 bytes long but is $length006 bytes long ($field006_string).";
468 2         4 next FIELD;
469              
470             } #unless 18 bytes
471             else {
472             #call _validate006 subroutine from Errorchecks.pm (this package)
473 52         51 push @warningstoreturn, @{MARC::Errorchecks::_validate006($field006_string)};
  52         63  
474              
475             } #else 18 bytes
476             } #foreach 006
477            
478 1         13 return \@warningstoreturn;
479              
480             } # check_006
481              
482             #########################################
483             #########################################
484             #########################################
485             #########################################
486              
487             =head2 check_008($record)
488              
489             Code for validating 008s in MARC records.
490             Validates each byte of the 008, based on MARC::Errorchecks::validate008($field008, $mattype, $biblvl)
491              
492             =head2 TO DO (check_008)
493              
494             Improve validate008 subroutine (see that sub for more information):
495             -Break byte 18-34 checking into separate sub so it can be used for 006 validation as well.
496             -Optimize efficiency.
497              
498             Revised 12-2-2004 to use new validate008() sub.
499            
500             =cut
501              
502             sub check_008 {
503              
504             #get passed MARC::Record object
505 1     1 1 2 my $record = shift;
506             #declaration of return array
507 1         3 my @warningstoreturn = ();
508              
509             # set variables needed for 008 validation
510 1         4 my $leader = $record->leader();
511             #$mattype and $biblvl are from LDR/06 and LDR/07
512 1         11 my $mattype = substr($leader, 6, 1);
513 1         4 my $biblvl = substr($leader, 7, 1);
514 1 50       4 my $field008 = $record->field('008')->as_string() if $record->field('008');
515            
516             #report missing 008 field
517 1 50       102 unless ($field008) {
518 0         0 push @warningstoreturn, ("008: Record lacks 008 field") ;
519 0         0 return \@warningstoreturn;
520             } #unless field 008 exists
521            
522             #call validate008 subroutine from Errorchecks.pm (this package)
523 1         2 @warningstoreturn = @{MARC::Errorchecks::validate008($field008, $mattype, $biblvl)};
  1         6  
524              
525 1         7 return \@warningstoreturn;
526              
527             } # check_008
528              
529             #########################################
530             #########################################
531             #########################################
532             #########################################
533              
534             =head2 check_010($record)
535              
536             Verifies 010 subfield 'a' has proper spacing.
537              
538             =head2 TO DO (check_010)
539              
540             Compare efficiency of getting current date vs. setting global current date. Determine best way to establish global date.
541              
542             Think about whether subfield 'z' needs proper spacing.
543              
544             Deal with non-digit characters in original 010a field.
545             Currently these are simply reported and the space checking is skipped.
546              
547             Revise local treatment of LCCN checking (invalid 8-digits pre-1980) for more universal use.
548              
549             Maintain date ranges in checking validity of numbers.
550              
551             Modify date ranges according to local catalog needs.
552              
553             Determine whether this subroutine can be implemented in MARC::Lintadditions/Lint--I don't remember why it is here rather than there?
554              
555             =cut
556              
557              
558             sub check_010 {
559              
560             #get passed MARC::Record object
561 17     17 1 17342 my $record = shift;
562             #declaration of return array
563 17         30 my @warningstoreturn = ();
564              
565             #set current year for validation of year portion of 10-digit LCCNs
566 17         35 my $current_date = _get_current_date();
567 17         36 my $current_year = substr($current_date, 0, 4);
568              
569             ##############################################
570             ## Declare variables needed for each record ##
571             ##############################################
572              
573             # $field_010 will have MARC::Field version of the 010 field of the record
574 17         22 my $field_010 = '';
575             #$cleaned010a will have the finished cleaned 010a data
576 17         25 my $cleaned010a = '';
577              
578             #skip records with no 010 and no 010$a
579 17 50 33     60 unless (($record->field('010')) && ($record->field('010')->subfield('a'))) {return \@warningstoreturn;}
  0         0  
580              
581             # record has an 010 with subfield a, so check for errors and then do cleanup
582             else {
583              
584 17         1458 $field_010 = $record->field('010');
585             # $orig010a contains base subfield 'a' for comparison
586 17         586 my $orig010a = $field_010->subfield('a');
587             # $subfielda will be cleaned and then compared with the original
588 17         281 my $subfielda = $field_010->subfield('a');
589              
590             #Get number portion of subfield
591 17         288 $subfielda =~ s/^\D*(\d{8,10})\b\D*.*$/$1/;
592             #report error if 8-10 digit number was not found
593 17 50       52 unless ($1) {
594 0         0 push @warningstoreturn, ("010: Could not find an 8-10 digit number in subfield 'a'.");
595             #no need to continue processing 010a so return
596 0         0 return \@warningstoreturn;
597             } #unless 8-10 digit number found in 010a
598              
599             #######################################################
600             # LCCN validity checks and setting of cleaned version #
601             #######################################################
602             #check validity of resulting digits
603 17 100       69 if ($subfielda =~ /^\d{8}$/) {
    50          
604              
605             =head2 local practice
606              
607             #this section could be implemented to validate 8-digit LCCN being between a specific set of years (1900-1980, for example).
608              
609             #code has been commented/podded out for general practice
610             my $year = substr($subfielda, 0, 2);
611             #should be old lccn, so first 2 digits are 00 or > 80
612             #The 1980 limit is a local practice.
613             #Change the date ranges according to local needs (e.g. if LC records back to 1900 exist in the catalog, do not implement this section of the error check)
614             if (($year >= 1) && ($year < 80)) {push @warningstoreturn, ("010: First digits of LCCN are $year.");}
615              
616             =cut
617              
618             #8 digit lccn needs 3 spaces before, 1 after, so put that in $cleaned010a
619             #else year is valid
620             ##used in case local practice year validation is being done
621 9         16 $cleaned010a = " $subfielda ";
622             #end else if year check implemented
623             } #if lccn is 8 digits
624              
625             #otherwise if $subfielda is 10 digits
626             elsif ($subfielda =~ /^\d{10}$/) {
627 8         19 my $year = substr($subfielda, 0, 4);
628             # no valid 10 digit will be less than 2001
629 8 100 66     50 if (($year < 2001) || ($year > $current_year)) {push @warningstoreturn, ("010: First digits of LCCN are $year.");}
  2         9  
630             #otherwise, 10 digit lccn needs 2 spaces before, 0 after, so put that in $cleaned010a
631             else {
632 6         13 $cleaned010a = " $subfielda";
633             } #else $subfielda has valid lccn
634             } #elsif lccn is 10 digits
635              
636             # lccn is not 8 or 10 digits so report error
637             else {
638             #should have already returned but just in case,
639 0         0 push @warningstoreturn, ("010: LCCN subfield 'a' is not 8 or 10 digits");
640             } #else not 8-10 digits?
641              
642             #return if warnings have been found to this point
643 17 100       41 if (@warningstoreturn) {return \@warningstoreturn;}
  2         8  
644              
645             ###########################################
646             ### Compare cleaned field with original ###
647             ###########################################
648              
649             #if original and cleaned match, go to next record
650 15 100       50 if ($orig010a eq $cleaned010a) {return \@warningstoreturn;}
  3 100       11  
651             #elsif non-digits are present in 010a
652             elsif ($orig010a =~ /[^ 0-9]/) {
653 2         3 my $orig010a_lccn = $orig010a;
654             #get uncleaned numeric portion
655 2         7 $orig010a_lccn =~ s/^( *\d+ *).*/$1/;
656             #report error if non-digits are in number portion
657             ##(shouldn't happen as should have returned above)
658 2 50       11 if ($subfielda !~ /^[ \d]*$/) {push @warningstoreturn, ("010: Subfield 'a' has non-digits ($orig010a).");} #if non-digits
  0 50       0  
  2         7  
659             elsif ($orig010a_lccn eq $cleaned010a) {return \@warningstoreturn;}
660             else {
661 0         0 push @warningstoreturn, ("010: Subfield 'a' has improper spacing ($orig010a).");
662             } #else improper spacing
663             } #elsif non-digits in 010a
664             else {
665 10         27 push @warningstoreturn, ("010: Subfield 'a' has improper spacing ($orig010a).");
666              
667             } #else original and cleaned 010 do not match
668             } # else record has 010subfielda
669              
670              
671 10         42 return \@warningstoreturn;
672              
673              
674             } # check_010
675              
676             #########################################
677             #########################################
678             #########################################
679             #########################################
680              
681             =head2 NAME
682              
683             check_end_punct_300($record)
684              
685             =head2 DESCRIPTION
686              
687             Reports an error if an ending period in 300 is missing if 4xx exists, or if 300 ends with closing parens-period if 4xx does not exist.
688              
689             =cut
690              
691              
692             sub check_end_punct_300 {
693              
694 1     1 0 2 my $record = shift;
695             #declaration of return array
696 1         3 my @warningstoreturn = ();
697              
698             #get leader and retrieve its relevant bytes
699 1         3 my $leader = $record->leader();
700             #$encodelvl ('8' for CIP, ' ' [space] for 'full')
701 1         10 my $encodelvl = substr($leader, 17, 1);
702              
703              
704             #skip CIP-level records
705 1 50       4 if ($encodelvl eq '8') {return \@warningstoreturn;}
  0         0  
706              
707             #retrieve any 4xx fields in record
708 1         5 my @fields4xx = $record->field('4..');
709              
710 1 50       273 if ($record->field('300')) {
  0         0  
711 1         122 my $field300 = $record->field('300');
712 1         109 my @subfields = $field300->subfields();
713 1         23 my @newsubfields = ();
714            
715             #break down code and data for last subfield
716 1         1 my $subfield = pop(@subfields);
717 1         3 my ($code, $data) = @$subfield;
718 1         3 unshift (@newsubfields, $code, $data);
719              
720             #last subfield should end in period if 4xx exists
721 1 50 33     9 if (@fields4xx && ($newsubfields[-1] !~ /\.$/)) {
  0 0 0     0  
    0 0        
      0        
722 1         4 push @warningstoreturn, ("300: 4xx exists but 300 does not end with period.");
723             }
724             #last subfield should not end in closing parens-period unless 4xx exists
725             elsif (($newsubfields[-1] =~ /\)\.$/) && !(@fields4xx)) {push @warningstoreturn, ("300: 4xx does not exist but 300 ends with parens-period.");
726             }
727             #last subfield of RDA record should not end with period unless 4xx exists
728             elsif (is_RDA($record) && ($newsubfields[-1] =~ /\.$/) && !(@fields4xx)) {
729 0         0 push @warningstoreturn, ("300: 4xx does not exist but 300 ends with period.");
730             }
731             } #if 300 field exists
732              
733             ####testing ######
734             # see what records have no 300
735             else {push @warningstoreturn, ("300: Record has no 300.");}
736             ##########################################
737              
738             # report any errors
739 1         6 return \@warningstoreturn;
740              
741             } # check_end_punct_300
742              
743             #########################################
744             #########################################
745             #########################################
746             #########################################
747              
748             =head2 NAME
749              
750             check_bk008_vs_300($record)
751              
752             =head2 DESCRIPTION
753              
754             300 subfield 'b' vs. presence of coding for illustrations in 008/18-21.
755              
756             Ignores CIP records completely.
757             Ignores non-book records completely (for the purposes of this subroutine).
758              
759             If 300 'b' has wording, reports errors if matching 008/18-21 coding is not present.
760             If 008/18-21 coding is present, but similar wording is not present in 300, reports errors.
761              
762             Note: plates are an exception, since they are noted in $a rather than $b of the 300.
763             So, they need to be checked twice--once if 'f' is the only code in the 008/18-21, and again amongst other codes.
764              
765             Also checks for 'p.' or 'v.' in subfield 'a'
766              
767             =head2 LIMITATIONS
768              
769             Only accounts for a single 300 field (300 was recently made repeatable).
770              
771             Older/more specific code checking is limited due to lack of use (by our catalogers).
772             For example, coats of arms, facsim., etc. are usually now given as just 'ill.'
773             So the error check allows either the specific or just ill. for all except maps.
774              
775             Depends upon 008 being coded for book monographs.
776              
777             Subfield 'a' and 'c' wording checks ('p.' or 'v.'; 'cm.', 'in.', 'mm.') only look at first of each kind of subfield.
778              
779             =head2 TO DO (check_bk008_vs_300($record))
780              
781             Take care of case of 008 coded for serials/continuing resources.
782              
783             Find exceptions to $a having 'p.' or 'v.' (and leaves, columns) for books.
784              
785             Find exceptions to $c having 'cm.', 'mm.', or 'in.' preceded by digits.
786              
787             Deal with other LIMITATIONS.
788              
789             Account for upcoming rule change in which metric units have no punctuation.
790             When that rule goes into effect, move 300$c checking to check_end_punct_300($record).
791              
792             Reverse checks to report missing 008 code if specific wording is present in 300.
793              
794             Reverse check for plates vs. 'f'
795              
796             =cut
797              
798             sub check_bk008_vs_300 {
799              
800 1     1 1 3 my $record = shift;
801             #declaration of return array
802 1         3 my @warningstoreturn = ();
803              
804             #declaration of variable for electronic resource vs. not
805 1         2 my $is_electronic = 0;
806             #determine whether record is RDA or not
807 1         3 my $record_is_RDA = is_RDA($record);
808              
809             #get leader and retrieve its relevant bytes (mattype ('a' for 'books')),
810             #$encodelvl ('8' for CIP, ' ' [space] for 'full')
811             #$biblvl will be useful in future version, where seriality matters
812              
813 1         4 my $leader = $record->leader();
814 1         8 my $mattype = substr($leader, 6, 1);
815             #my $biblvl = substr($leader, 7, 1);
816 1         3 my $encodelvl = substr($leader, 17, 1);
817              
818              
819             #skip CIP-level records
820 1 50       6 if ($encodelvl eq '8') {return \@warningstoreturn;
  0 50       0  
  0         0  
821             }
822             #####################################
823             #####################################
824             ### skip non-book records for now ###
825             elsif ($mattype ne 'a') {return \@warningstoreturn;}
826             #####################################
827             #####################################
828             #otherwise, match 008/18-21 vs. 300.
829             else {
830              
831 1 50       3 my $field008 = $record->field('008')->as_string() if $record->field('008');
832 1 50       72 return \@warningstoreturn unless $field008;
833              
834 1 50 33     5 if (($record->subfield('245', 'h')) && ($record->subfield('245', 'h') =~ /\[electronic resource\]/)) {
835 0         0 $is_electronic = 1;
836             } #if 245 _h has electronic resource
837              
838             #illustration codes are in bytes 18-21
839 1         151 my $illcodes = substr($field008, 18, 4);
840 1         8 my ($hasill, $hasmap, $hasport, $hascharts, $hasplans, $hasplates, $hasmusic, $hasfacsim, $hascoats, $hasgeneal, $hasforms, $hassamples, $hasphono, $hasphotos, $hasillumin);
841              
842             #make sure field 300 exists
843 1 50       8 if ($record->field('300')) {
  0         0  
844             #get 300 field as a MARC::Field object
845 1         112 my $field300 = $record->field('300');
846             #set variables for
847 1 50       108 my $subfielda = $field300->subfield('a') if ($field300->subfield('a'));
848 1 50       49 my $subfieldb = $field300->subfield('b') if ($field300->subfield('b'));
849 1 50       50 my $subfieldc = $field300->subfield('c') if ($field300->subfield('c'));
850              
851             #######################################
852             ### 300 subfield 'a' and 'c' checks ###
853             #######################################
854              
855             #Check for 'p.' or 'v.' or leaves in subfield 'a' unless electronic resource
856 1 50       43 if ($subfielda) {
857 1 50       4 unless ($is_electronic == 1) {
858 1 50       4 unless ($record_is_RDA) {
859             #error if no 'p.', 'v.', 'column', 'leaf', or 'leaves' found
860 1 50 33     16 push @warningstoreturn, ("300: Check subfield _a for p. or v.") unless (
      33        
      33        
861             ($subfielda =~ /\(?.*\b[pv]\.[,\) ]?/) ||
862             ($subfielda =~ /\(?.*\bcolumns?\)?/) ||
863             ($subfielda =~ / leaves /) ||
864             ($subfielda =~ / leaf /)
865             );
866             #error if 'p.' found after parenthetical qualifier on 'v.'
867 1 50       5 if (($subfielda =~ /\(((?:unpaged)|(?:various pagings))\) p\.?\b/)) {
868 0         0 push @warningstoreturn, ("300: Check subfield _a for extra p.")
869             } #if extra 'p.'
870             } #unless RDA record
871             else {
872             #error if no 'page(s)', 'volume(s)', 'column', 'leaf', or 'leaves' found
873 0 0 0     0 push @warningstoreturn, ("300: Check subfield _a for page(s) or volume(s)") unless (
      0        
      0        
      0        
874             ($subfielda =~ /\(?.*\bpages?[,\) ]?/) ||
875             ($subfielda =~ /\(?.*\bvolumes?[,\) ]?/) ||
876             ($subfielda =~ /\(?.*\bcolumns?\)?/) ||
877             ($subfielda =~ / leaves /) ||
878             ($subfielda =~ / leaf /)
879             );
880             #error if 'p.' found after parenthetical qualifier on 'v.'
881 0 0       0 if (($subfielda =~ /\(((?:unpaged)|(?:various pagings))\) p\.?\b/)) {
882 0         0 push @warningstoreturn, ("300: Check subfield _a for extra p.")
883             } #if extra 'p.'
884             }
885             } #unless electronic resource
886             } #if 300 subfielda exists
887             #report missing subfield a
888             else {
889 0         0 push @warningstoreturn, ("300: Subfield _a is not present.");
890             } #else $subfielda is undefined
891              
892             #check for 'cm.', 'mm.' or 'in.' in subfield 'c'
893 1 50       8 if ($subfieldc) {
894 1 50       9 unless ($record_is_RDA) {
895 1 50       17 push @warningstoreturn, ("300: Check subfield _c for cm., mm. or in.") unless ($subfieldc =~ /\d+ (([cm]m\.)|(in\.))/);
896             } #unless RDA
897             else {
898 0 0       0 push @warningstoreturn, ("300: Check subfield _c for cm, mm or in.") unless ($subfieldc =~ /\d+ (([cm]m)|(in\.))/);
899             } #else RDA
900             } #if subfield c
901             #report missing subfield c
902             else {
903 0         0 push @warningstoreturn, ("300: Subfield _c is not present.");
904             } #else $subfieldc is undefined
905             #######################################
906              
907             #if $subfieldb present with 'col', ensure period exists after all
908 1 50       3 unless ($record_is_RDA) {
909 1 50 33     11 if ($subfieldb && ($subfieldb =~ /col[^\.]/)) {
910 1         2 push @warningstoreturn, ("300: Check subfield _b for missing period after col.");
911             } #if subfield b has 'col' with missing period
912             } #unless RDA
913             else {
914 0 0 0     0 if ($subfieldb && ($subfieldb =~ /col\./)) {
915 0         0 push @warningstoreturn, ("300: Check subfield _b for abbreviated col.");
916             } #if subfield b has 'col.' rather than colo(u)red
917             }
918             ##### 008 ill. vs. 300 wording basic checks
919             # if $illcodes not coded and no subfield 'b' no problem so move on
920 1 50 33     24 if (($illcodes =~ /^\s{4}$/) && !($subfieldb)) {return \@warningstoreturn;}
  0 50 33     0  
  0 50 33     0  
    50 33        
    0 0        
921             # 008 is coded blank (4 spaces) but 300 subfield 'b' exists so error
922 0         0 elsif (($illcodes =~ /^\s{4}$/) && ($subfieldb)) {push @warningstoreturn, ("008: bytes 18-21 (Illustrations) coded blank but 300 has subfield 'b'."); return \@warningstoreturn;}
  0         0  
923             # 008 has valid code but no 300 subfield 'b' so error
924 0         0 elsif (($illcodes =~ /[a-e,g-m,o,p]/) && !($subfieldb)) {push @warningstoreturn, ("008: bytes 18-21 (Illustrations) have valid code but 300 has no subfield 'b'."); return \@warningstoreturn;}
925              
926             ##############
927             #otherwise, check 008/18-21 vs. 300 subfield 'b'
928             # valid coding in 008/18-21 and have 300 $b
929             elsif (($illcodes =~ /[a-e,g-m,o,p]/) && ($subfieldb)) {
930             # start comparing
931             #call subroutine to do main checking
932 1         5 my $illcodewarnref = parse008vs300b($illcodes, $subfieldb, $record_is_RDA);
933 1 50       6 push @warningstoreturn, (join "\t", @$illcodewarnref) if (@$illcodewarnref);
934              
935             #take care of special case of plates when other codes are present
936 1 50 33     7 if (($illcodes =~ /f/) && ($subfielda)) {
937             #report error if 'plate' does not appear in 300$a
938 0 0       0 unless ($subfielda =~ /plate/) {push @warningstoreturn, ("300: bytes 18-21 (Illustrations) is coded f for plates but 300 subfield a is $subfielda ");
  0         0  
939             } #unless subfield 'a' has plate(s)
940             } #if 008ill. has 'f' but 300 does not have 'plate'(s)
941             } #elsif valid 008/18-21 and 300$b exists
942              
943             #elsif $illcodes is coded only 'f' (plates), which are noted in 300$a
944             elsif (($illcodes =~ /f/) && ($subfielda)) {
945             #report error if 'plate' does not appear in 300$a
946 0 0       0 unless ($subfielda =~ /plate/) {
947 0         0 push @warningstoreturn, ("300: bytes 18-21 (Illustrations) is coded f for plates but 300 subfield a is $subfielda ");
948 0         0 return \@warningstoreturn;
949             } #unless subfield 'a' has plate(s)
950             } #elsif 008ill. has 'f' but 300a does not have 'plate'(s)
951              
952             #otherwise, not valid 008/18-21
953             else {
954 0         0 push @warningstoreturn, ("008: bytes 18-21 (Illustrations) have a least one invalid character."); return \@warningstoreturn;
  0         0  
955             } #else not valid 008/18-21
956             } # if record has 300 field
957              
958             #else 300 does not exist in full book record so report error
959 0         0 else {push @warningstoreturn, ("300: Record has no 300."); return \@warningstoreturn;}
960             } #else (record is not CIP and is a book-type)
961              
962 1         9 return \@warningstoreturn;
963              
964             } # check_bk008_vs_300($record)
965              
966             #########################################
967             #########################################
968             #########################################
969             #########################################
970              
971             =head2 NAME
972              
973             parse008vs300b($illcodes, $field300subb)
974            
975             =head2 DESCRIPTION
976              
977             008 illustration parse subroutine
978              
979             checks 008/18-21 code against 300 $b
980              
981             =head2 WHY?
982              
983             To simplify the check_bk008_vs_300($record) subroutine, which had many if-then statements. This moves the additional checking conditionals out of the way.
984             It may be integrated back into the main subroutine once it works.
985             This was written while constructing check_bk008_vs_300($record) as a separate script.
986              
987             =head2 Synopsis/Usage description
988              
989             parse008vs300b($illcodes, $field300subb)
990              
991             #$illcodes is bytes 18-21 of 008
992             #$subfieldb is subfield 'b' of record's 300 field
993              
994             =head2 TO DO (parse008vs300b($$))
995              
996             Integrate code into check_bk008_vs_300($record)?
997              
998             Verify possibilities for 300 text
999              
1000             Move 'm' next to 'f' since it is likely to be indicated in subfield 'e' not 'b' of the 300.
1001             Our catalogers do not generally code for sound recordings in this way in book records.
1002              
1003             =cut
1004              
1005             sub parse008vs300b {
1006              
1007 1     1 1 2 my $illcodes = shift;
1008 1         2 my $subfieldb = shift;
1009 1         1 my $record_is_RDA = shift;
1010             #parse $illcodes
1011 1         9 my ($hasill, $hasmap, $hasport, $hascharts, $hasplans, $hasplates, $hasmusic, $hasfacsim, $hascoats, $hasgeneal, $hasforms, $hassamples, $hasphono, $hasphotos, $hasillumin);
1012 1 50       5 ($illcodes =~ /a/) ? ($hasill = 1) : ($hasill = 0);
1013 1 50       13 ($illcodes =~ /b/) ? ($hasmap = 1) : ($hasmap = 0);
1014 1 50       5 ($illcodes =~ /c/) ? ($hasport = 1) : ($hasport = 0);
1015 1 50       4 ($illcodes =~ /d/) ? ($hascharts = 1) : ($hascharts = 0);
1016 1 50       3 ($illcodes =~ /e/) ? ($hasplans = 1) : ($hasplans = 0);
1017 1 50       4 ($illcodes =~ /f/) ? ($hasplates = 1) : ($hasplates = 0);
1018 1 50       4 ($illcodes =~ /g/) ? ($hasmusic = 1) : ($hasmusic = 0);
1019 1 50       5 ($illcodes =~ /h/) ? ($hasfacsim = 1) : ($hasfacsim = 0);
1020 1 50       4 ($illcodes =~ /i/) ? ($hascoats = 1) : ($hascoats = 0);
1021 1 50       4 ($illcodes =~ /j/) ? ($hasgeneal = 1) : ($hasgeneal = 0);
1022 1 50       9 ($illcodes =~ /k/) ? ($hasforms = 1) : ($hasforms = 0);
1023 1 50       4 ($illcodes =~ /l/) ? ($hassamples = 1) : ($hassamples = 0);
1024 1 50       4 ($illcodes =~ /m/) ? ($hasphono = 1) : ($hasphono = 0);
1025 1 50       4 ($illcodes =~ /o/) ? ($hasphotos = 1) : ($hasphotos = 0);
1026 1 50       3 ($illcodes =~ /p/) ? ($hasillumin = 1) : ($hasillumin = 0);
1027              
1028 1         2 my @illcodewarns = ();
1029              
1030             # Check and report errors
1031              
1032             #if 008/18-21 has code 'a', 300$b needs to have 'ill.'
1033 1 50 33     10 if ($hasill) {
  1 50       9  
    0          
1034 0 0       0 unless ($record_is_RDA) {
1035 0 0       0 push @illcodewarns, ("300: bytes 18-21 have code 'a' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /ill\./);
1036             } #unless RDA
1037             else {
1038 0 0       0 if ($subfieldb =~ /ill\./) {
1039 0         0 push @illcodewarns, ("300: Check for abbreviated 'ill.'");
1040             }
1041             else {
1042 0 0       0 push @illcodewarns, ("300: bytes 18-21 have code 'a' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /illustration/);
1043             } #else no "illustration" in 300 with 008 coded with 'a'
1044             } #else RDA
1045             } #if hasill
1046             # if 300$b has 'ill.', 008/18-21 should have 'a'
1047             elsif (!$record_is_RDA && ($subfieldb =~ /ill\./)) {push @illcodewarns, ("008: Bytes 18-21 do not have code 'a' but 300 subfield 'b' has 'ill.'")}
1048             elsif ($record_is_RDA) {
1049 0 0       0 if ($subfieldb =~ /illustration/) {
    0          
1050 0         0 push @illcodewarns, ("008: Bytes 18-21 do not have code 'a' but 300 subfield 'b' has 'illustration'")
1051             } #if illustration in 300 and no 'a' in 008
1052             elsif ($subfieldb =~ /ill\./) {
1053 0         0 push @illcodewarns, ("008: Bytes 18-21 do not have code 'a' but 300 subfield 'b' has 'ill.'", "300: Check for abbreviated 'ill.'")
1054             }
1055             }
1056              
1057             #if 008/18-21 has code 'b', 300$b needs to have 'map' (or 'maps')
1058 1 50       4 if ($hasmap) {push @illcodewarns, ("300: bytes 18-21 have code 'b' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /map[ \,s]/);}
  1 50       7  
  0 0       0  
1059             # if 300$b has 'map', 008/18-21 should have 'b'
1060             elsif ($subfieldb =~ /map/) {push @illcodewarns, ("008: Bytes 18-21 do not have code 'b' but 300 subfield 'b' has 'map' or 'maps'")}
1061              
1062             #if 008/18-21 has code 'c', 300$b needs to have 'port.' or 'ports.' (or ill.)
1063 1 50 33     13 if ($hasport) {
  0 50       0  
    50          
1064 0 0       0 unless ($record_is_RDA) {
1065 0 0       0 push @illcodewarns, ("300: bytes 18-21 have code 'c' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /port\.|ports\.|ill\./);
1066             } #unless RDA
1067             else {
1068 0 0       0 if ($subfieldb =~ /port\.|ports\./) {
1069 0         0 push @illcodewarns, ("300: Check for abbreviated 'port(s).'");
1070             }
1071             else {
1072 0 0       0 push @illcodewarns, ("300: bytes 18-21 have code 'c' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /portrait/);
1073             } #else no "illustration" in 300 with 008 coded with 'c'
1074             } #else RDA
1075             } #if hasill
1076             # if 300$b has 'port(s).', 008/18-21 should have 'c'
1077             elsif (!$record_is_RDA && ($subfieldb =~ /port\.|ports\./)) {push @illcodewarns, ("008: Bytes 18-21 do not have code 'c' but 300 subfield 'b' has 'port(s).'")}
1078             elsif ($record_is_RDA) {
1079 0 0       0 if ($subfieldb =~ /portrait/) {
    0          
1080 0         0 push @illcodewarns, ("008: Bytes 18-21 do not have code 'c' but 300 subfield 'b' has 'portrait'")
1081             } #if illustration in 300 and no 'a' in 008
1082             elsif ($subfieldb =~ /port\.|ports\./) {
1083 0         0 push @illcodewarns, ("008: Bytes 18-21 do not have code 'c' but 300 subfield 'b' has 'port(s).'", "300: Check for abbreviated 'port(s).'")
1084             }
1085             }
1086            
1087             #if 008/18-21 has code 'd', 300$b needs to have 'chart' (or 'charts') (or ill.)
1088 1 0       4 if ($hascharts) {push @illcodewarns, ("300: bytes 18-21 have code 'd' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /chart|ill\.|illustration/);}
  0 50       0  
1089             #### add cross-check ###
1090              
1091              
1092             #if 008/18-21 has code 'e', 300$b needs to have 'plan' (or 'plans') (or ill.)
1093 1 0       3 if ($hasplans) {push @illcodewarns, ("300: bytes 18-21 have code 'e' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /plan|ill\.|illustration/);}
  0 50       0  
1094             #### add cross-check ###
1095              
1096             ### Skip 'f' for plates, which are in 300$a ###
1097              
1098             #if 008/18-21 has code 'g', 300$b needs to have 'music' (or ill.)
1099 1 0       6 if ($hasmusic) {push @illcodewarns, ("300: bytes 18-21 have code 'g' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /music|ill\.|illustration/);}
  0 50       0  
  0 50       0  
1100             # if 300$b has 'music', 008/18-21 should have 'g'
1101             elsif ($subfieldb =~ /music/) {push @illcodewarns, ("008: Bytes 18-21 do not have code 'g' but 300 subfield 'b' has 'music'")}
1102              
1103             #if 008/18-21 has code 'h', 300$b needs to have 'facsim.' or 'facsims.' (or ill.)
1104 1 0       3 if ($hasfacsim) {push @illcodewarns, ("300: bytes 18-21 have code 'h' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /facsim\.|facsims\.|facimile|ill\.|illustration/);}
  0 50       0  
1105             #### add cross-check ###
1106              
1107             #if 008/18-21 has code 'i', 300$b needs to have 'coats of arms' (or 'coat of arms'?) (or ill.)
1108 1 0       3 if ($hascoats) {push @illcodewarns, ("300: bytes 18-21 have code 'i' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /coats of arms|ill\.|illustration/);}
  0 50       0  
1109             #### add cross-check ###
1110              
1111             #if 008/18-21 has code 'j', 300$b needs to have 'geneal. table' (or 'geneal. tables') (or ill.)
1112 1 0       2 if ($hasgeneal) {push @illcodewarns, ("300: bytes 18-21 have code 'j' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /geneal\. table|genealogical table|ill\.|illustration/);}
  0 50       0  
1113             #### add cross-check ###
1114              
1115             #if 008/18-21 has code 'k', 300$b needs to have 'forms' or 'form' (or ill.)
1116 1 0       4 if ($hasforms) {push @illcodewarns, ("300: bytes 18-21 have code 'k' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /form[ s]|ill\.|illustration/);}
  0 50       0  
1117             #### add cross-check ###
1118              
1119             #if 008/18-21 has code 'l', 300$b needs to have 'samples' (or ill.)
1120 1 0       4 if ($hassamples) {push @illcodewarns, ("300: bytes 18-21 have code 'l' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /samples|ill\.|illustration/);}
  0 50       0  
1121             #### add cross-check ###
1122              
1123             ##########################################
1124             ##########################################
1125             ### code 'm' appears to be for 'sound disc', 'sound cartridge', 'sound tape reel', 'sound cassette', 'roll' or 'cylinder'
1126             #these would likely appear in subfield 'e' of the 300 (as accompanying material) for book records.
1127             #so this should be treated separately, like plates ('f')
1128             #This code is not used by our catalogers
1129             #if 008/18-21 has code 'm', 300$b needs to have 'phono'? (or ill.)
1130 1 50       3 if ($hasphono) {push @illcodewarns, ("300: bytes 18-21 have code 'm' (phonodisc, sound disc, etc.).");}
  0         0  
1131             ##########################################
1132             ##########################################
1133              
1134             #if 008/18-21 has code 'o', 300$b needs to have 'photo.' or 'photos.' (or ill.)
1135 1 0       10 if ($hassamples) {push @illcodewarns, ("300: bytes 18-21 have code 'o' but 300 subfield b is $subfieldb") unless ($subfieldb =~ /photo\.|photos\.|photograph|ill\.|illustration/);}
  0 50       0  
1136             #### add cross-check ###
1137              
1138             ##########################################
1139             ##########################################
1140             ### I don't know what this is, so for this, report all
1141             #if 008/18-21 has code 'p', 300$b needs to have 'illumin'? (or ill.)
1142 1 50       9 if ($hasillumin) {push @illcodewarns, ("300: bytes 18-21 have code 'p' but 300 subfield b is $subfieldb");}
  0         0  
1143             #### add cross-check ###
1144             ##########################################
1145             ##########################################
1146              
1147 1         3 return \@illcodewarns;
1148              
1149             } #sub parse008vs300b
1150              
1151              
1152             #########################################
1153             #########################################
1154             #########################################
1155             #########################################
1156              
1157             =head2 check_490vs8xx($record)
1158              
1159             If 490 with 1st indicator '1' exists, then 8xx (800, 810, 811, 830) should exist.
1160              
1161             =cut
1162              
1163             sub check_490vs8xx {
1164              
1165             #get passed MARC::Record object
1166 1     1 1 2 my $record = shift;
1167             #declaration of return array
1168 1         3 my @warningstoreturn = ();
1169              
1170 1         2 my $has_series_field = 0;
1171 1         4 my @series_fields = ('800', '810', '811', '830');
1172              
1173 1 50       4 $has_series_field = 1 if ($record->field(@series_fields));
1174              
1175             #report error if 490 1st ind is 1 but 8xx does not exist
1176 1 50 33     861 if ($record->field(490) && ($record->field(490)->indicator(1) eq '1')) {
1177 1 50       263 push @warningstoreturn, ("490: Indicator is 1 but 8xx does not exist.") unless ($has_series_field);
1178             }
1179              
1180 1         7 return \@warningstoreturn;
1181              
1182             } # check_490vs8xx
1183              
1184             #########################################
1185             #########################################
1186             #########################################
1187             #########################################
1188              
1189             #########################################
1190             #########################################
1191             #########################################
1192             #########################################
1193              
1194             =head2 check_240ind1vs1xx($record)
1195              
1196             If 1xx exists then 240 1st indicator should be '1'.
1197             If 1xx does not exist then 240 should not be present.
1198              
1199             However, exceptions to this rule are possible, so this should be considered an optional error.
1200              
1201             =cut
1202              
1203             sub check_240ind1vs1xx {
1204              
1205             #get passed MARC::Record object
1206 1     1 1 3 my $record = shift;
1207             #declaration of return array
1208 1         2 my @warningstoreturn = ();
1209              
1210             #report error if 240 exists but 1xx does not exist
1211 1 50 33     3 if (($record->field(240)) && !($record->field('1..'))) {
    0 0        
      0        
1212 1         289 push @warningstoreturn, ("240: Is present but 1xx does not exist.");
1213             }
1214            
1215             #report error if 240 1st ind is 0 but 1xx exists
1216             elsif (($record->field(240)) && ($record->field(240)->indicator(1) eq '0') && ($record->field('1..'))) {
1217 0         0 push @warningstoreturn, ("240: First indicator is 0 but 1xx exists.");
1218             }
1219              
1220 1         12 return \@warningstoreturn;
1221              
1222             } # check_240ind1vs1xx
1223              
1224             #########################################
1225             #########################################
1226             #########################################
1227             #########################################
1228              
1229             =head2 check_245ind1vs1xx($record)
1230              
1231             If 1xx exists then 245 1st indicator should be '1'.
1232             If 1xx does not exist then 245 1st indicator should be '0'.
1233              
1234             However, exceptions to this rule are possible, so this should be considered an optional error.
1235              
1236             =head2 TODO (check_245ind1vs1xx($record))
1237              
1238             Provide some way to easily turn off reporting of "245: Indicator is 0 but 1xx exists." errors. In some cases, catalogers may choose to code a 245 with 1st indicator 0 if they do not wish that 245 to be indexed. There is not likely a way to programmatically determine this choice by the cataloger, so in situations where catalogers are likely to choose not to index a 245, this error should be supressed.
1239              
1240             =cut
1241              
1242             sub check_245ind1vs1xx {
1243              
1244             #get passed MARC::Record object
1245 1     1 1 3 my $record = shift;
1246             #declaration of return array
1247 1         2 my @warningstoreturn = ();
1248              
1249             #report error if 245 1st ind is 1 but 1xx does not exist
1250 1 50       10 if (($record->field(245)->indicator(1) eq '1')) {
    0          
1251 1 50       79 push @warningstoreturn, ("245: Indicator is 1 but 1xx does not exist.") unless ($record->field('1..'));
1252             } #if 245 1st ind. is 1
1253             #report error if 245 1st ind is 0 but 1xx exists
1254             elsif (($record->field(245)->indicator(1) eq '0')) {
1255             #comment out the line below if your records have unindexed 245s by cataloger's choice
1256 0 0       0 push @warningstoreturn, ("245: Indicator is 0 but 1xx exists.") if ($record->field('1..'));
1257             } #elsif 245 1st ind. is 0
1258              
1259 1         181 return \@warningstoreturn;
1260              
1261             } # check_245ind1vs1xx
1262              
1263             #########################################
1264             #########################################
1265             #########################################
1266             #########################################
1267              
1268              
1269             =head2 matchpubdates($record)
1270              
1271             Date matching 008, 050, 260
1272              
1273             Attempts to match date of publication in 008 date1, 050 subfield 'b', and 260 subfield 'c'.
1274              
1275             Reports errors when one of the fields does not match.
1276             Reports errors if one of the dates cannot be found
1277              
1278             Handles cases where 050 or 260 (or 260c) does not exist.
1279             -Currently if the subroutine is unable to get either the date1, any 050 with $b, or a 260 with $c, it returns (exits).
1280             -Future, or better, behavior, might be to continue processing for the other fields.
1281              
1282             Handles cases where 050 is different due to conference dates.
1283             Conference exception handling is currently limited to presence of 111 field or 110$d.
1284              
1285             For RDA, checks 264 _1 $c as well as 1st 260$c.
1286              
1287             =head2 KNOWN PROBLEMS
1288              
1289             May not deal well with serial records (problem not even approached).
1290              
1291             Only examines 1st 260, does not account for more than one 260 (recent addition).
1292              
1293             Relies upon 260$c date being the first date in the last 260$c subfield.
1294              
1295             Has problem finding 050 date if it is not last set of digits in 050$b.
1296              
1297             Process of getting 008date1 duplicates similar check in C subroutine.
1298              
1299             =head2 TO DO
1300              
1301             Improve Conference publication checking (limited to 111 field or 110$d being present for this version)
1302             This may include comparing 110$d or 111$d vs. 050, and then comparing 008date1 vs. 260$c.
1303              
1304             Fix parsing for 050$bdate.
1305              
1306             For CIP, if 260 does not exist, compare only 050 and 008date1.
1307             Currently, CIP records without 260 are skipped.
1308              
1309             Account for undetermined dates, e.g. [19--?] in 260 and 008.
1310              
1311             Account for older 050s with no date present.
1312              
1313             =cut
1314              
1315             sub matchpubdates {
1316              
1317             #get passed MARC::Record object
1318 1     1 1 2 my $record = shift;
1319             #declaration of return array
1320 1         2 my @warningstoreturn = ();
1321 1         3 my $record_is_RDA = is_RDA($record);
1322              
1323             #get leader and retrieve its relevant bytes,
1324             #$encodelvl ('8' for CIP, ' ' [space] for 'full')
1325              
1326 1         4 my $leader = $record->leader();
1327 1         7 my $encodelvl = substr($leader, 17, 1);
1328              
1329             ########################################
1330             ####### may be used in future ##########
1331             # my $mattype = substr($leader, 6, 1); #
1332             # my $biblvl = substr($leader, 7, 1); #
1333             ########################################
1334              
1335             #skip CIP-level records unless 260 exists
1336 1 0       4 if ($encodelvl eq '8') {return \@warningstoreturn unless ($record->field('260', '264'));}
  0 50       0  
1337              
1338 1 50       3 my $field008 = $record->field('008')->as_string() if ($record->field('008'));
1339 1 50       71 return \@warningstoreturn unless ($field008);
1340              
1341             #date1 is in bytes 7-10
1342 1         3 my $date1 = substr($field008, 7, 4);
1343              
1344             #report error in getting $date1
1345             ## then ignore the rest of the record
1346             ###need to account for dates such as '19--'
1347 1 50 33     10 unless ($date1 && ($date1 =~ /^\d{4}$/)) {push @warningstoreturn, ("008: Could not get date 1."); return \@warningstoreturn;
  0         0  
  0         0  
1348             }
1349              
1350             #get 050(s) if it (they) exist(s)
1351 1 50 33     5 my @fields050 = $record->field('050') if (($record->field('050')) && $record->field('050')->subfield('b'));
1352             #report error in getting at least 1 050 with subfield _b
1353             ##then ignore the rest of the record
1354 1 50       372 unless (@fields050) {push @warningstoreturn, ("050: Could not get 050 or 050 subfield 'b'."); return \@warningstoreturn;
  0         0  
  0         0  
1355             }
1356              
1357             #get 050 date, make sure each is the same if there are multiple fields
1358              
1359 1         3 my @dates050 = ();
1360             #look for date at end of $b in each 050
1361 1         4 foreach my $field050 (@fields050) {
1362 1 50       4 if ($field050->subfield('b')) {
1363 1         24 my $subb050 = $field050->subfield('b');
1364             #remove nondigits and look for 4 digits
1365 1         29 $subb050 =~ s/^.*?\b(\d{4}){1}\D*.*$/$1/;
1366             #add each found date to @dates050
1367 1 50       7 push @dates050, ($subb050) if ($subb050 =~ /\d{4}/);
1368             } # if 050 has $b
1369             } #foreach 050 field
1370              
1371             #compare each date in @dates050
1372 1         5 while (scalar @dates050 > 1) {
1373             #compare first and last
1374 0 0       0 ($dates050[0] == $dates050[-1]) ? (pop @dates050) : (push @warningstoreturn, ("050: Dates do not match in each of the 050s."));
1375             #stop comparing if dates don't match
1376 0 0       0 last if @warningstoreturn;
1377             } # while @dates050 has more than 1 date
1378              
1379 1         2 my $date050 = '';
1380              
1381             #if successful, only one date will remain and @warningstoreturn will not have an 050 error
1382 1 50 33     9 if (($#dates050 == 0) && ((join "\t", @warningstoreturn) !~ /Dates do not match in each of the 050s/)) {
1383              
1384             # set $date050 to the date in @dates050 if it is exactly 4 digits
1385 1 50       80 if ($dates050[0] =~ /^\d{4}$/) {$date050 = $dates050[0];}
  1         2  
  0         0  
1386             else {push @warningstoreturn, ("050: Unable to find 4 digit year in subfield 'b'.");
1387 0         0 return \@warningstoreturn;
1388             } #else
1389             } #if have 050 date without error
1390              
1391 1         2 my $date260 = '';
1392 1 50 0     4 unless ($record_is_RDA) {
    0 0        
1393             #get 260 field if it exists and has a subfield 'c'
1394 1 50 33     5 my $field260 = $record->field('260') if (($record->field('260')) && $record->field('260')->subfield('c'));
1395 1 50       362 unless ($field260) {push @warningstoreturn, ("260: Could not get 260 or 260 subfield 'c'."); return \@warningstoreturn;
  0         0  
  0         0  
1396             }
1397              
1398             #look for date in 260 _c (starting at the end of the field)
1399             ##only want first date in last subfield _c
1400              
1401 1         3 my @subfields = $field260->subfields();
1402 1         24 my @newsubfields = ();
1403 1         2 my $wantedsubc;
1404             #break subfields into code-data array
1405             #stop when first subfield _c is reached (should be the last subfield _c of the field)
1406 1         4 while (my $subfield = pop(@subfields)) {
1407 1         3 my ($code, $data) = @$subfield;
1408 1 50       3 if ($code eq 'c' ) {$wantedsubc = $data; last;}
  1         2  
  1         3  
1409             #should not be necessary to rebuild 260
1410             #unshift (@newsubfields, $code, $data);
1411             } # while
1412              
1413              
1414             #extract 4 digit date portion
1415             # account for [i.e. [date]]
1416 1 50       5 unless ($wantedsubc =~ /\[i\..?e\..*(\d{4}).*?\]/) {
  0         0  
1417 1         7 $wantedsubc =~ s/^.*?\b\D*(\d{4})\D*\b.*$/$1/;
1418             }
1419             else {$wantedsubc =~ s/.*?\[i\..?e\..*(\d{4}).*?\].*/$1/;
1420             }
1421              
1422 1 50       4 if ($wantedsubc =~ /^\d{4}$/) {$date260 = $wantedsubc;}
  1 0       4  
  0         0  
1423             # i.e. date should be 2nd string of 4 digits
1424 0         0 elsif ($wantedsubc =~ /^\d{8}$/) {$date260 = substr($wantedsubc,4,4);}
1425 0         0 else {push @warningstoreturn, ("260: Unable to find 4 digit year in subfield 'c'."); return \@warningstoreturn;
1426             }
1427             } #unless RDA
1428             elsif ($record_is_RDA && ($record->field('260') && $record->field('260')->subfield('c'))) {
1429             #get 260 field if it exists and has a subfield 'c'
1430 0 0 0     0 my $field260 = $record->field('260') if (($record->field('260')) && $record->field('260')->subfield('c'));
1431 0 0       0 unless ($field260) {push @warningstoreturn, ("260: Could not get 260 or 260 subfield 'c'."); return \@warningstoreturn;
  0         0  
  0         0  
1432             }
1433              
1434             #look for date in 260 _c (starting at the end of the field)
1435             ##only want first date in last subfield _c
1436              
1437 0         0 my @subfields = $field260->subfields();
1438 0         0 my @newsubfields = ();
1439 0         0 my $wantedsubc;
1440             #break subfields into code-data array
1441             #stop when first subfield _c is reached (should be the last subfield _c of the field)
1442 0         0 while (my $subfield = pop(@subfields)) {
1443 0         0 my ($code, $data) = @$subfield;
1444 0 0       0 if ($code eq 'c' ) {$wantedsubc = $data; last;}
  0         0  
  0         0  
1445             #should not be necessary to rebuild 260
1446             #unshift (@newsubfields, $code, $data);
1447             } # while
1448              
1449              
1450             #extract 4 digit date portion
1451             # account for [i.e. [date]]
1452 0 0       0 unless ($wantedsubc =~ /\[i\..?e\..*(\d{4}).*?\]/) {
  0         0  
1453 0         0 $wantedsubc =~ s/^.*?\b\D*(\d{4})\D*\b.*$/$1/;
1454             }
1455             else {$wantedsubc =~ s/.*?\[i\..?e\..*(\d{4}).*?\].*/$1/;
1456             }
1457              
1458 0 0       0 if ($wantedsubc =~ /^\d{4}$/) {$date260 = $wantedsubc;}
  0 0       0  
  0         0  
1459             # i.e. date should be 2nd string of 4 digits
1460 0         0 elsif ($wantedsubc =~ /^\d{8}$/) {$date260 = substr($wantedsubc,4,4);}
1461 0         0 else {push @warningstoreturn, ("260: Unable to find 4 digit year in subfield 'c'."); return \@warningstoreturn;
1462             }
1463             } #elsif RDA has 260
1464             else {
1465             #get 264 field if it exists and has a subfield 'c'
1466 0 0       0 my @fields264 = $record->field('264') if ($record->field('264'));
1467 0         0 my $field264_with_c = '';
1468 0         0 for my $field264 (@fields264) {
1469 0         0 my $ind2 = $field264->indicator('2');
1470 0 0       0 if ($ind2 =~ /1/) {
1471 0 0       0 if ($record->field('264')->subfield('c')) {
1472 0         0 $field264_with_c = $field264;
1473             } #if 264$c
1474             } #if indicator 2 is 1
1475 0 0       0 last if $field264_with_c;
1476             } #for 264 fields
1477 0 0       0 unless ($field264_with_c) {push @warningstoreturn, ("264: Could not get 264 or 264 subfield 'c'."); return \@warningstoreturn;}
  0         0  
  0         0  
1478              
1479             #look for date in 264 _c (starting at the end of the field)
1480             ##only want first date in last subfield _c
1481              
1482 0         0 my @subfields = $field264_with_c->subfields();
1483 0         0 my @newsubfields = ();
1484 0         0 my $wantedsubc;
1485             #break subfields into code-data array
1486             #stop when first subfield _c is reached (should be the last subfield _c of the field)
1487 0         0 while (my $subfield = pop(@subfields)) {
1488 0         0 my ($code, $data) = @$subfield;
1489 0 0       0 if ($code eq 'c' ) {$wantedsubc = $data; last;}
  0         0  
  0         0  
1490             #should not be necessary to rebuild 264
1491             #unshift (@newsubfields, $code, $data);
1492             } # while
1493              
1494              
1495             #extract 4 digit date portion
1496             # account for [i.e. [date]]
1497 0 0       0 unless ($wantedsubc =~ /\[i\..?e\..*(\d{4}).*?\]/) {
  0         0  
1498 0         0 $wantedsubc =~ s/^.*?\b\D*(\d{4})\D*\b.*$/$1/;
1499             }
1500             else {$wantedsubc =~ s/.*?\[i\..?e\..*(\d{4}).*?\].*/$1/;
1501             }
1502              
1503 0 0       0 if ($wantedsubc =~ /^\d{4}$/) {$date260 = $wantedsubc;}
  0 0       0  
  0         0  
1504             # i.e. date should be 2nd string of 4 digits
1505 0         0 elsif ($wantedsubc =~ /^\d{8}$/) {$date260 = substr($wantedsubc,4,4);}
1506 0         0 else {push @warningstoreturn, ("264: Unable to find 4 digit year in subfield 'c'."); return \@warningstoreturn;
1507             }
1508            
1509             } #else RDA
1510              
1511             #####################################
1512             #####################################
1513             ### to skip non-book records: ###
1514             #if ($mattype ne 'a') {return \@warningstoreturn;}
1515             #####################################
1516             #####################################
1517              
1518              
1519             ##############################################
1520             ### Check for conference publication here ####
1521             ##############################################
1522 1         3 my $isconfpub = 0;
1523              
1524 1 50 33     5 if (($record->field(111)) || ($record->field(110) && $record->field(110)->subfield('d'))) {$isconfpub = 1;}
  0   33     0  
1525              
1526             #match 008 $date1, $date050, and $date260 unless record is for conference.
1527 1 50       482 unless ($isconfpub == 1) {
1528 1 50 33     6 unless ($date1 eq $date050 && $date050 eq $date260) {
1529 1         4 push @warningstoreturn, ("Pub. Dates: 008 date1, $date1, 050 date, $date050, and 260_c date, $date260 do not match."); return \@warningstoreturn;
  1         5  
1530              
1531             } #unless all three match
1532             } #unless conf
1533             # otherwise for conf. publications match only $date1 and $date260
1534             else {
1535 0 0       0 unless ($date1 eq $date260) {
1536 0         0 push @warningstoreturn, ("Pub. Dates: 008 date1, $date1 and 260_c date, $date260 do not match."); return \@warningstoreturn;
  0         0  
1537             } #unless conf with $date1 eq $date260
1538             } #else conf
1539              
1540 0         0 return \@warningstoreturn;
1541              
1542             } # matchpubdates
1543              
1544              
1545             #########################################
1546             #########################################
1547             #########################################
1548             #########################################
1549              
1550             =head2 check_bk008_vs_bibrefandindex($record)
1551              
1552             Ignores non-book records (other than cartographic materials).
1553             For cartographic materials, checks only for index coding (not bib. refs.).
1554              
1555             Examines 008 book-contents (bytes 24-27) and book-index (byte 31).
1556             Compares with 500 and 504 fields.
1557             Reports error if 008contents has 'b' but 504 does not have "bibliographical references."
1558             Reports error if 504 has "bibliographical references" but no 'b' in 008contents.
1559             Reports error if 008index has 1 but no 500 or 504 with "Includes .* index."
1560             Reports error if a 500 or 504 has "Includes .* index" but 008index is 0.
1561             Reports error if "bibliographical references" appears in 500.
1562             Allows "bibliographical reference."
1563              
1564             =head2 TO DO/KNOWN PROBLEMS
1565              
1566             As with other subroutines, this one treats all 008 as being coded for monographs.
1567             Serials are ignored for the moment.
1568              
1569             Account for records with "Bibliography" or other wording in place of "bibliographical references."
1570             Currently 'b' in 008 must match with "bibliographical reference" or "bibliographical references" in 504 (or 500--though that reports an error).
1571              
1572             Reverse check for other wording (or subject headings) vs. 008 'b' in contents.
1573              
1574             Check for other 008contents codes.
1575              
1576             Check for misspelled "bibliographical references."
1577              
1578             Check spacing if pagination is given in 504.
1579              
1580             =cut
1581              
1582             sub check_bk008_vs_bibrefandindex {
1583              
1584             #get passed MARC::Record object
1585 1     1 1 2 my $record = shift;
1586             #declaration of return array
1587 1         3 my @warningstoreturn = ();
1588 1         3 my $record_is_RDA = is_RDA($record);
1589              
1590              
1591 1         4 my $leader = $record->leader();
1592 1         8 my $mattype = substr($leader, 6, 1);
1593             #skip non-book (other than cartographic) records
1594 1 50       6 if ($mattype !~ /^[ae]$/) {return \@warningstoreturn;}
  0         0  
1595              
1596 1 50       5 my $field008 = $record->field('008')->as_string() if ($record->field('008'));
1597 1 50       79 return \@warningstoreturn unless ($field008);
1598              
1599 1         2 my $bkindex = substr($field008,31,1);
1600             #report error if $bkindex is not 0 or 1
1601             ##this will result in dual errors if check_008 is also called.
1602 1 50       13 push @warningstoreturn, ("008: Book index must be 0 or 1.") unless $bkindex =~ /[01]/;
1603            
1604 1         2 my $bkcontents = substr($field008,24,4);
1605              
1606             #############################
1607 1         3 my @fields500 = ();
1608 1         2 my @fields504 = ();
1609 1         1 my @fields6xx = ();
1610 1         4 foreach my $field500 ($record->field('500')){
1611 10         440 push @fields500, ($field500->as_string());
1612             }
1613 1         23 foreach my $field504 ($record->field('504')){
1614 3         281 push @fields504, ($field504->as_string());
1615             }
1616              
1617             ####################################
1618             ### Workaround for bibliography as form of item.
1619 1         23 foreach my $field6xx ($record->field('6..')){
1620 1         221 push @fields6xx, ($field6xx->as_string());
1621             }
1622             ####################################
1623              
1624             ####################################
1625              
1626             ########################
1627             ## Check index coding ##
1628             ########################
1629 1         23 my $hasindexin500or504 = 0;
1630             #count 500s and 504s with 'Includes' 'index'
1631 1         3 $hasindexin500or504 = grep {$_ =~ /Includes.*index/} @fields500, @fields504;
  13         34  
1632              
1633 1 50       2 if (grep {$_ =~ /^Includes index(es)?\.$/} @fields504) {
  3         10  
1634 0         0 push @warningstoreturn, ("504: 'Includes index.' or 'Includes indexes.' should be 500.")
1635             } # if 'Includes index(es).' in 504
1636              
1637             #error if $bkindex is 0 but 500 or 504 "Includes" "index"
1638 1 50 33     8 if (($bkindex eq '0') && ($hasindexin500or504)) {
    0 0        
1639 1         3 push @warningstoreturn, ("008: Index is coded 0 but 500 or 504 mentions index.");
1640             } #if $bkindex is 0 but 500 or 504 "Includes" "index"
1641              
1642             #error if $bkindex is 1 but 500 or 504 does not have "Includes" "index"
1643             elsif (($bkindex eq '1') && !($hasindexin500or504)) {
1644 0         0 push @warningstoreturn, ("008: Index is coded 1 but 500 or 504 does not mention index.");
1645             } #elsif $bkindex is 1 but 500 or 504 does not have "Includes" "index"
1646              
1647             ###############################
1648              
1649             #return if the $mattype is 'e' (cartographic)
1650 1 50       4 if ($mattype eq 'e') {return \@warningstoreturn;}
  0         0  
1651              
1652             ###############################
1653              
1654              
1655             ##########################
1656             ## Check bib ref coding ##
1657             ##########################
1658              
1659 1         2 my $hasbibrefs = 0;
1660             #set $hasbibrefs to 1 if 'b' appears in 008 byte 24-27
1661 1 50       5 $hasbibrefs = 1 if ($bkcontents =~ /b/);
1662              
1663             #get 504s with 'bibliographical references' #modified 11-4-04 to add 's?\.?\b'
1664 1         8 my @bibrefsin504 = grep {$_ =~ /(?:bibliographical references?\.?\b)|(?:webliography)/} @fields504;
  3         21  
1665             #get 500s with 'bibliographical references'
1666 1         2 my @bibrefsin500 = grep {$_ =~ /(?:bibliographical references?\.?\b)|(?:webliography)/} @fields500;
  10         28  
1667             ###### Temporary/uncertain method of checking for bibliography as form of item
1668 1         2 my @bib6xx = grep {$_ =~ /bibliography|bibliographies/i} @fields6xx;
  1         13  
1669              
1670 1         3 my $bibrefin504 = join '', @bibrefsin504;
1671 1         2 my $bibrefin500 = join '', @bibrefsin500;
1672 1         3 my $isbibliography = join '', @bib6xx;
1673              
1674             #report 500 with "bibliographical references"
1675 1 50       4 if ($bibrefin500) {
1676 0         0 push @warningstoreturn, ("500: Bibliographical references should be in 504.");
1677             } #if $bibrefin500
1678              
1679             #report 008contents 'b' but not 504 or 500 with bib refs
1680 1 50 0     11 if (($hasbibrefs == 1) && !(($bibrefin504) || ($bibrefin500) ||($isbibliography))) {
    50 33        
      33        
      33        
1681 0         0 push @warningstoreturn, ("008: Coded 'b' but 504 (or 500) does not mention 'bibliographical references', and 'bibliography' is not present in 6xx.");
1682             } # if 008cont 'b' but not 504 or 500 with bib refs
1683             #report 504 or 500 with bib refs but no 'b' in 008contents
1684             elsif (($hasbibrefs == 0) && (($bibrefin504) || $bibrefin500)) {
1685 1         2 push @warningstoreturn, ("008: Not coded 'b' but 504 (or 500) mentions 'bibliographical references'.");
1686             } # if 008cont 'b' but not 504 or 500 with bib refs
1687              
1688 1         3 foreach my $bibref (@bibrefsin504) {
1689             #check spacing around parentheses
1690 2 50       10 if ($bibref =~ /[\(\)]/) {
1691 2 100 66     22 push @warningstoreturn, ("504: Check spacing around parentheses ($bibref).") if (($bibref =~ /\(.+?\)[^ \,\.]/) || ($bibref =~ /[^ ]\(.+?\)/));
1692             } #if 504 has parentheses
1693              
1694 2 50       19 unless ($record_is_RDA) {
1695             #check for 'p.' if pagination is present with bibliographical references
1696 2 100       11 if ($bibref =~ /bibliographical references \((?!p\. ).*?\)?/) {
1697 1 50       5 unless ($bibref =~ /bibliographical references \(t\.p\. .*?\)?/) {
1698 1         9 push @warningstoreturn, ("504: Pagination may need 'p.' ($bibref).");
1699             } #unless 't.p. ' is page (including t.p. verso)
1700             } #if 'p.' is not present in 504 with bib. ref. pagination
1701             } #unless RDA record
1702             else {
1703             #check for 'page(s)' if pagination is present with bibliographical references
1704 0 0       0 if ($bibref =~ /bibliographical references \((?!pages? ).*?\)?/) {
1705 0 0       0 unless ($bibref =~ /bibliographical references \(title page .*?\)?/) {
1706 0         0 push @warningstoreturn, ("504: Pagination may need 'page(s)' ($bibref).");
1707             } #unless 'title page ' is page (including title page verso)
1708             } #if 'page(s)' is not present in 504 with bib. ref. pagination
1709             } #else RDA
1710             } #foreach 504 field with bib. refs
1711 1         8 return \@warningstoreturn;
1712            
1713             } # check_bk008_vs_bibrefandindex
1714              
1715             #########################################
1716             #########################################
1717             #########################################
1718             #########################################
1719              
1720             =head2 check_041vs008lang($record)
1721              
1722             Compares first code in subfield 'a' of 041 vs. 008 bytes 35-37.
1723              
1724             =cut
1725              
1726             sub check_041vs008lang {
1727              
1728             #get passed MARC::Record object
1729 1     1 1 2 my $record = shift;
1730             #declaration of return array
1731 1         3 my @warningstoreturn = ();
1732              
1733 1 50       4 my $field008 = $record->field('008')->as_string() if ($record->field('008'));
1734 1 50       75 return \@warningstoreturn unless ($field008);
1735 1         3 my $langcode008 = substr($field008,35,3);
1736              
1737             #double check that lang code is present with 3 characters
1738 1 50       5 unless ($langcode008 =~ /^[\w ]{3}$/) {
1739 0         0 push @warningstoreturn, ("008: Could not get language code, $langcode008.");
1740             }
1741              
1742             #get first 041 subfield 'a' if it exists
1743 1         5 my $first041a;
1744 1 50       4 if ($record->field('041')) {
1745 1 50       79 $first041a = $record->field('041')->subfield('a') if ($record->field('041')->subfield('a'));
1746             }
1747              
1748             #skip records without 041 or 041$a
1749 1 50       174 unless ($first041a) {return \@warningstoreturn;}
  0         0  
1750             else {
1751 1         3 my $firstcode = substr($first041a,0,3);
1752             #compare 008lang vs. 1st 041a code
1753 1 50       4 unless ($firstcode eq $langcode008) {
1754 1         6 push @warningstoreturn, ("041: First code ($firstcode) does not match 008 bytes 35-37 (Language $langcode008).");
1755             }
1756             } # else $first041a exists
1757              
1758 1         3 return \@warningstoreturn;
1759              
1760             } #check_041vs008lang
1761              
1762             #########################################
1763             #########################################
1764             #########################################
1765             #########################################
1766              
1767             #########################################
1768             #########################################
1769             #########################################
1770             #########################################
1771              
1772             =head2 check_5xxendingpunctuation($record)
1773              
1774             Validates punctuation in various 5xx fields.
1775              
1776             Currently checks 500, 501, 504, 505, 508, 511, 538, 546.
1777              
1778             For 586, see check_nonpunctendingfields($record)
1779              
1780             =head2 TO DO (check_5xxendingpunctuation)
1781              
1782             Add checks for the other 5xx fields.
1783              
1784             Verify rules for these checks (particularly 505).
1785              
1786             =cut
1787              
1788             sub check_5xxendingpunctuation {
1789              
1790             #get passed MARC::Record object
1791 1     1 1 3 my $record = shift;
1792             #declaration of return array
1793 1         2 my @warningstoreturn = ();
1794              
1795 1         4 my $leader = $record->leader();
1796 1         9 my $encodelvl = substr($leader, 17, 1);
1797              
1798             #check for CIP-level
1799 1         2 my $isCIP = 0;
1800 1 50       4 if ($encodelvl eq '8') {
1801 0         0 $isCIP = 1;
1802             }
1803             # check only certain fields
1804 1         5 my @fieldstocheck = ('500', '501', '504', '505', '520', '538', '546', '508', '511');
1805              
1806             #get fields in @fieldstocheck
1807 1         5 my @fields5xx = $record->field(@fieldstocheck);
1808              
1809              
1810             #loop through set of 5xx fields to check in $record
1811 1         9376 foreach my $field5xx (@fields5xx) {
1812 16         42 my $tag = $field5xx->tag();
1813             #skip 500s with LCCN or ISBN in PCIP
1814 16 50 33     109 if (($isCIP) && ($tag eq '500') && ($field5xx->subfield('a') =~ /^(LCCN)|(ISBN)|(Preassigned)/)) {
      33        
1815 0         0 return \@warningstoreturn;
1816             } #if CIP with 'LCCN' or 'ISBN' note
1817              
1818             else {
1819             #look at last subfield (unless numeric)
1820 16         43 my @subfields = $field5xx->subfields();
1821 16         241 my @newsubfields = ();
1822              
1823             #break subfields into code-data array (so the entire field is in one array)
1824 16         42 while (my $subfield = pop(@subfields)) {
1825 16         31 my ($code, $data) = @$subfield;
1826             # skip numeric subfields (5)
1827 16 50       49 next if ($code =~ /^\d$/);
1828              
1829             #get the first 10 and last 10 characters of the field for error reporting
1830 16         20 my ($firstchars, $lastchars) = ('', '');
1831 16 50       56 if (length($data) < 10) {
    50          
1832             #get full subfield if length < 10)
1833 0         0 $firstchars = $data;
1834             #get full subfield if length < 10)
1835 0         0 $lastchars = $data;
1836             } #if subfield length < 10
1837             elsif (length($data) >= 10) {
1838             #get first 10 chars of subfield
1839 16         27 $firstchars = substr($data,0,10);
1840             #get last 10 chars of subfield
1841 16         32 $lastchars = substr($data,(length($data)-10),(length($data)));
1842             } #elsif subfield length >= 10
1843              
1844             # valid punctuation: /(\)?[\!\?\.]\'?\"?$)/
1845             # so, closing parens (or not),
1846             # either exclamation point, question mark or period,
1847             # and, optionally, single and/or double quote
1848              
1849 16 100       58 unless ($data =~ /(\)?[\!\?\.]\'?\"?$)/) {
1850 3 100       10 if ($tag eq '505') {
1851             #ignore error--505 may be unpunctuated
1852             } #if 505
1853             else {
1854 1         4 push @warningstoreturn, join '', ($tag, ": Check ending punctuation, ", $firstchars, " ___ ", $lastchars);
1855             } #else not 505
1856             } #unless valid ending punctuation
1857              
1858             #report error for floating or non-floating semi-colon-period
1859 16 50       72 push @warningstoreturn, join '', ($tag, ": Check ending punctuation, ", $firstchars, " ___ ", $lastchars) if ($data =~ /\s*;\s*\.$/);
1860              
1861             #report error for exclamation point or question mark-period
1862 16 100       44 push @warningstoreturn, join '', ($tag, ": Check ending punctuation (exclamation point or question mark should not be followed by period), ", $firstchars, " ___ ", $lastchars) if ($data =~ /(\)?[\!\?]\.\'?\"?$)/);
1863            
1864             # stop after first non-numeric
1865 16         74 last;
1866             } # while subfields
1867             } # else tag is checkable
1868            
1869             } # foreach 5xx field
1870              
1871 1         16 return \@warningstoreturn;
1872              
1873             } # check_5xxendingpunctuation
1874              
1875              
1876             #########################################
1877             #########################################
1878             #########################################
1879             #########################################
1880              
1881             =head2 findfloatinghyphens($record)
1882              
1883             Looks at various fields and reports fields with space-hypen-space as errors.
1884              
1885             =head2 TO DO (findfloatinghyphens($record))
1886              
1887             Find exceptions.
1888              
1889             =cut
1890              
1891             sub findfloatinghyphens {
1892              
1893             #get passed MARC::Record object
1894 1     1 1 3 my $record = shift;
1895             #declaration of return array
1896 1         3 my @warningstoreturn = ();
1897              
1898             # add or remove fields to be examined
1899 1         5 my @fieldstocheck = ('245', '246', '500', '501', '505', '508', '511', '538', '546'); #some may also want to check '520'
1900              
1901             #look at each of the fields
1902 1         3 foreach my $fieldtocheck (@fieldstocheck) {
1903 9         23 my @fields = $record->field($fieldtocheck);
1904 9         1951 foreach my $checkedfield (@fields) {
1905             #get field as a string, without subfield coding
1906 18         53 my $fielddata = $checkedfield->as_string();
1907             #report error if space-hyphen-space appears in field
1908             ##reporting surrounding 10 chars on either side
1909 18 100       405 if (my @floating_hyphens = ($fielddata =~ /(.{0,10} \- .{0,10})/g)) {
1910 1         4 push @warningstoreturn, join '', ($checkedfield->tag(), ": May have a floating hyphen, ", (join '_', @floating_hyphens) );
1911             } #if floating hyphen
1912             } #foreach $checkedfield
1913             } #foreach $fieldtocheck
1914              
1915 1         5 return \@warningstoreturn;
1916              
1917             } # findfloatinghyphens
1918              
1919             #########################################
1920             #########################################
1921             #########################################
1922             #########################################
1923              
1924             =head2 check_floating_punctuation($record)
1925              
1926             Looks at each non-control tag and reports an error if a floating period, comma, or question mark are found.
1927              
1928             Example:
1929              
1930             245 _aThis has a floating period .
1931              
1932             Ignores double dash-space when preceded by a non-space (example-- [where functioning as ellipsis replacement])
1933              
1934             =head2 TODO (check_floating_punctuation($record))
1935              
1936             -Add other undesirable floating punctuation.
1937              
1938             -Look for exceptions where floating punctuation should be allowed.
1939              
1940             -Merge functionality with findfloatinghyphens($record) (to reduce number of runs through the same record, especially).
1941              
1942             -Improve reporting. Current version reports approximately 10 characters before and after the floating text for fields longer than 80 characters, or the full field otherwise, to provide context, particularly in the case of multiple instances.
1943            
1944             =cut
1945              
1946             sub check_floating_punctuation {
1947              
1948             #get passed MARC::Record object
1949 1     1 1 2 my $record = shift;
1950             #declaration of return array
1951 1         2 my @warningstoreturn = ();
1952              
1953             #create hash of punctuation wording
1954 1         7 my %punct_words = (
1955             ',' => 'comma',
1956             '.' => 'period',
1957             '?' => 'question mark',
1958             );
1959              
1960             #look at each field in record
1961 1         10 foreach my $field ($record->fields()) {
1962 34         109 my $tag = $field->tag();
1963             #skip non-numeric tags
1964 34 50       217 next unless ($tag =~ /^[0-9][0-9][0-9]$/);
1965             #skip control fields and LCCN (010)
1966 34 100       80 next if ($tag <= 10);
1967              
1968             #break field into string of characters without subfield codes
1969 31         80 my $field_string = $field->as_string();
1970              
1971             #if period, comma, question mark are preceded by space and followed
1972             #by space or end of field, report error
1973             #except when preceded by ellipsis-replacement dash
1974 31 100       756 if ($field_string =~ /(?:(?![^ ]--)...) ([\.\,\?])(?: |$)/) {
1975 7         14 my $punct = $1;
1976 7   50     22 my $punctuation = ($punct_words{$punct} or 'punctuation mark');
1977 7         424 my @surrounding_text = ($field_string =~ /(.{0,10}(?![^ ]--)... [\.\,\?] ?.{0,10})/g);
1978 7 100       21 $punctuation = "punctuation marks" if (scalar @surrounding_text > 1);
1979 7         27 my $warning_text = join '', ($tag, ": May have floating $punctuation ");
1980             #add surrounding characters if field is longer than 80 chars
1981 7 100       52 $warning_text .= "\(".(length($field_string) > 80 ? join "_", substr($field_string, 0, 15), @surrounding_text : $field_string)."\).";
1982              
1983 7         22 push @warningstoreturn, $warning_text;
1984             } #if floating punctuation
1985            
1986             } #foreach field in record
1987            
1988 1         8 return \@warningstoreturn;
1989              
1990             } #check_floating_punctuation
1991              
1992              
1993              
1994             #########################################
1995             #########################################
1996             #########################################
1997             #########################################
1998              
1999              
2000             =head2 video007vs300vs538($record)
2001              
2002             Comparison of 007 coding vs. 300abc subfield data and vs. 538 data for video records (VHS and DVD).
2003              
2004             =head2 DESCRIPTION
2005              
2006             Focuses on videocassettes (VHS) and videodiscs (DVD and Video CD).
2007             Does not consider coding for motion pictures.
2008              
2009             If LDR/06 is 'g' for projected medium,
2010             (skipping those that aren't)
2011             and 007 is present,
2012             at least 1 007 should start with 'v'
2013              
2014             If 007/01 is 'd', 300a should have 'videodisc(s)'.
2015             300c should have 4 3/4 in.
2016             Also, 538 should have 'DVD'
2017             If 007/01 is 'f', 300a should have 'videocassette(s)'
2018             300c should have 1/2 in.
2019             Also, 538 should have 'VHS format' or 'VHS hi-fi format' (case insensitive on hi-fi), plus a playback mode.
2020              
2021             =head2 LIMITATIONS
2022              
2023             Checks only videocassettes (1/2) and videodiscs (4 3/4).
2024             Current version reports problems with other forms of videorecordings.
2025              
2026             Accounts for existence of only 1 300 field.
2027              
2028             Looks at only 1st subfield 'a' and 'c' of 1st 300 field.
2029              
2030             =head2 TO DO
2031              
2032             Account for motion pictures and videorecordings not on DVD (4 3/4 in.) or VHS cassettes.
2033              
2034             Check proper plurality of 300a (1 videodiscs -> error; 5 videocassette -> error)
2035              
2036             Monitor need for changes to sizes, particularly 4 3/4 in. DVDs.
2037              
2038             Expand allowed terms for 538 as needed and revise current VHS allowed terms.
2039              
2040             Update to allow SMDs of conventional terminology ('DVD') if such a rule passes.
2041              
2042             Deal with multiple 300 fields.
2043              
2044             Check GMD in 245$h
2045              
2046             Clean up redundant code.
2047              
2048             =cut
2049              
2050             sub video007vs300vs538 {
2051              
2052             #get passed MARC::Record object
2053 1     1 1 3 my $record = shift;
2054             #declaration of return array
2055 1         2 my @warningstoreturn = ();
2056 1         4 my $record_is_RDA = is_RDA($record);
2057              
2058              
2059 1         5 my $leader = $record->leader();
2060 1         9 my $mattype = substr($leader, 6, 1);
2061             #my $encodelvl = substr($leader, 17, 1);
2062              
2063             #skip non-videos
2064 1 50       7 return \@warningstoreturn unless $mattype eq 'g';
2065              
2066              
2067 0         0 my @fields007 = ();
2068              
2069 0 0       0 if ($record->field('007')) {
2070 0         0 foreach my $field007 ($record->field('007'))
2071             {
2072 0         0 my $field007string = $field007->as_string();
2073             #skip non 'v' 007s
2074 0 0       0 next unless ($field007string =~ /^v/);
2075             #add 'v' 007s to @fields007 for further processing
2076 0         0 push @fields007, $field007string;
2077             } # foreach subfield 007
2078             } # if 007s exist
2079             else {
2080             #warn about nonexistent 007 in 'g' type records
2081 0         0 push @warningstoreturn, ("007: Record is coded $mattype but 007 does not exist.");
2082             } # else no 007s
2083              
2084             #report existence of multiple 'v' 007s
2085 0 0       0 if ($#fields007 > 0){
    0          
2086 0         0 push @warningstoreturn, ("007: Multiple 007 with first byte 'v' are present.");
2087             }
2088             #report nonexistence of 'v' 007 in 'g' type recor
2089             elsif ($#fields007 == -1) {
2090 0         0 push @warningstoreturn, ("007: Record is coded $mattype but no 007 has 'v' as its first byte.");
2091             }
2092             #else have exactly one 007 'v'
2093             else {
2094             # get bytes from the 007 for use in cross checks
2095 0         0 my @field007bytes = split '', $fields007[0];
2096             #report problem getting 'v' as first byte
2097 0 0       0 print "Problem getting first byte $fields007[0]" unless ($field007bytes[0] eq 'v');
2098              
2099             #declare variables for later
2100 0         0 my ($iscassette007, $isdisc007, $subfield300a, $subfield300b, $subfield300c, $viddiscin300, $vidcassettein300, $bw_only, $col_only, $col_and_bw, $dim300, $dvd538, $vhs538);
2101              
2102             #check for byte 1 having 'd'--videodisc (DVD or VideoCD) and normal pattern
2103 0 0       0 if ($field007bytes[1] eq 'd') {
    0          
2104 0         0 $isdisc007 = 1;
2105 0 0 0     0 unless ( #normal 'vd _[vsz]aiz_'
      0        
      0        
2106             $field007bytes[4] =~ /^[vsz]$/ && #DVD, Blu-ray or other
2107             $field007bytes[5] eq 'a' &&
2108             $field007bytes[6] eq 'i' &&
2109             $field007bytes[7] eq 'z'
2110             ) {
2111 0         0 push @warningstoreturn, ("007: Coded 'vd' for videodisc but bytes do not match normal pattern.");
2112             } # unless normal pattern
2113             } # if 'vd'
2114              
2115             #elsif check for byte 1 having 'f' videocassette
2116             elsif ($field007bytes[1] eq 'f') {
2117 0         0 $iscassette007 = 1;
2118 0 0 0     0 unless ( #normal 'vf _baho_'
      0        
      0        
2119             $field007bytes[4] eq 'b' &&
2120             $field007bytes[5] eq 'a' &&
2121             $field007bytes[6] eq 'h' &&
2122             $field007bytes[7] eq 'o'
2123             ) {
2124 0         0 push @warningstoreturn, ("007: Coded 'vf' for videocassette but bytes do not match normal pattern.");}
2125             } # elsif 'vf'
2126              
2127             #get 300 and 538 fields for cross-checks
2128 0 0       0 my $field300 = $record->field('300') if ($record->field('300'));
2129              
2130             #report nonexistent 300 field
2131 0 0 0     0 unless ($field300){
    0 0        
2132 0         0 push @warningstoreturn, ("300: May be missing.");
2133             } #unless 300 field exists
2134              
2135             #get subfields 'a' 'b' and 'c' if they all exist
2136             elsif ($field300->subfield('a') && $field300->subfield('b') && $field300->subfield('c')) {
2137 0         0 $subfield300a = $field300->subfield('a');
2138 0         0 $subfield300b = $field300->subfield('b');
2139 0         0 $subfield300c = $field300->subfield('c');
2140             } #elsif 300a 300b and 300c exist
2141              
2142             #report missing subfield 'a' 'b' or 'c'
2143             else {
2144 0 0       0 push @warningstoreturn, ("300: Subfield 'a' is missing.") unless ($field300->subfield('a'));
2145 0 0       0 push @warningstoreturn, ("300: Subfield 'b' is missing.") unless ($field300->subfield('b'));
2146 0 0       0 push @warningstoreturn, ("300: Subfield 'c' is missing.") unless ($field300->subfield('c'));
2147             } # 300a or 300b or 300c is missing
2148              
2149             ######## get elements of each subfield ##########
2150             ######### get SMD ###########
2151 0 0       0 if ($subfield300a) {
2152 0 0       0 if ($subfield300a =~ /videodisc/) {
    0          
2153 0         0 $viddiscin300 = 1;
2154             } #300a has videodisc
2155             elsif ($subfield300a =~ /videocassette/) {
2156 0         0 $vidcassettein300 = 1;
2157             } #300a has videocassette
2158             else {
2159 0         0 push @warningstoreturn, ("300: Not videodisc or videocassette, $subfield300a.");
2160             } #not videodisc or videocassette in 300a
2161             } #if subfielda exists
2162             ###############################
2163              
2164             ###### get color info #######
2165 0 0       0 if ($subfield300b) {
2166 0 0       0 unless ($record_is_RDA) {
2167             #both b&w and color
2168 0 0 0     0 if (($subfield300b =~ /b.?\&.?w/) && ($subfield300b =~ /col\./)) {
    0 0        
    0 0        
    0 0        
    0 0        
2169 0         0 $col_and_bw = 1;
2170             } #if col. and b&w
2171             #both but col. missing period
2172             elsif (($subfield300b =~ /b.?\&.?w/) && ($subfield300b =~ /col[^.]/)) {
2173 0         0 $col_and_bw = 1;
2174 0         0 push @warningstoreturn, ("300: Col. may need a period, $subfield300b.");
2175             } #elsif b&w and col (without period after col.)
2176             elsif (($subfield300b =~ /b.?\&.?w/) && ($subfield300b !~ /col\./)) {
2177 0         0 $bw_only = 1;
2178             } #if b&w only
2179             elsif (($subfield300b =~ /col\./) && ($subfield300b !~ /b.?\&.?w/)) {
2180 0         0 $col_only = 1;
2181             } #if col. only
2182             elsif (($subfield300b =~ /col[^.]/) && ($subfield300b !~ /b.?\&.?w/)) {
2183 0         0 $col_only = 1;
2184 0         0 push @warningstoreturn, ("300: Col. may need a period, $subfield300b.");
2185             } #if col. only (without period after col.)
2186             else {
2187 0         0 push @warningstoreturn, ("300: Col. or b&w are not indicated, $subfield300b.");
2188             } #not indicated
2189             } #unless RDA
2190             else {
2191             #both b&w and color
2192 0 0 0     0 if (($subfield300b =~ /black \& white/) && ($subfield300b =~ /colou?r/)) {
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
2193 0         0 $col_and_bw = 1;
2194             } #if col. and b&w
2195             #both but col. and b&w abbreviated
2196             elsif (($subfield300b =~ /b.?\&.?w/) && ($subfield300b =~ /col\./)) {
2197 0         0 $col_and_bw = 1;
2198 0         0 push @warningstoreturn, ("300: Check for abbreviated col. and b&w, $subfield300b.");
2199             } #elsif b&w and col. abbreviated
2200             elsif (($subfield300b =~ /black \& white/) && ($subfield300b !~ /colou?r/)) {
2201 0         0 $bw_only = 1;
2202             } #if b&w only
2203             elsif (($subfield300b =~ /b.?\&.?w/) && ($subfield300b !~ /col/)) {
2204 0         0 $bw_only = 1;
2205 0         0 push @warningstoreturn, ("300: Check for abbreviated b&w, $subfield300b.");
2206             } #if b&w only
2207             elsif (($subfield300b =~ /colou?r/) && ($subfield300b !~ /black \& white/)) {
2208 0         0 $col_only = 1;
2209             } #if colored only
2210             elsif (($subfield300b =~ /col\./) && ($subfield300b !~ /(b.?\&.?w)|(black \& white)/)) {
2211 0         0 $col_only = 1;
2212 0         0 push @warningstoreturn, ("300: Check for abbreviated col., $subfield300b.");
2213             } #if col. only
2214             else {
2215 0         0 push @warningstoreturn, ("300: Colored or black & white are not indicated, $subfield300b.");
2216             } #not indicated
2217             } #else RDA
2218             } #if subfieldb exists
2219             ###########################
2220              
2221             #### get dimensions ####
2222 0 0       0 if ($subfield300c) {
2223 0 0       0 if ($subfield300c =~ /4 3\/4 in\./) {
    0          
2224 0         0 $dim300 = '4.75';
2225             } #4 3/4 in.
2226             elsif ($subfield300c =~ /1\/2 in\./) {
2227 0         0 $dim300 = '.5';
2228             } #1/2 in.
2229             #### add other dimensions here ####
2230             ###########################
2231             ### elsif ($subfield300c =~ //) {}
2232             ###########################
2233             ###########################
2234             else {
2235 0         0 push @warningstoreturn, ("300: Dimensions are not 4 3/4 in. or 1/2 in., $subfield300c.");
2236             } # not normal dimension
2237             } #if subfieldc exists
2238             ###########################
2239              
2240             ####################################
2241             ##### Compare SMD vs. dimensions ###
2242             ####################################
2243             #$viddiscin300, $vidcassettein300
2244             #$dim300
2245             #if notdvd_or_vhs_in538 is 1, then no 538 has the proper terminology for the format
2246 0         0 my $notdvd_or_vhs_in538 = 1; #declared and initialized here for later use
2247              
2248             ##### modify unless statement if dimensions change
2249              
2250 0 0       0 if ($viddiscin300) {
    0          
2251 0 0       0 push @warningstoreturn, ("300: Dimensions, $subfield300c, do not match SMD, $subfield300a.") unless ($dim300 eq '4.75');
2252             }
2253             elsif ($vidcassettein300) {
2254 0 0       0 push @warningstoreturn, ("300: Dimensions, $subfield300c, do not match SMD, $subfield300a.") unless ($dim300 eq '.5');
2255             }
2256             ####################################
2257              
2258             ###########################
2259             ####### Get 538s ##########
2260             ###########################
2261              
2262            
2263 0 0       0 my @fields538 = map {$_->as_string()} $record->field('538') if ($record->field('538'));
  0         0  
2264             #report nonexistent 538 field
2265 0 0       0 unless (@fields538){
2266 0         0 push @warningstoreturn, ("538: May be missing in video record.");
2267             } #unless 538 field exists
2268             else {
2269 0         0 foreach my $field538 (@fields538) {
2270 0 0       0 if ($field538 =~ /(DVD)|(Video CD)|(Blu-ray)/) {
    0          
2271 0         0 $dvd538 = 1;
2272             } #if dvd in 538
2273             #################################
2274             ###### VHS wording in 538 is subject to change, so make note of changes
2275             #################################
2276             #538 should have VHS format and a playback mode (for our catalogers' current records)
2277             elsif ($field538 =~ /VHS ([hH]i-[fF]i)?( mono\.)? ?format, [ES]?L?P playback mode/) {
2278 0         0 $vhs538 = 1;
2279             } #elsif vhs in 538
2280             ###
2281             ### Add other formats here ###
2282             ###
2283             else {
2284             #current 538 doesn't have DVD or VHS
2285 0         0 $notdvd_or_vhs_in538 = 1;
2286             } #else
2287             } #foreach 538 field
2288             } # #else 538 exists
2289              
2290             ## add other formats as first condition if necessary
2291 0 0 0     0 if (($vhs538||$dvd538) && ($notdvd_or_vhs_in538 == 1)) {
    0 0        
2292 0         0 $notdvd_or_vhs_in538 = 0;
2293             } #at least one 538 had VHS or DVD
2294              
2295             # if $notdvd_or_vhs_in538 is 1, then no 538 had VHS or DVD
2296             elsif ($notdvd_or_vhs_in538 == 1) {
2297 0         0 push @warningstoreturn, ("538: Does not indicate VHS or DVD.");
2298             } #elsif 538 does not have VHS or DVD
2299              
2300             ###################################
2301             ##### Cross field comparisons #####
2302             ###################################
2303              
2304             #compare SMD in 300 vs. 007 and 538
2305             ##for cassettes
2306 0 0       0 if ($iscassette007) {
    0          
2307 0 0       0 push @warningstoreturn, ("300: 007 coded for cassette but videocassette is not present in 300a.") unless ($vidcassettein300);
2308 0 0       0 push @warningstoreturn, ("538: 007 coded for cassette but 538 does not have 'VHS format, SP playback mode'.") unless ($vhs538);
2309             } #if coded cassette in 007
2310             ##for discs
2311             elsif ($isdisc007) {
2312 0 0       0 push @warningstoreturn, ("300: 007 coded for disc but videodisc is not present in 300a.") unless ($viddiscin300);
2313 0 0       0 push @warningstoreturn, ("538: 007 coded for disc but 538 does not have 'DVD'.") unless ($dvd538);
2314             } #elsif coded disc in 007
2315              
2316             ###$bw_only, $col_only, $col_and_bw
2317              
2318             #compare 007/03 vs. 300$b for color/b&w
2319 0 0       0 if ($field007bytes[3] eq 'b') {
    0          
    0          
    0          
2320 0 0       0 push @warningstoreturn, ("300: Color in 007 coded 'b' but 300b mentions color, $subfield300b") unless ($bw_only);
2321             } #b&w
2322             elsif ($field007bytes[3] eq 'c') {
2323 0 0       0 push @warningstoreturn, ("300: Color in 007 coded 'c' but 300b mentions black & white, $subfield300b") unless ($col_only);
2324             } #col.
2325             elsif ($field007bytes[3] eq 'm') {
2326 0 0       0 push @warningstoreturn, ("300: Color in 007 coded 'm' but 300b mentions only color or black & white, $subfield300b") unless ($col_and_bw);
2327             } #mixed
2328             elsif ($field007bytes[3] eq 'a') {
2329             #not really an error, but likely rare, especially for our current videos
2330 0         0 push @warningstoreturn, ("300: Color in 007 coded 'a', one color.");
2331             } #one col.
2332              
2333             } # else have exactly 1 'v' 007
2334              
2335 0         0 return \@warningstoreturn;
2336              
2337              
2338             } # video007vs300vs538
2339              
2340              
2341             #########################################
2342             #########################################
2343             #########################################
2344             #########################################
2345              
2346             =head2 ldrvalidate($record)
2347              
2348             Validates bytes 5, 6, 7, 17, and 18 of the leader against MARC code list valid characters.
2349              
2350             =head2 DESCRIPTION
2351              
2352             Checks bytes 5, 6, 7, 17, and 18.
2353              
2354             $ldrbytes{$key} has keys "\d\d", "\d\dvalid" for each of the bytes checked (05, 06, 07, 17, 18)
2355              
2356             "\d\dvalid" is a hash ref containing valid code linked to the meaning of that code.
2357              
2358             print $ldrbytes{'05valid'}->{'a'}, "\n";
2359             yields: 'Increase in encoding level'
2360              
2361             =head2 TO DO (ldrvalidate)
2362              
2363             Customize (comment or uncomment) bytes according to local needs. Perhaps allow %ldrbytes to be passed into ldrvalidate($record) so that that hash may be created by a calling program, rather than relying on the preset MARC 21 values. This would facilitate adding valid OCLC-MARC bytes such as byte 17--I, K, M, etc.
2364              
2365             Examine other Lintadditions/Errorchecks subroutines using the leader to see if duplicate checks are being done.
2366              
2367             Move or remove such duplicate checks.
2368              
2369             Consider whether %ldrbytes needs full text of meaning of each byte.
2370              
2371             =cut
2372              
2373             ##########################################
2374             ### Initialize valid ldr bytes in hash ###
2375             ##########################################
2376              
2377             #source: MARC field list (http://www.loc.gov/marc/bibliographic/ecbdlist.htm)
2378              
2379             #Change (comment or uncomment) according to local needs
2380              
2381             my %ldrbytes = (
2382             '05' => 'Record status',
2383             '05valid' => {
2384             'a' => 'Increase in encoding level',
2385             'c' => 'Corrected or revised',
2386             'd' => 'Deleted',
2387             'n' => 'New',
2388             'p' => 'Increase in encoding level from prepublication'
2389             },
2390             '06' => 'Type of record',
2391             '06valid' => {
2392             'a' => 'Language material',
2393             # 'b' => 'Archival and manuscripts control [OBSOLETE]',
2394             'c' => 'Notated music',
2395             'd' => 'Manuscript notated music',
2396             'e' => 'Cartographic material',
2397             'f' => 'Manuscript cartographic material',
2398             'g' => 'Projected medium',
2399             # 'h' => 'Microform publications [OBSOLETE]',
2400             'i' => 'Nonmusical sound recording',
2401             'j' => 'Musical sound recording',
2402             'k' => 'Two-dimensional nonprojectable graphic',
2403             'm' => 'Computer file',
2404             # 'n' => 'Special instructional material [OBSOLETE]',
2405             'o' => 'Kit',
2406             'p' => 'Mixed material',
2407             'r' => 'Three-dimensional artifact or naturally occurring object',
2408             't' => 'Manuscript language material'
2409             },
2410             '07' => 'Bibliographic level',
2411             '07valid' => {
2412             'a' => 'Monographic component part',
2413             'b' => 'Serial component part',
2414             'c' => 'Collection',
2415             'd' => 'Subunit',
2416             'i' => 'Integrating resource',
2417             'm' => 'Monograph/item',
2418             's' => 'Serial'
2419             },
2420             '17' => 'Encoding level',
2421             '17valid' => {
2422             ' ' => 'Full level',
2423             '1' => 'Full level, material not examined',
2424             '2' => 'Less-than-full level, material not examined',
2425             '3' => 'Abbreviated level',
2426             '4' => 'Core level',
2427             '5' => 'Partial (preliminary) level',
2428             '7' => 'Minimal level',
2429             '8' => 'Prepublication level',
2430             'u' => 'Unknown',
2431             'z' => 'Not applicable'
2432             },
2433             '18' => 'Descriptive cataloging form',
2434             '18valid' => {
2435             ' ' => 'Non-ISBD',
2436             'a' => 'AACR 2',
2437             'c' => 'ISBD punctuation omitted',
2438             'i' => 'ISBD punctuation included',
2439             # 'p' => 'Partial ISBD (BK) [OBSOLETE]',
2440             # 'r' => 'Provisional (VM MP MU) [OBSOLETE]',
2441             'u' => 'Unknown'
2442             },
2443             '19' => 'Multipart resource record level',
2444             '19valid' => {
2445             ' ' => 'Not specified or not applicable',
2446             'a' => 'Set',
2447             'b' => 'Part with independent title',
2448             'c' => 'Part with dependent title'
2449             }
2450             ); # %ldrbytes
2451             ################################
2452              
2453             sub ldrvalidate {
2454              
2455             #get passed MARC::Record object
2456 57     57 1 4373277 my $record = shift;
2457             #declaration of return array
2458 57         111 my @warningstoreturn = ();
2459 57         113 my $record_is_RDA = is_RDA($record);
2460              
2461 57         165 my $leader = $record->leader();
2462 57         379 my $status = substr($leader, 5, 1);
2463 57         89 my $mattype = substr($leader, 6, 1);
2464 57         84 my $biblvl = substr($leader, 7, 1);
2465 57         81 my $encodelvl = substr($leader, 17, 1);
2466 57         113 my $catrules = substr($leader, 18, 1);
2467              
2468             #check LDR/05
2469 57 100       179 unless ($ldrbytes{'05valid'}->{$status}) {
2470 1         4 push @warningstoreturn, "LDR: Byte 05, Status $status is invalid.";
2471             }
2472             #check LDR/06
2473 57 100       160 unless ($ldrbytes{'06valid'}->{$mattype}) {
2474 10         34 push @warningstoreturn, "LDR: Byte 06, Material type $mattype is invalid.";
2475             }
2476             #check LDR/07
2477 57 100       162 unless ($ldrbytes{'07valid'}->{$biblvl}) {
2478 1         4 push @warningstoreturn, "LDR: Byte 07, Bib. Level, $biblvl is invalid.";
2479             }
2480             #check LDR/17
2481 57 100       185 unless ($ldrbytes{'17valid'}->{$encodelvl}) {
2482 1         4 push @warningstoreturn, "LDR: Byte 17, Encoding Level, $encodelvl is invalid.";
2483             }
2484             #check LDR/18
2485 57 100       137 unless ($ldrbytes{'18valid'}->{$catrules}) {
2486 3         13 push @warningstoreturn, "LDR: Byte 18, Cataloging rules, $catrules is invalid.";
2487             }
2488             #report RDA records coded 'a', AACR2
2489 57 50       105 if ($record_is_RDA) {
2490 0 0       0 push @warningstoreturn, "LDR: Byte 18, Cataloging rules, coded $catrules (AACR2), but 040 indicates RDA." if ($catrules eq 'a');
2491             }# RDA record leader coded as AACR2
2492            
2493            
2494 57         191 return \@warningstoreturn;
2495              
2496             } # ldrvalidate
2497              
2498             #########################################
2499             #########################################
2500             #########################################
2501             #########################################
2502              
2503             =head2 geogsubjvs043($record)
2504              
2505             Reports absence of 043 if 651 or 6xx subfield z is present.
2506              
2507             =head2 TO DO (geogsubjvs043)
2508              
2509             Update/maintain list of exceptions (in the hash, %geog043exceptions).
2510              
2511             =cut
2512              
2513             my %geog043exceptions = (
2514             'English-speaking countries' => 1,
2515             'Foreign countries' => 1,
2516             );
2517              
2518             sub geogsubjvs043 {
2519              
2520             #get passed MARC::Record object
2521 1     1 1 2 my $record = shift;
2522             #declaration of return array
2523 1         3 my @warningstoreturn = ();
2524            
2525             #skip records with no subject headings
2526 1 50       3 unless ($record->field('6..')) {return \@warningstoreturn;}
  0         0  
2527             else {
2528 1         243 my $hasgeog = 0;
2529             #get 043 field
2530 1 50       4 my $field043 = $record->field('043') if ($record->field('043'));
2531             #get all 6xx fields
2532 1         254 my @fields6xx = $record->field('6..');
2533             #look at each 6xx field
2534 1         235 foreach my $field6xx (@fields6xx) {
2535             #if field is 651, it is geog
2536             ##may need to check these for exceptions
2537 1 50       10 if ($field6xx->tag() eq '651') {
    50          
2538 0         0 $hasgeog = 1
2539             } #if 6xx is 651
2540             #if field has subfield z, check for exceptions and report others
2541             elsif ($field6xx->subfield('z')) {
2542 0         0 my @subfields_z = ();
2543             #get all subfield 'z' in field
2544 0         0 push @subfields_z, ($field6xx->subfield('z'));
2545             #look at each subfield 'z'
2546 0         0 foreach my $subfieldz (@subfields_z) {
2547             #remove trailing punctuation and spaces
2548 0         0 $subfieldz =~ s/[ .,]$//;
2549             # unless text of z is an exception, it is geog.
2550 0 0       0 unless ($geog043exceptions{$subfieldz}) {
2551 0         0 $hasgeog = 1
2552             } #unless z is an exception
2553             } #foreach subfield z
2554             }# elsif has subfield 'z' but not an exception
2555             } #foreach 6xx field
2556 1 50       34 if ($hasgeog) {
2557 0 0       0 push @warningstoreturn, ("043: Record has 651 or 6xx subfield 'z' but no 043.") unless $field043;
2558             } #if record has geographic heading
2559             } #else 6xx exists
2560              
2561 1         5 return \@warningstoreturn;
2562              
2563             } # geogsubjvs043
2564              
2565              
2566              
2567              
2568             #########################################
2569             #########################################
2570             #########################################
2571             #########################################
2572              
2573             =head2 findemptysubfields($record)
2574              
2575             Looks for empty subfields.
2576             Skips 037 in CIP-level records and tags < 010.
2577              
2578             =cut
2579              
2580             sub findemptysubfields {
2581              
2582             #get passed MARC::Record object
2583 1     1 1 3 my $record = shift;
2584             #declaration of return array
2585 1         3 my @warningstoreturn = ();
2586              
2587 1         4 my $leader = $record->leader();
2588 1         10 my $encodelvl = substr($leader, 17, 1);
2589              
2590 1         4 my @fields = $record->fields();
2591 1         13 foreach my $field (@fields) {
2592 34         94 my $tag = $field->tag();
2593             #skip non-numeric tags
2594 34 50       227 next unless ($tag =~ /^[0-9][0-9][0-9]$/);
2595             #skip control tags
2596 34 100       71 next if ($tag < 10);
2597             #skip CIP-level 037 fields
2598 32 50 33     80 if (($encodelvl eq '8') && ($tag eq '037')) {
2599 0         0 next;
2600             } #if CIP and field 037
2601              
2602             #get all subfields
2603 32 50       79 my @subfields = $field->subfields() if $field->subfields();
2604             #break subfields into code and data
2605 32         1015 while (my $subfield = pop(@subfields)) {
2606 40         68 my ($code, $data) = @$subfield;
2607             #check for empty subfield data
2608 40 50       8850 if ($data eq '') {
2609 0         0 push @warningstoreturn, join '', ($tag, ": Subfield $code is empty.");
2610             } #if data completely empty
2611             #check for fields with only period(s) or space(s)
2612             else {
2613             #keep original subfield data for reporting
2614 40         60 my $orig_data = $data;
2615             #remove periods and spaces
2616 40         232 $data =~ s/[\. ]//g;
2617             #report empty subfield
2618 40 100       206 push @warningstoreturn, join '', ($tag, ": Subfield $code contains only space(s) or period(s) ($orig_data).") unless ($data);
2619             } #else $data not empty string
2620             } # while subfields
2621             } # foreach field
2622              
2623 1         7 return \@warningstoreturn;
2624              
2625             } # findemptysubfields
2626              
2627             #########################################
2628             #########################################
2629             #########################################
2630             #########################################
2631              
2632             =head2 check_040present($record)
2633              
2634             Reports error if 040 is not present.
2635             Can not use Lintadditions check_040 for this since that relies upon field existing before the check is executed.
2636              
2637             =cut
2638              
2639             sub check_040present {
2640              
2641             #get passed MARC::Record object
2642 1     1 1 3 my $record = shift;
2643             #declaration of return array
2644 1         2 my @warningstoreturn = ();
2645              
2646             #report nonexistent 040 fields
2647 1 50       14 unless ($record->field('040')) {
2648 0         0 push @warningstoreturn, ("040: Record lacks 040 field.");
2649             }
2650              
2651 1         57 return \@warningstoreturn;
2652              
2653             } # check_040present
2654              
2655             #########################################
2656             #########################################
2657             #########################################
2658             #########################################
2659              
2660             =head2 check_nonpunctendingfields($record)
2661              
2662             Checks for presence of punctuation in the fields listed below.
2663             These fields are not supposed to end in punctuation unless the data ends in abbreviation, ___, or punctuation.
2664              
2665             Ignores initialisms such as 'Q.E.D.' Certain abbrevations and initialisms are explicitly coded.
2666              
2667             Fields checked: 240, 246, 440, 490, 586.
2668              
2669             =head2 TO DO (check_nonpunctendingfields)
2670              
2671             Add exceptions--abbreviations--or deal with them.
2672             Currently all fields ending in period are reported.
2673              
2674             =cut
2675              
2676             #set exceptions for abbreviation check;
2677             #these may be useful for 6xx check of punctuation as well
2678             my %abbexceptions = (
2679             'U.S.A.' => 1,
2680             'arr.' => 1,
2681             'etc.' => 1,
2682             'L. A.' => 1,
2683             'A.D.' => 1,
2684             'B.I.G.' => 1,
2685             'Co.' => 1,
2686             'D.C.' => 1,
2687             'E.R.' => 1,
2688             'I.Q.' => 1,
2689             'Inc.' => 1,
2690             'J.F.K.' => 1,
2691             'Jr.' => 1,
2692             'O.K.' => 1,
2693             'R.E.M.' => 1,
2694             'St.' => 1,
2695             'T.R.' => 1,
2696             'U.S.' => 1,
2697             'bk.' => 1,
2698             'cc.' => 1,
2699             'ed.' => 1,
2700             'ft.' => 1,
2701             'jr.' => 1,
2702             'mgmt.' => 1,
2703             );
2704              
2705             sub check_nonpunctendingfields {
2706              
2707             #get passed MARC::Record object
2708 1     1 1 1 my $record = shift;
2709             #declaration of return array
2710 1         3 my @warningstoreturn = ();
2711              
2712             # check only certain fields
2713 1         4 my @fieldstocheck = ('240', '246', '440', '490', '586');
2714              
2715            
2716 1         4 my @fields = $record->field(@fieldstocheck);
2717              
2718              
2719             #loop through set of fields to check in $record
2720 1         1052 foreach my $field (@fields) {
2721 6         18 my $tag = $field->tag();
2722 6 50       35 return \@warningstoreturn if $tag < 10;
2723             #look at last subfield (unless numeric?)
2724 6         13 my @subfields = $field->subfields();
2725 6         91 my @newsubfields = ();
2726              
2727             #break subfields into code-data array (so the entire field is in one array)
2728 6         17 while (my $subfield = pop(@subfields)) {
2729 6         10 my ($code, $data) = @$subfield;
2730             # skip numeric subfields (5) and other subfields (e.g. 240$o)
2731 6 50 66     38 next if (($code =~ /^\d$/) || ($tag eq '240' && $code =~ /o/));
      33        
2732              
2733             # invalid punctuation: /[\.]\'?\"?$/
2734             # so, periods should not usually be present, with some exceptions,
2735             #and, optionally, single and/or double quote
2736             #error prints first 10 and last 10 chars of subfield.
2737 6         10 my ($firstchars, $lastchars) = '';
2738 6 100       19 if (length($data) < 10) {
    50          
2739             #get full subfield if length < 10)
2740 1         3 $firstchars = $data;
2741             #get full subfield if length < 10)
2742 1         1 $lastchars = $data;
2743             } #if subfield length < 10
2744             elsif (length($data) >= 10) {
2745             #get first 10 chars of subfield
2746 5         18 $firstchars = substr($data,0,10);
2747             #get last 10 chars of subfield
2748 5         9 $lastchars = substr($data,-10,10);
2749             } #elsif subfield length >= 10
2750              
2751 6 100       26 if ($data =~ /[.]\'?\"?$/) {
2752             #get last words of subfield
2753 4         18 my @lastwords = split ' ', $data;
2754             #see if last word is a known exception
2755 4 100 100     36 unless ($abbexceptions{$lastwords[-1]} || ($lastwords[-1] =~ /(?:(?:\b|\W)[a-zA-Z]\.)$/)) {
2756              
2757 1         5 push @warningstoreturn, join '', ($tag, ": Check ending punctuation (not normally added for this field), ", $firstchars, " ___ ", $lastchars);
2758             }
2759             }
2760             # stop after first non-numeric
2761 6         19 last;
2762             } # while
2763             } # foreach field
2764              
2765              
2766 1         5 return \@warningstoreturn;
2767              
2768             } # check_nonpunctendingfields($record)
2769              
2770             #########################################
2771             #########################################
2772             #########################################
2773             #########################################
2774              
2775             =head2 check_fieldlength($record)
2776              
2777             Reports error if field is longer than 1870 bytes.
2778             (1879 is actual limit, but I wanted to leave some extra room in case of miscalculation.)
2779              
2780             This check relates to certain system limitations.
2781              
2782             Also reports records with more than 50 fields.
2783              
2784             =head2 TO DO (check_fieldlength($record))
2785              
2786             Use directory information in raw MARC to get the field lengths.
2787              
2788             =cut
2789              
2790             sub check_fieldlength {
2791              
2792             #get passed MARC::Record object
2793 0     0 1 0 my $record = shift;
2794             #declaration of return array
2795 0         0 my @warningstoreturn = ();
2796              
2797 0         0 my @fields = $record->fields();
2798             # push @warningstoreturn, join '', ("Record: Contains ", scalar @fields, " fields.") if (@fields > 50);
2799 0         0 foreach my $field (@fields) {
2800 0 0       0 if (length($field->as_string()) > 1870) {
2801 0         0 push @warningstoreturn, join '', ($field->tag(), ": Field is longer than 1870 bytes.");
2802             }
2803             } #foreach field
2804              
2805 0         0 return \@warningstoreturn;
2806              
2807             } # check_fieldlength
2808              
2809             #########################################
2810             #########################################
2811             #########################################
2812             #########################################
2813              
2814             =head2
2815              
2816             Add new subs with code below.
2817              
2818             =head2
2819              
2820             sub {
2821              
2822             #get passed MARC::Record object
2823              
2824             my $record = shift;
2825              
2826             #declaration of return array
2827              
2828             my @warningstoreturn = ();
2829              
2830             push @warningstoreturn, ("");
2831              
2832             return \@warningstoreturn;
2833              
2834             } #
2835              
2836             =cut
2837              
2838             #########################################
2839             #########################################
2840             #########################################
2841             #########################################
2842              
2843             ##########################################
2844             ##########################################
2845             ##########################################
2846             ##########################################
2847             ##########################################
2848             #### Validate 006 and 008 and related ####
2849             ##########################################
2850             ##########################################
2851             ##########################################
2852             ##########################################
2853             ##########################################
2854             ##########################################
2855              
2856             ##########################
2857             ##########################
2858             ##########################
2859              
2860             =head2 _validate006($field006)
2861              
2862             Internal sub that checks the validity of 006 bytes.
2863             Used by the check_006 method for 006 validation.
2864              
2865             =head2 DESCRIPTION
2866              
2867             Checks the validity of 006 bytes.
2868             Continuing resources/serials 006 may not work (not thoroughly tested, since 006 would usually be coded for serials, with 006 for other material types?).
2869              
2870             =head2 OTHER INFO
2871              
2872             Current version implements material specific validation through internal subs for each material type. Those internal subs allow for checking either 006 or 006 material specific bytes.
2873              
2874             =cut
2875              
2876             sub _validate006 {
2877              
2878             #populate subroutine $field006 variable with passed string
2879 52     52   57 my $field006 = shift;
2880              
2881             #declaration of return array
2882 52         59 my @warningstoreturn = ();
2883              
2884             #make sure passed 006 field is exactly 18 bytes
2885 52 50       84 if (length($field006) != 18) {push @warningstoreturn, ("006: Not 18 characters long. Bytes not validated ($field006).");}
  0         0  
2886              
2887             #return if 006 field of 18 bytes was not found
2888 52 50       88 return (\@warningstoreturn) if (@warningstoreturn);
2889              
2890             ######################################
2891             ### Material Specific Bytes, 01-17 ###
2892             ######################################
2893             ##### checked via internal subs ######
2894             ######################################
2895              
2896             #first byte will be either mattype (if not 's') or biblvl ('s' for continuing resources)
2897 52         70 my $mattype = substr($field006, 0, 1);
2898 52         53 my $biblvl = substr($field006, 0, 1);
2899 52         60 my $material_specific_bytes = substr($field006, 1, 17);
2900              
2901             ### Check continuing resources (serials) ###
2902 52 50       241 if ($biblvl =~ /^[s]$/) {
    100          
    100          
    100          
    100          
    100          
    50          
2903 0         0 my @warnings_returned = _check_cont_res_bytes($mattype, $biblvl, $material_specific_bytes);
2904 0 0       0 if (@warnings_returned) {
2905             #revise warning messages to report 006 rather than 008
2906 0         0 @warnings_returned = _reword_006(@warnings_returned);
2907 0         0 push @warningstoreturn, @warnings_returned;
2908             } #if bad bytes
2909             } #continuing resources (serials)
2910              
2911             #books
2912             elsif ($mattype =~ /^[at]$/) {
2913 11         19 my @warnings_returned = _check_book_bytes($mattype, $biblvl, $material_specific_bytes);
2914 11 50       25 if (@warnings_returned) {
2915             #revise warning messages to report 006 rather than 008
2916 11         17 @warnings_returned = _reword_006(@warnings_returned);
2917 11         27 push @warningstoreturn, @warnings_returned;
2918             } #if bad bytes
2919             } #books
2920              
2921             #electronic resources/computer files
2922             elsif ($mattype =~ /^[m]$/) {
2923 8         14 my @warnings_returned = _check_electronic_resources_bytes($mattype, $biblvl, $material_specific_bytes);
2924 8 50       19 if (@warnings_returned) {
2925             #revise warning messages to report 006 rather than 008
2926 8         15 @warnings_returned = _reword_006(@warnings_returned);
2927 8         16 push @warningstoreturn, @warnings_returned;
2928             } #if bad bytes
2929             } #electronic resources
2930            
2931             #cartographic materials/maps
2932             elsif ($mattype =~ /^[ef]$/) {
2933 11         17 my @warnings_returned = _check_cartographic_bytes($mattype, $biblvl, $material_specific_bytes);
2934 11 50       22 if (@warnings_returned) {
2935             #revise warning messages to report 006 rather than 008
2936 11         17 @warnings_returned = _reword_006(@warnings_returned);
2937 11         16 push @warningstoreturn, @warnings_returned;
2938             } #if bad bytes
2939             } #cartographic
2940            
2941             #music and sound recordings
2942             elsif ($mattype =~ /^[cdij]$/) {
2943 10         16 my @warnings_returned = _check_music_bytes($mattype, $biblvl, $material_specific_bytes);
2944 10 50       19 if (@warnings_returned) {
2945             #revise warning messages to report 006 rather than 008
2946 10         18 @warnings_returned = _reword_006(@warnings_returned);
2947 10         21 push @warningstoreturn, @warnings_returned;
2948             } #if bad bytes
2949             } #music/sound recordings
2950              
2951             #visual materials
2952             elsif ($mattype =~ /^[gkor]$/) {
2953 9         16 my @warnings_returned = _check_visual_material_bytes($mattype, $biblvl, $material_specific_bytes);
2954 9 50       21 if (@warnings_returned) {
2955             #revise warning messages to report 006 rather than 008
2956 9         14 @warnings_returned = _reword_006(@warnings_returned);
2957 9         16 push @warningstoreturn, @warnings_returned;
2958             } #if bad bytes
2959             } #visual materials
2960              
2961             #mixed materials
2962             elsif ($mattype =~ /^[p]$/) {
2963 3         6 my @warnings_returned = _check_mixed_material_bytes($mattype, $biblvl, $material_specific_bytes);
2964 3 50       8 if (@warnings_returned) {
2965             #revise warning messages to report 006 rather than 008
2966 3         7 @warnings_returned = _reword_006(@warnings_returned);
2967 3         5 push @warningstoreturn, @warnings_returned;
2968             } #if bad bytes
2969             } #mixed materials
2970              
2971 52         150 return (\@warningstoreturn);
2972              
2973             } #_validate006
2974              
2975              
2976              
2977             ##########################
2978             ##########################
2979             ##########################
2980              
2981             =head2 NAME
2982              
2983             parse008date($field008string)
2984              
2985             =head2 DESCRIPTION
2986              
2987              
2988             Subroutine parse008date returns four-digit year, two-digit month, and two-digit day.
2989             It requres an 008 string at least 6 bytes long.
2990             Also checks of current year, month, day vs. 008 creation date, reporting an error if creation date appears to be later than local time. Assumes 008 dates of 00mmdd to 70mmdd represent post-2000 dates.
2991              
2992             Relies upon internal _get_current_date().
2993              
2994             =head2 SYNOPSIS
2995              
2996             my ($earlyyear, $earlymonth, $earlyday);
2997             print ("What is the earliest create date desired (008 date, in yymmdd)? ");
2998             while (my $earlydate = <>) {
2999             chomp $earlydate;
3000             my $field008 = $earlydate;
3001             my $yyyymmdderr = MARC::Errorchecks::parse008date($field008);
3002             my @parsed008date = split "\t", $yyyymmdderr;
3003             $earlyyear = shift @parsed008date;
3004             $earlymonth = shift @parsed008date;
3005             $earlyday = shift @parsed008date;
3006             my $errors = join "\t", @parsed008date;
3007             if ($errors) {
3008             if ($errors =~ /is too short/) {
3009             print "Please enter a longer date, $errors\nEnter date (yymmdd): ";
3010             }
3011             else {print "$errors\nEnter valid date (yymmdd): ";}
3012             } #if errors
3013             else {last;}
3014             }
3015              
3016             =head2 TODO parse008date
3017              
3018             Remove local practice or revise for easier updating/customization.
3019              
3020             =cut
3021              
3022             sub parse008date {
3023              
3024 70     70 1 96 my $field008 = shift;
3025 70 50       159 if (length ($field008) < 6) { return "\t\t\t$field008 is too short";}
  0         0  
3026              
3027             #get current yyyymmdd
3028 70         134 my $current_date = MARC::Errorchecks::_get_current_date();
3029             #get current year
3030 70         129 my $current_year = substr($current_date, 0, 4);
3031              
3032              
3033 70         98 my $hasbadchars = "";
3034 70         102 my $dateentered = substr($field008,0,6);
3035 70 100       465 if ($dateentered =~ /^[0-9]+$/) {
3036 69         102 my $yearentered = substr($dateentered, 0, 2);
3037             #validate year portion--change dates to reflect local implementation of code
3038             #(and for future use--after 2070)
3039             #year created less than or equal to 70 considered 20xx
3040              
3041 69 100 33     161 if ($yearentered <= 70) {$yearentered += 2000;}
  68 50       93  
  1         3  
3042             #year created between 71 and 99 considered 19xx
3043             elsif ((71 <= $yearentered) && ($yearentered <= 99)) {$yearentered += 1900;}
3044              
3045             #complain if year is after current year
3046 69 100       227 if ($yearentered > $current_year) {
    100          
3047 1         5 $hasbadchars .= "Year entered ($yearentered) is after current year ($current_year)\t";
3048             } #if creation year is greater than current year
3049              
3050             #complain if creation year is before 1980
3051             ###This is a local practice check. Customize according to local needs. ###
3052             elsif ($yearentered < 1980) {
3053 1         6 $hasbadchars .= "Year entered ($yearentered) is before 1980\t";
3054             } #if date is less than or equal to 1980
3055             #validate month portion
3056 69         122 my $monthentered = substr($dateentered, 2, 2);
3057 69 100 66     328 if (($monthentered < 1) || ($monthentered > 12)) {$hasbadchars .= "Month entered is greater than 12 or is 00\t";}
  2         12  
3058              
3059             #validate day portion
3060 69         109 my $dayentered = substr($dateentered, 4, 2);
3061              
3062 69 100 66     1021 if (($monthentered =~ /^01$|^03$|^05$|^07$|^08$|^10$|^12$/) && (($dayentered < 1) || ($dayentered > 31))) {$hasbadchars .= "Day entered is greater than 31 or is 00\t";}
  1 50 66     3  
  0 50 0     0  
    100 33        
      0        
      33        
      66        
3063 0         0 elsif (($monthentered =~ /^04$|^06$|^09$|^11$/) && (($dayentered < 1) || ($dayentered > 30))) {$hasbadchars .= "Day entered is greater than 30 or is 00\t";}
3064             elsif (($monthentered =~ /^02$/) && (($dayentered < 1) || ($dayentered > 29))) {$hasbadchars .= "Day entered is greater than 29 or is 00\t";}
3065             elsif (($dayentered < 1) || ($dayentered > 31)) {
3066 1         4 $hasbadchars .= "Day entered is greater than 31 or is 00\t";
3067             } #elsif day is 0 or greater than 31 and month is not normal
3068              
3069 69         147 my $full_date_entered = join "", ($yearentered, $monthentered, $dayentered);
3070 69 100       166 if ($full_date_entered > $current_date) {
3071 1         5 $hasbadchars .= "Date entered ($dateentered) may be later than current date ($current_date)\t";
3072             } #date entered > current date
3073              
3074 69         269 return (join "\t", $yearentered, $monthentered, $dayentered, $hasbadchars)
3075              
3076             } #if date entered has only digits
3077              
3078             else {
3079 1         5 return "\t\t\tRecord creation date ($dateentered) has non-numeric characters";
3080             } #else creation date has non-digits
3081              
3082             #should never reach this point but just in case
3083 0         0 $hasbadchars .= 'Something is coded wrong in parse008date.';
3084 0         0 return "\t\t\t$hasbadchars";
3085              
3086             } #parse008date
3087              
3088             ##########################
3089             ##########################
3090             ##########################
3091              
3092             =head2 validate008 reworked
3093              
3094             Reworking of the validate008 sub.
3095             Revised to work more like other Errorchecks and Lintadditions checks.
3096             Returns array ref of errors.
3097             Previous version returned hash ref of 008 byte key-value pairs,
3098             array ref of cleaned bytes, and scalar ref of errors.
3099             New version returns only an array ref of errors.
3100              
3101             =head2 validate008 ($field008, $mattype, $biblvl)
3102              
3103             Checks the validity of 008 bytes.
3104             Used by the check_008 method for 008 validation.
3105              
3106             =head2 DESCRIPTION
3107              
3108             Checks the validity of 008 bytes.
3109             Depends upon 008 being based upon LDR/06,
3110             so continuing resources/serials records may not work.
3111             Checks LDR/07 for 's' for serials before checking material specific bytes.
3112              
3113             =head2 OTHER INFO
3114              
3115             Character positions 00-17 and 35-39 are defined the same across all types of material, with special consideration for position 06.
3116              
3117             Current version implements material specific validation through internal subs for each material type. Those internal subs allow for checking either 006 or 008 material specific bytes.
3118              
3119              
3120             =head2 Synopsis
3121              
3122             use MARC::Record;
3123             use MARC::Errorchecks;
3124              
3125             #$mattype and $biblvl are from LDR/06 and LDR/07
3126             #my $mattype = substr($leader, 6, 1);
3127             #my $biblvl = substr($leader, 7, 1);
3128             #my $field008 = $record->field('008')->as_string();
3129             my $field008 = '000101s20002000nyu eng d';
3130             my @warningsfrom008 = @{MARC::Errorchecks::validate008($field008, $mattype, $biblvl)};
3131              
3132             print join "\t", @warningsfrom008, "\n";
3133              
3134             =head2 TO DO (validate008)
3135              
3136             Add requirement that 40 char string needs to be passed in.
3137             Add error checking for less than 40 char string.
3138             --Partially done--Less than 40 characters leads to error.
3139             Verify datetypes that allow multiple dates.
3140              
3141             Verify continuing resource checking (not thoroughly tested).
3142              
3143             Determine proper values for date type 'e'.
3144              
3145              
3146             =head2 SKIP CODE for SERIALS
3147              
3148             ### This is not here for any particular reason,
3149             ### I just wanted to save it for future use if I needed it.
3150             #stop checking if record is not coded 'm', monograph
3151             unless ($biblvl eq 'm') {
3152             push @warningstoreturn, ("LDR: Record coded $biblvl, not monograph. Further parsing of 008 will not be done for this record.");
3153             return (\@warningstoreturn);
3154             } #unless bib level is 'm'
3155              
3156              
3157              
3158              
3159             =head2 TEST CODE
3160              
3161             #test code
3162             use MARC::Errorchecks;
3163             use MARC::Record;
3164             my $leader = '00050nam';
3165             my $field008 = '000101s20002000nyu eng d';
3166             my $mattype = substr($leader, 6, 1);
3167             my $biblvl = substr($leader, 7, 1);
3168              
3169             print "$field008\n";
3170             my @warningsfrom008 = @{validate008($field008, $mattype, $biblvl)};
3171              
3172             print join "\t", @warningsfrom008, "\n";
3173              
3174             =cut
3175              
3176             #####################################
3177              
3178              
3179             ##########################################
3180             ######### Start validate008 sub ##########
3181             ##########################################
3182              
3183             sub validate008 {
3184              
3185             #populate subroutine $field008 variable with passed string
3186 72     72 1 69918 my $field008 = shift;
3187             #populate subroutine $mattype and $biblvl with passed strings
3188 72         122 my $mattype = shift;
3189 72         96 my $biblvl = shift;
3190              
3191             #declaration of return array
3192 72         96 my @warningstoreturn = ();
3193              
3194             #setup country and language code validation hashes
3195             #from the MARC::Lint::CodeData module
3196 5     5   6271 use MARC::Lint::CodeData qw(%LanguageCodes %ObsoleteLanguageCodes %CountryCodes %ObsoleteCountryCodes);
  5         57322  
  5         55452  
3197              
3198             #make sure passed 008 field is exactly 40 bytes
3199 72 100       198 if (length($field008) != 40) {push @warningstoreturn, ("008: Not 40 characters long. Bytes not validated ($field008).");}
  2         7  
3200              
3201             #return if 008 field of 40 bytes was not found
3202 72 100       162 return (\@warningstoreturn) if (@warningstoreturn);
3203              
3204             #get the values of the all-format positions
3205 70         584 my %field008hash = (
3206             dateentered => substr($field008,0,6),
3207             datetype => substr($field008,6,1),
3208             date1 => substr($field008,7,4),
3209             date2 => substr($field008,11,4),
3210             pubctry => substr($field008,15,3),
3211             ### format specific 18-34 ###
3212             langcode => substr($field008,35,3),
3213             modrec => substr($field008,38,1),
3214             catsource => substr($field008,39,1)
3215             );
3216              
3217             #validate the all-format bytes
3218              
3219             # Date entered on file (byte[0]-[5])
3220             #6 digits, yymmdd
3221             #parse created date
3222             #call parse008date to do work of date error checking
3223 70         172 my $yyyymmdderr = MARC::Errorchecks::parse008date($field008hash{dateentered});
3224 70         278 my @parsed008date = split "\t", $yyyymmdderr;
3225 70         119 my $yearentered = shift @parsed008date;
3226 70         98 my $monthentered = shift @parsed008date;
3227 70         104 my $dayentered = shift @parsed008date;
3228 70         110 my $dateerrors = join "\t", @parsed008date;
3229              
3230             #unless date entered is only 6 digits and no errors were found, report the errors
3231 70 100 100     430 unless (($field008hash{dateentered} =~ /^\d{6}$/) && $dateerrors !~ /entered/) {
3232 5         25 push @warningstoreturn, ("008: Bytes 0-5, Date entered has bad characters. $dateerrors.");
3233             } #unless date entered is 6 digits and no errors were found
3234              
3235             #Type of date/Publication status (byte[6])
3236             #my $datetype = substr($field008,6,1);
3237 70 100       228 unless ($field008hash{datetype} =~ /^[bcdeikmnpqrstu|]$/) {
3238 1         5 push @warningstoreturn, (join "", "008: Byte 6, Date type ($field008hash{datetype}) has bad characters.");
3239             } #unless date type is valid code
3240              
3241             ###### Remove the following ###########
3242             ### Remnant of writing of code ####
3243              
3244             #b - No dates given; B.C. date involved
3245             #c - Continuing resource currently published
3246             #d - Continuing resource ceased publication
3247             #e - Detailed date
3248             #i - Inclusive dates of collection
3249             #k - Range of years of bulk of collection
3250             #m - Multiple dates
3251             #n - Dates unknown
3252             #p - Date of distribution/release/issue and production/recording session when different
3253             #q - Questionable date
3254             #r - Reprint/reissue date and original date
3255             #s - Single known date/probable date
3256             #t - Publication date and copyright date
3257             #u - Continuing resource status unknown
3258             #| - No attempt to code
3259             #########################################
3260              
3261              
3262             #Date 1 (byte[7]-[10])
3263 70 50 66     245 unless (($field008hash{date1} =~ /^[u\d|]{4}$/) || (($field008hash{date1} =~ /^\s{4}$/) && ($field008hash{datetype} =~ /^b$/)))
  2   66     7  
3264             {push @warningstoreturn, ("008: Bytes 7-10, Date1 has bad characters ($field008hash{date1}).")};
3265              
3266             ###on date2, verify datetypes that are allowed to have only one date
3267             # Date 2 (byte[11]-[14])
3268             #check datetype for single date
3269 70 100       206 if ($field008hash{datetype} =~ /^[bqs]$/) {
    100          
3270             #if single, need to have four spaces as date2
3271 65 100       204 unless ($field008hash{date2} =~ /^\s{4}$/) {
3272 2         13 push @warningstoreturn, ("008: Bytes 11-14, Date2 ($field008hash{date2}) should be blank for this date type ($field008hash{datetype}).")
3273             } #unless date2 has 4 blanks for types b, q, s
3274             } #if date type is b, q, or s
3275             #may need elsif for 4 blank spaces with other datetypes or other elsifs for different datetypes (e.g. detailed date, 'e')
3276             elsif ($field008hash{date2} !~ /^[u\d|]{4}$/) {
3277 4         17 push @warningstoreturn, ("008: Bytes 11-14, Date2 ($field008hash{date2}) has bad characters or is blank which is not consistent with this date type ($field008hash{datetype}).")}
3278              
3279              
3280             # Place of publication, production, or execution (byte[15]-[17])
3281             #my $pubctry = substr($field008,15,3);
3282             ###Get codes from MARC Country Codes list
3283              
3284             #see if country code matches valid code
3285 70 100       231 my $validctrycode = 1 if $CountryCodes{$field008hash{pubctry}};
3286             #look for obsolete code match if valid code was not matched
3287 70 50       178 my $obsoletectrycode = 1 if $ObsoleteCountryCodes{$field008hash{pubctry}};
3288              
3289 70 100       127 unless ($validctrycode) {
3290             #code did not match valid code, so see if it may have been valid before
3291 2 50       7 if ($obsoletectrycode) {
3292 0         0 push @warningstoreturn, ("008: Bytes 15-17, Country of Publication ($field008hash{pubctry}) may be obsolete.");
3293             }
3294             else {
3295 2         8 push @warningstoreturn, ("008: Bytes 15-17, Country of Publication ($field008hash{pubctry}) is not valid.")
3296             }
3297             } #unless valid country code was found
3298            
3299             #######################################################
3300             #### byte[18]-[34] are format specific (see below) ####
3301             ######################################################
3302              
3303             # Language (byte[35]-[37])
3304              
3305             #%LanguageCodes %ObsoleteLanguageCodes
3306 70 100       216 my $validlang = 1 if (exists $LanguageCodes{$field008hash{langcode}});
3307             #look for invalid code match if valid code was not matched
3308 70 50       636 my $obsoletelang = 1 if (exists $ObsoleteLanguageCodes{$field008hash{langcode}});
3309              
3310             # skip valid subfields
3311 70 100       132 unless ($validlang) {
3312             #report invalid matches as possible obsolete codes
3313 2 50       6 if ($obsoletelang) {
3314 0         0 push @warningstoreturn, ("008: Bytes 35-37, Language ($field008hash{langcode}) may be obsolete.");
3315             } #if obsolete
3316             else {
3317 2         16 push @warningstoreturn, ("008: Bytes 35-37, Language ($field008hash{langcode}) not valid.");
3318             } #else code not found
3319             } # unless found valid code
3320              
3321             #report new 'zxx' code when ' ' (3-blanks) is existing code
3322 70 100       156 if ($field008hash{langcode} eq ' ') {
3323 1         6 push @warningstoreturn, ("008: Bytes 35-37, Language ($field008hash{langcode}) must now be coded 'zxx' for No linguistic content.");
3324             } #if 008/35-37 is 3-blanks
3325             ##################################################
3326              
3327             # Modified record (byte[38])
3328             #my $modrec = substr($field008,38,1);
3329 70 100       233 unless ($field008hash{modrec} =~ /^[dorsx|\s]$/) {
3330 1         4 push @warningstoreturn, ("008: Byte 38, Modified record has bad characters ($field008hash{modrec}).");
3331             } #unless modrec has valid characters
3332              
3333             # Cataloging source (byte[39])
3334             #my $catsource = substr($field008,39,1);
3335 70 100       190 unless ($field008hash{catsource} =~ /^[cdu|\s]$/) {
3336 2         10 push @warningstoreturn, ("008: Byte 39, Cataloging source has bad characters ($field008hash{catsource}).");
3337             } #unless Cataloging source is valid
3338              
3339             ######################################
3340             ### Material Specific Bytes, 18-34 ###
3341             ######################################
3342             ##### checked via internal subs ######
3343             ######################################
3344              
3345 70         125 my $material_specific_bytes = substr($field008,18, 17);
3346              
3347              
3348             ### Check continuing resources (serials) ###
3349 70 50       498 if ($biblvl =~ /^[s]$/) {
    100          
    100          
    100          
    100          
    100          
    50          
3350 0         0 my @warnings_returned = _check_cont_res_bytes($mattype, $biblvl, $material_specific_bytes);
3351 0 0       0 if (@warnings_returned) {
3352             #revise warning messages to report 008 rather than 006
3353 0         0 @warnings_returned = _reword_008(@warnings_returned);
3354 0         0 push @warningstoreturn, @warnings_returned;
3355             } #if bad bytes
3356             } #continuing resources (serials)
3357              
3358             #books
3359             elsif ($mattype =~ /^[at]$/) {
3360 29         64 my @warnings_returned = _check_book_bytes($mattype, $biblvl, $material_specific_bytes);
3361 29 100       76 if (@warnings_returned) {
3362             #revise warning messages to report 008 rather than 006
3363 12         31 @warnings_returned = _reword_008(@warnings_returned);
3364 12         28 push @warningstoreturn, @warnings_returned;
3365             } #if bad bytes
3366             } #books
3367              
3368             #electronic resources/computer files
3369             elsif ($mattype =~ /^[m]$/) {
3370 8         24 my @warnings_returned = _check_electronic_resources_bytes($mattype, $biblvl, $material_specific_bytes);
3371 8 50       19 if (@warnings_returned) {
3372             #revise warning messages to report 008 rather than 006
3373 8         19 @warnings_returned = _reword_008(@warnings_returned);
3374 8         20 push @warningstoreturn, @warnings_returned;
3375             } #if bad bytes
3376             } #electronic resources
3377            
3378             #cartographic materials/maps
3379             elsif ($mattype =~ /^[ef]$/) {
3380 11         28 my @warnings_returned = _check_cartographic_bytes($mattype, $biblvl, $material_specific_bytes);
3381 11 50       29 if (@warnings_returned) {
3382             #revise warning messages to report 008 rather than 006
3383 11         23 @warnings_returned = _reword_008(@warnings_returned);
3384 11         23 push @warningstoreturn, @warnings_returned;
3385             } #if bad bytes
3386             } #cartographic
3387            
3388             #music and sound recordings
3389             elsif ($mattype =~ /^[cdij]$/) {
3390 10         23 my @warnings_returned = _check_music_bytes($mattype, $biblvl, $material_specific_bytes);
3391 10 50       25 if (@warnings_returned) {
3392             #revise warning messages to report 008 rather than 006
3393 10         34 @warnings_returned = _reword_008(@warnings_returned);
3394 10         22 push @warningstoreturn, @warnings_returned;
3395             } #if bad bytes
3396             } #music/sound recordings
3397              
3398             #visual materials
3399             elsif ($mattype =~ /^[gkor]$/) {
3400 9         21 my @warnings_returned = _check_visual_material_bytes($mattype, $biblvl, $material_specific_bytes);
3401 9 50       26 if (@warnings_returned) {
3402             #revise warning messages to report 008 rather than 006
3403 9         20 @warnings_returned = _reword_008(@warnings_returned);
3404 9         19 push @warningstoreturn, @warnings_returned;
3405             } #if bad bytes
3406             } #visual materials
3407              
3408             #mixed materials
3409             elsif ($mattype =~ /^[p]$/) {
3410 3         9 my @warnings_returned = _check_mixed_material_bytes($mattype, $biblvl, $material_specific_bytes);
3411 3 50       9 if (@warnings_returned) {
3412             #revise warning messages to report 008 rather than 006
3413 3         11 @warnings_returned = _reword_008(@warnings_returned);
3414 3         7 push @warningstoreturn, @warnings_returned;
3415             } #if bad bytes
3416             } #mixed materials
3417              
3418              
3419 70         840 return (\@warningstoreturn);
3420              
3421             } #validate008
3422            
3423             =head2 _check_cont_res_bytes($mattype, $biblvl, $bytes)
3424              
3425             Internal sub to check 008 bytes 18-34 or 006 bytes 01-17 for Continuing Resources.
3426              
3427             Receives material type, bibliographic level, and a 17-byte string to be validated. The bytes should be bytes 18-34 of the 008, or bytes 01-17 of the 006.
3428              
3429             =cut
3430              
3431             sub _check_cont_res_bytes {
3432              
3433             ########################################
3434             ########################################
3435             ########################################
3436             ## Continuing Resources bytes 18-34 ##
3437             ########################################
3438             ########################################
3439             ########################################
3440              
3441 0     0   0 my $mattype = shift;
3442 0         0 my $biblvl = shift;
3443 0         0 my $material_specific_bytes = shift;
3444              
3445 0         0 my %bytehash = ();
3446 0         0 my @warningstoreturn = ();
3447              
3448             ### Check continuing resources (serials) ###
3449 0 0       0 if ($biblvl =~ /^[s]$/) {
3450              
3451             # Frequency (byte[18/1])
3452 0         0 $bytehash{frequency} = substr($material_specific_bytes, 0, 1);
3453 0 0       0 unless ($bytehash{frequency} =~ /^[abcdefghijkmqstuwz|\s]$/) {
3454 0         0 push @warningstoreturn, ("008: Byte 18 (006/01), Continuing resources-Frequency has bad characters ($bytehash{frequency}).");
3455             } #Continuing resources 18
3456              
3457             # Regularity (byte[19/2])
3458 0         0 $bytehash{regularity} = substr($material_specific_bytes, 1, 1);
3459 0 0       0 unless ($bytehash{regularity} =~ /^[nrux|]$/) {
3460 0         0 push @warningstoreturn, ("008: Byte 19 (006/02), Continuing resources-Regularity has bad characters ($bytehash{regularity}).");
3461             } #Continuing resources 19
3462              
3463             #Undefined (was ISSN Center) (byte[20/3])
3464 0         0 $bytehash{contresundef20} = substr($material_specific_bytes, 2, 1);
3465 0 0       0 unless ($bytehash{contresundef20} =~ /^[|\s]$/) {
3466 0         0 push @warningstoreturn, ("008: Byte 20 (006/03), Continuing resources-Undef20 has bad characters ($bytehash{contresundef20}).")
3467             } #Continuing resources 20
3468              
3469             #Type of continuing resource (byte[21/4])
3470 0         0 $bytehash{typeofcontres} = substr($material_specific_bytes, 3, 1);
3471 0 0       0 unless ($bytehash{typeofcontres} =~ /^[dlmnpw|\s]$/) {
3472 0         0 push @warningstoreturn, ("008: Byte 21 (006/04), Continuing resources-Type of continuing resource has bad characters ($bytehash{typeofcontres}).");
3473             } #Continuing resources 21
3474              
3475             #Form of original item (byte[22/5])
3476 0         0 $bytehash{formoforig} = substr($material_specific_bytes, 4, 1);
3477 0 0       0 unless ($bytehash{formoforig} =~ /^[abcdefoqs\s]$/) {
3478 0         0 push @warningstoreturn, ("008: Byte 22 (006/05), Continuing resources-Form of original has bad characters ($bytehash{formoforig}).");
3479             } #Continuing resources 22
3480              
3481             #Form of item (byte[23/6])
3482 0         0 $bytehash{formofitem} = substr($material_specific_bytes, 5, 1);
3483 0 0       0 unless ($bytehash{formofitem} =~ /^[abcdfoqrs|\s]$/) {
3484 0         0 push @warningstoreturn, ("008: Byte 23 (006/06), Continuing resources-Form of item has bad characters ($bytehash{formofitem}).");
3485             } #Continuing resources 23
3486              
3487             #Nature of entire work (byte[24/7])
3488 0         0 $bytehash{natureofwk} = substr($material_specific_bytes, 6, 1);
3489 0 0       0 unless ($bytehash{natureofwk} =~ /^[abcdefghiklmnopqrstuvwyz56|\s]$/) {
3490 0         0 push @warningstoreturn, ("008: Byte 24 (006/07), Continuing resources-Nature of work has bad characters ($bytehash{natureofwk}).");
3491             } #Continuing resources 24
3492              
3493             #Nature of contents (byte[25/8]-[27/10])
3494 0         0 $bytehash{contrescontents} = substr($material_specific_bytes, 7, 3);
3495 0 0       0 unless ($bytehash{contrescontents} =~ /^[abcdefghiklmnopqrstuvwyz56|\s]{3}$/) {
3496 0         0 push @warningstoreturn, ("008: Bytes 25-27 (006/08-10), Continuing resources-Contents has bad characters ($bytehash{contrescontents}).");
3497             } #Continuing resources 25-27
3498              
3499             #Government publication (byte[28/11])
3500 0         0 $bytehash{govtpub} = substr($material_specific_bytes, 10, 1);
3501 0 0       0 unless ($bytehash{govtpub} =~ /^[acfilmosuz|\s]$/) {
3502 0         0 push @warningstoreturn, ("008: Byte 28 (006/11), Continuing resources-Govt publication has bad characters ($bytehash{govtpub}).");
3503             } #Continuing resources 28
3504              
3505             #Conference publication (byte[29/12])
3506 0         0 $bytehash{confpub} = substr($material_specific_bytes, 11, 1);
3507 0 0       0 unless ($bytehash{confpub} =~ /^[01|]$/) {
3508 0         0 push @warningstoreturn, ("008: Byte 29 (006/12), Continuing resources-Conference publication has bad characters ($bytehash{confpub}).");
3509             } #Continuing resources 29
3510              
3511             #Undefined (byte[30/13]-[32/15])
3512 0         0 $bytehash{contresundef30to32} = substr($material_specific_bytes, 12, 3);
3513 0 0       0 unless ($bytehash{contresundef30to32} =~ /^[|\s]{3}$/) {
3514 0         0 push @warningstoreturn, ("008: Bytes 30-32 (006/13-15), Continuing resources-Undef30to32 has bad characters ($bytehash{contresundef30to32}).");
3515             } #Continuing resources 30-32
3516              
3517             #Original alphabet or script of title (byte[33/16])
3518 0         0 $bytehash{origalphabet} = substr($material_specific_bytes, 13, 1);
3519 0 0       0 unless ($bytehash{origalphabet} =~ /^[abcdefghijkluz|\s]$/) {
3520 0         0 push @warningstoreturn, ("008: Byte 33 (006/16), Continuing resources-Original alphabet has bad characters ($bytehash{origalphabet}).");
3521             } #Continuing resources 33
3522              
3523             #Entry convention (byte[34/17])
3524 0         0 $bytehash{entryconvention} = substr($material_specific_bytes, 16, 1);
3525 0 0       0 unless ($bytehash{entryconvention} =~ /^[012|]$/) {
3526 0         0 push @warningstoreturn, ("008: Byte 34 (006/17), Continuing resources-Entry convention has bad characters ($bytehash{entryconvention}).");
3527             } #Continuing resources 34
3528              
3529             } # Continuing Resources (biblvl 's')
3530            
3531 0         0 return @warningstoreturn;
3532              
3533             } # _check_cont_res_bytes
3534              
3535             =head2 _check_book_bytes($mattype, $biblvl, $bytes)
3536              
3537             Internal sub to check 008 bytes 18-34 or 006 bytes 01-17 for Books.
3538              
3539             Receives material type, bibliographic level, and a 17-byte string to be validated. The bytes should be bytes 18-34 of the 008, or bytes 01-17 of the 006.
3540              
3541             =cut
3542              
3543             sub _check_book_bytes {
3544              
3545 40     40   53 my $mattype = shift;
3546 40         60 my $biblvl = shift;
3547 40         42 my $material_specific_bytes = shift;
3548              
3549 40         62 my %bytehash = ();
3550 40         47 my @warningstoreturn = ();
3551              
3552             ########################################
3553             ########################################
3554             ########################################
3555             ########### Books bytes 18-34 ##########
3556             ########################################
3557             ########################################
3558             ########################################
3559              
3560              
3561 40 50       115 if ($mattype =~ /^[at]$/) {
3562              
3563             # Illustrations (byte [18/1]-[21/4])
3564 40         93 $bytehash{illustrations} = substr($material_specific_bytes, 0, 4);
3565 40 100       129 unless ($bytehash{illustrations} =~ /^[abcdefghijklmop|\s]{4}$/) {
3566 2         16 push @warningstoreturn, ("008: Bytes 18-21 (006/01-04), Books-Illustrations has bad characters ($bytehash{illustrations}).");
3567             } #Books-18-21
3568              
3569             # Target audience (byte 22/5)
3570 40         88 $bytehash{audience} = substr($material_specific_bytes, 4, 1);
3571 40 100       129 unless ($bytehash{audience} =~ /^[abcdefgj|\s]$/) {
3572 2         8 push @warningstoreturn, ("008: Byte 22 (006/05), Books-Audience has bad characters ($bytehash{audience}).")
3573             } #Books 22
3574              
3575             # Form of item (byte 23/6)
3576 40         91 $bytehash{formofitem} = substr($material_specific_bytes, 5, 1);
3577 40 100       120 unless ($bytehash{formofitem} =~ /^[abcdfoqrs|\s]$/) {
3578 2         8 push @warningstoreturn, ("008: Byte 23 (006/06), Books-Form of item has bad characters ($bytehash{formofitem}).")
3579             } #Books 23
3580              
3581             # Nature of contents (byte[24/7]-[27/10])
3582 40         86 $bytehash{bkcontents} = substr($material_specific_bytes, 6, 4);
3583 40 100       123 unless ($bytehash{bkcontents} =~ /^[abcdefgijklmnopqrstuvwyz256|\s]{4}$/) {
3584 2         8 push @warningstoreturn, ("008: Bytes 24-27 (006/07-10), Books-Contents has bad characters ($bytehash{bkcontents}).")
3585             } #Books 24-27
3586              
3587             #Government publication (byte 28/11)
3588 40         79 $bytehash{govtpub} = substr($material_specific_bytes, 10, 1);
3589 40 100       115 unless ($bytehash{govtpub} =~ /^[acfilmosuz|\s]$/) {
3590 2         8 push @warningstoreturn, ("008: Byte 28 (006/11), Books-Govt publication has bad characters ($bytehash{govtpub}).")
3591             } #Books 28
3592              
3593             #Conference publication (byte 29/12)
3594 40         71 $bytehash{confpub} = substr($material_specific_bytes, 11, 1);
3595 40 100       113 unless ($bytehash{confpub} =~ /^[01|]$/) {
3596 3         12 push @warningstoreturn, ("008: Byte 29 (006/12), Books-Conference publication has bad characters ($bytehash{confpub}).")
3597             } #Books 29
3598              
3599             #Festschrift (byte 30/13)
3600 40         70 $bytehash{fest} = substr($material_specific_bytes, 12, 1);
3601 40 100       106 unless ($bytehash{fest} =~ /^[01|]$/) {
3602 2         8 push @warningstoreturn, ("008: Byte 30 (006/13), Books-Festschrift has bad characters ($bytehash{fest}).")
3603             } #Books 30
3604              
3605             #Index (byte 31/14)
3606 40         74 $bytehash{bkindex} = substr($material_specific_bytes, 13, 1);
3607 40 100       112 unless ($bytehash{bkindex} =~ /^[01|]$/) {
3608 2         9 push @warningstoreturn, ("008: Byte 31 (006/14), Books-Index has bad characters ($bytehash{bkindex}).");
3609             } #Books 31
3610              
3611             #Undefined (byte 32/15)
3612 40         104 $bytehash{obsoletebyte32} = substr($material_specific_bytes, 14, 1);
3613 40 100       121 unless ($bytehash{obsoletebyte32} =~ /^[|\s]$/) {
3614 2         7 push @warningstoreturn, ("008: Byte 32 (006/15), Books-Obsoletebyte32 has bad characters ($bytehash{obsoletebyte32}).");
3615             } #Books 32
3616              
3617             #Literary form (byte 33/16)
3618 40         71 $bytehash{fict} = substr($material_specific_bytes, 15, 1);
3619 40 100       115 unless ($bytehash{fict} =~ /^[01defhijmpsu|\s]$/) {
3620 2 50       7 if ($bytehash{fict} eq 'c') {
3621 0         0 push @warningstoreturn, ("008: Byte 33 (006/16), Books-Literary form code 'c' is now covered by 008/24-27 (006/07-10; Nature of contents) value '6'.");
3622             } #if comic
3623             else {
3624 2         8 push @warningstoreturn, ("008: Byte 33 (006/16), Books-Literary form has bad characters ($bytehash{fict}).");
3625             } #else non-comic
3626             } #Books 33
3627              
3628             #Biography (byte 34/17)
3629 40         94 $bytehash{biog} = substr($material_specific_bytes, 16, 1);
3630 40 100       116 unless ($bytehash{biog} =~ /^[abcd|\s]$/) {
3631 2         7 push @warningstoreturn, ("008: Byte 34 (006/17), Books-Biography has bad characters ($bytehash{biog}).");
3632             } #Books 34
3633              
3634             } ### if Books, mattype 'a' or 't'
3635              
3636 40         211 return @warningstoreturn;
3637            
3638             } # _check_book_bytes
3639              
3640             =head2 _check_electronic_resources_bytes($mattype, $biblvl, $bytes)
3641              
3642             Internal sub to check 008 bytes 18-34 or 006 bytes 01-17 for Electronic Resources.
3643              
3644             Receives material type, bibliographic level, and a 17-byte string to be validated. The bytes should be bytes 18-34 of the 008, or bytes 01-17 of the 006.
3645              
3646             =cut
3647              
3648             sub _check_electronic_resources_bytes {
3649              
3650 16     16   21 my $mattype = shift;
3651 16         21 my $biblvl = shift;
3652 16         20 my $material_specific_bytes = shift;
3653              
3654 16         23 my %bytehash = ();
3655 16         23 my @warningstoreturn = ();
3656              
3657             ########################################
3658             ########################################
3659             ########################################
3660             ### Electronic Resources bytes 18-34 ###
3661             ########################################
3662             ########################################
3663             ########################################
3664              
3665             #electronic resources/computer files
3666 16 50       62 if ($mattype =~ /^[m]$/) {
3667              
3668             #Undefined (byte 18-21/1-4)
3669 16         41 $bytehash{electresundef18to21} = substr($material_specific_bytes, 0, 4);
3670 16 100       54 unless ($bytehash{electresundef18to21} =~ /^[|\s]{4}$/) {
3671 2         9 push @warningstoreturn, ("008: Bytes 18-21 (006/01-04), Electronic Resources-Undef18to21 has bad characters ($bytehash{electresundef18to21}).");
3672             } #Electronic Resources 18-21
3673              
3674             #Target audience (byte 22/5)
3675 16         35 $bytehash{audience} = substr($material_specific_bytes, 4, 1);
3676 16 100       53 unless ($bytehash{audience} =~ /^[abcdefgj|\s]$/) {
3677 2         8 push @warningstoreturn, ("008: Byte 22 (006/05), Electronic Resources-Audience has bad characters ($bytehash{audience}).");
3678             } #Electronic Resources 22
3679              
3680             #Target audience (byte 23/6)
3681 16         27 $bytehash{formofitem} = substr($material_specific_bytes, 5, 1);
3682 16 100       46 unless ($bytehash{formofitem} =~ /^[oq|\s]$/) {
3683 2         7 push @warningstoreturn, ("008: Byte 23 (006/06), Electronic Resources-FormofItem has bad characters ($bytehash{formofitem}).");
3684             } #Electronic Resources 22
3685              
3686             #Undefined (byte[24/7]-[25/8])
3687 16         38 $bytehash{electresundef24to25} = substr($material_specific_bytes, 6, 2);
3688 16 100       47 unless ($bytehash{electresundef24to25} =~ /^[|\s]{2}$/) {
3689 2         29 push @warningstoreturn, ("008: Bytes 24-25 (006/07-08), Electronic Resources-Undef24to25 has bad characters ($bytehash{electresundef24to25}).");
3690             } #Electronic Resources 24-25
3691              
3692             #Type of computer file (byte[26/9])
3693 16         27 $bytehash{typeoffile} = substr($material_specific_bytes, 8, 1);
3694 16 100       49 unless ($bytehash{typeoffile} =~ /^[abcdefghijmuz|]$/) {
3695 2         7 push @warningstoreturn, ("008: Byte 26 (006/09), Electronic Resources-Type of file has bad characters ($bytehash{typeoffile}).");
3696             } #Electronic Resources 26
3697              
3698             #Undefined (byte[27/10])
3699 16         33 $bytehash{electresundef27} = substr($material_specific_bytes, 9, 1);
3700 16 100       50 unless ($bytehash{electresundef27} =~ /^[|\s]$/) {
3701 2         7 push @warningstoreturn, ("008: Byte 27 (006/10), Electronic Resources-Undef27 has bad characters ($bytehash{electresundef27}).");
3702             } #Electronic Resources 27
3703              
3704             #Government publication (byte [28/11])
3705 16         30 $bytehash{govtpub} = substr($material_specific_bytes, 10, 1);
3706 16 100       48 unless ($bytehash{govtpub} =~ /^[acfilmosuz|\s]$/) {
3707 2         7 push @warningstoreturn, ("008: Byte 28 (006/11), Electronic Resources-Govt publication has bad characters ($bytehash{govtpub}).");
3708             } #Electronic Resources 28
3709              
3710             #Undefined (byte[29/12]-[34/17])
3711 16         28 $bytehash{electresundef29to34} = substr($material_specific_bytes, 11, 6);
3712 16 100       58 unless ($bytehash{electresundef29to34} =~ /^[|\s]{6}$/) {
3713 2         8 push @warningstoreturn, ("008: Bytes 29-34 (006/12-17), Electronic Resources-Undef29to34 has bad characters ($bytehash{electresundef29to34}).")
3714             } #Electronic Resources 29-34
3715              
3716             } # if electronic resources mattype 'm'
3717              
3718 16         72 return @warningstoreturn;
3719            
3720             } # _check_electronic_resources_bytes
3721              
3722             =head2 _check_cartographic_bytes($mattype, $biblvl, $bytes)
3723              
3724             Internal sub to check 008 bytes 18-34 or 006 bytes 01-17 for Cartographic Materials.
3725              
3726             Receives material type, bibliographic level, and a 17-byte string to be validated. The bytes should be bytes 18-34 of the 008, or bytes 01-17 of the 006.
3727              
3728             =cut
3729              
3730             sub _check_cartographic_bytes {
3731              
3732 22     22   30 my $mattype = shift;
3733 22         25 my $biblvl = shift;
3734 22         24 my $material_specific_bytes = shift;
3735              
3736 22         34 my %bytehash = ();
3737 22         29 my @warningstoreturn = ();
3738              
3739             ########################################
3740             ########################################
3741             ########################################
3742             # Cartographic Materials bytes 18-34 #
3743             ########################################
3744             ########################################
3745             ########################################
3746              
3747             #cartographic materials/maps
3748 22 50       71 if ($mattype =~ /^[ef]$/) {
3749              
3750             #Relief (byte[18/1]-[21/4])
3751 22         46 $bytehash{relief} = substr($material_specific_bytes, 0, 4);
3752 22 100       67 unless ($bytehash{relief} =~ /^[abcdefgijkmz|\s]{4}$/) {
3753 2         14 push @warningstoreturn, ("008: Bytes 18-21 (006/01-04), Cartographic-Relief has bad characters ($bytehash{relief}).");
3754             } #Cartographic 18-21
3755              
3756             #Projection (byte[22/5]-[23/6])
3757 22         85 $bytehash{projection} = substr($material_specific_bytes, 4, 2);
3758 22 100       68 unless ($bytehash{projection} =~ /^\|\||\s\s|aa|ab|ac|ad|ae|af|ag|am|an|ap|au|az|ba|bb|bc|bd|be|bf|bg|bh|bi|bj|bk|bl|bo|br|bs|bu|bz|ca|cb|cc|ce|cp|cu|cz|da|db|dc|dd|de|df|dg|dh|dl|zz$/) {
3759 2         9 push @warningstoreturn, ("008: Bytes 22-23 (006/05-06), Cartographic-Projection has bad characters ($bytehash{projection}).");
3760             } #Cartographic 22-23
3761              
3762             #Undefined (byte[24/7])
3763 22         40 $bytehash{mapundef24} = substr($material_specific_bytes, 6, 1);
3764 22 100       64 unless ($bytehash{mapundef24} =~ /^[|\s]$/) {
3765 2         7 push @warningstoreturn, ("008: Byte 24 (006/7), Cartographic-Undef24 has bad characters ($bytehash{mapundef24}).");
3766             } #Cartographic 24
3767              
3768             #Type of cartographic material (byte[25/8])
3769 22         36 $bytehash{typeofmap} = substr($material_specific_bytes, 7,1);
3770 22 100       60 unless ($bytehash{typeofmap} =~ /^[abcdefguz|]$/) {
3771 2         8 push @warningstoreturn, ("008: Byte 25 (006/08), Cartographic-Type of map has bad characters ($bytehash{typeofmap}).");
3772             } #Cartographic 25
3773              
3774             #Undefined (byte[26/9]-[27/10])
3775 22         102 $bytehash{mapundef26to27} = substr($material_specific_bytes, 8, 2);
3776 22 100       60 unless ($bytehash{mapundef26to27} =~ /^[|\s]{2}$/) {
3777 2         7 push @warningstoreturn, ("008: Bytes 26-27 (006/09-10), Cartographic-Undef26to27 has bad characters ($bytehash{mapundef26to27}).");
3778             } #Cartographic 26-27
3779              
3780             #Government publication (byte[28/11])
3781 22         40 $bytehash{govtpub} = substr($material_specific_bytes, 10, 1);
3782 22 100       57 unless ($bytehash{govtpub} =~ /^[acfilmosuz|\s]$/) {
3783 2         7 push @warningstoreturn, ("008: Byte 28 (006/11), Cartographic-Govt publication has bad characters ($bytehash{govtpub}).");
3784             } #Cartographic 28
3785              
3786             #Form of item (byte[29/12])
3787 22         37 $bytehash{formofitem} = substr($material_specific_bytes, 11, 1);
3788 22 100       59 unless ($bytehash{formofitem} =~ /^[abcdfoqrs|\s]$/) {
3789 2         8 push @warningstoreturn, ("008: Byte 29 (006/12), Cartographic-Form of item has bad characters ($bytehash{formofitem}).");
3790             } #Cartographic 29
3791              
3792             #Undefined (byte[30/13])
3793 22         36 $bytehash{mapundef30} = substr($material_specific_bytes, 12, 1);
3794 22 100       58 unless ($bytehash{mapundef30} =~ /^[|\s]$/) {
3795 2         7 push @warningstoreturn, ("008: Byte 30 (006/13), Cartographic-Undef30 has bad characters ($bytehash{mapundef30}).");
3796             } #Cartographic 30
3797              
3798             #Index (byte[31/14])
3799 22         38 $bytehash{mapindex} = substr($material_specific_bytes, 13, 1);
3800 22 100       60 unless ($bytehash{mapindex} =~ /^[01|]$/) {
3801 2         12 push @warningstoreturn, ("008: Byte 31 (006/14), Cartographic-Index has bad characters ($bytehash{mapindex}).");
3802             } #Cartographic 31
3803              
3804             #Undefined (byte[32/15])
3805 22         45 $bytehash{mapundef32} = substr($material_specific_bytes, 14, 1);
3806 22 100       59 unless ($bytehash{mapundef32} =~ /^[|\s]$/) {
3807 2         7 push @warningstoreturn, ("008: Byte 32 (006/15), Cartographic-Undef32 has bad characters ($bytehash{mapundef32}).");
3808             } #Cartographic 32
3809              
3810             #Special format characteristics (byte[33/16]-[34/17])
3811 22         38 $bytehash{specialfmtchar} = substr($material_specific_bytes, 15, 2);
3812 22 100       73 unless ($bytehash{specialfmtchar} =~ /^[ejklnoprz|\s]{2}$/) {
3813 2         9 push @warningstoreturn, ("008: Bytes 33-34 (006/16-17), Cartographic-Special format characteristics has bad characters ($bytehash{specialfmtchar}).");
3814             } #Cartographic 33-34
3815              
3816             } # Cartographic Materials
3817              
3818              
3819 22         90 return @warningstoreturn;
3820            
3821             } # _check_cartographic_bytes
3822              
3823             =head2 _check_music_bytes($mattype, $biblvl, $bytes)
3824              
3825             Internal sub to check 008 bytes 18-34 or 006 bytes 01-17 for Music and Sound Recordings.
3826              
3827             Receives material type, bibliographic level, and a 17-byte string to be validated. The bytes should be bytes 18-34 of the 008, or bytes 01-17 of the 006.
3828              
3829             =cut
3830              
3831             sub _check_music_bytes {
3832              
3833 20     20   28 my $mattype = shift;
3834 20         25 my $biblvl = shift;
3835 20         23 my $material_specific_bytes = shift;
3836              
3837 20         28 my %bytehash = ();
3838 20         41 my @warningstoreturn = ();
3839              
3840             ########################################
3841             ########################################
3842             ########################################
3843             # Music/Sound Recordings bytes 18-34 #
3844             ########################################
3845             ########################################
3846             ########################################
3847              
3848             #music and sound recordings
3849 20 50       55 if ($mattype =~ /^[cdij]$/) {
3850              
3851             #Form of composition (byte[18/1]-[19/2])
3852 20         43 $bytehash{formofcomp} = substr($material_specific_bytes, 0, 2);
3853 20 100       104 unless ($bytehash{formofcomp} =~ /^\|\||an|bd|bg|bl|bt|ca|cb|cc|cg|ch|cl|cn|co|cp|cr|cs|ct|cy|cz|df|dv|fg|fl|fm|ft|gm|hy|jz|mc|md|mi|mo|mp|mr|ms|mu|mz|nc|nn|op|or|ov|pg|pm|po|pp|pr|ps|pt|pv|rc|rd|rg|ri|rp|rq|sd|sg|sn|sp|st|su|sy|tc|tl|ts|uu|vi|vr|wz|za|zz$/) {
3854 2         8 push @warningstoreturn, ("008: Bytes 18-19 (006/01-02), Music-Form of composition has bad characters ($bytehash{formofcomp}).");
3855             } #Music 18-19
3856              
3857             #Format of music (byte[20/3])
3858 20         42 $bytehash{fmtofmusic} = substr($material_specific_bytes, 2, 1);
3859 20 100       55 unless ($bytehash{fmtofmusic} =~ /^[abcdeghijklmnuz|]$/) {
3860 2         9 push @warningstoreturn, ("008: Byte 20 (006/03), Music-Format of music has bad characters ($bytehash{fmtofmusic}).");
3861             } #Music 20
3862              
3863             #Music parts (byte[21/4])
3864 20         34 $bytehash{musicparts} = substr($material_specific_bytes, 3, 1);
3865 20 100       56 unless ($bytehash{musicparts} =~ /^[defnu|\s]$/) {
3866 2         6 push @warningstoreturn, ("008: Byte 21 (006/04), Music-Parts has bad characters ($bytehash{musicparts}).");
3867             } #Music 21
3868              
3869             #Target audience (byte[22/5])
3870 20         35 $bytehash{audience} = substr($material_specific_bytes, 4, 1);
3871 20 100       51 unless ($bytehash{audience} =~ /^[abcdefgj|\s]$/) {
3872 2         7 push @warningstoreturn, ("008: Byte 22 (006/05), Music-Audience has bad characters ($bytehash{audience}).");
3873             } #Music 22
3874              
3875             #Form of item (byte[23/6])
3876 20         32 $bytehash{formofitem} = substr($material_specific_bytes, 5, 1);
3877 20 100       58 unless ($bytehash{formofitem} =~ /^[abcdfoqrs|\s]$/) {
3878 2         7 push @warningstoreturn, ("008: Byte 23 (006/06), Music-Form of item has bad characters ($bytehash{formofitem}).");
3879             } #Music 23
3880              
3881             #Accompanying matter (byte[24/7]-[29/12])
3882 20         38 $bytehash{accompmat} = substr($material_specific_bytes, 6, 6);
3883 20 100       57 unless ($bytehash{accompmat} =~ /^[abcdefghikrsz|\s]{6}$/) {
3884 2         6 push @warningstoreturn, ("008: Bytes 24-29 (006/07-12), Music-Accompanying material has bad characters ($bytehash{accompmat}).");
3885             } #Music 24-29
3886              
3887             #Literary text for sound recordings (byte[30/13]-[31/14])
3888 20         35 $bytehash{textforsdrec} = substr($material_specific_bytes, 12, 2);
3889 20 100       53 unless ($bytehash{textforsdrec} =~ /^[abcdefghijklmnoprstz|\s]{2}$/) {
3890 2         8 push @warningstoreturn, ("008: Byte 30-31 (006/13-14), Music-Text for sound recordings has bad characters ($bytehash{textforsdrec}).");
3891             } #Music 30-31
3892              
3893             #Undefined (byte[32/15])
3894 20         34 $bytehash{musicundef32} = substr($material_specific_bytes, 14, 1);
3895 20 100       52 unless ($bytehash{musicundef32} =~ /^[|\s]$/) {
3896 2         17 push @warningstoreturn, ("008: Byte 32 (006/15), Music-Undef32 has bad characters ($bytehash{musicundef32}).");
3897             } #Music 32
3898              
3899             #Transposition and arrangement (byte[33/16])
3900 20         36 $bytehash{transposeandarr} = substr($material_specific_bytes, 15, 1);
3901 20 100       51 unless ($bytehash{transposeandarr} =~ /^[abcnu|\s]$/) {
3902 2         7 push @warningstoreturn, ("008: Byte 33 (006/16), Music-Transposition and arrangement has bad characters ($bytehash{transposeandarr}).");
3903             } #Music 33
3904              
3905             #Undefined (byte[34/17])
3906 20         35 $bytehash{musicundef34} = substr($material_specific_bytes, 16, 1);
3907 20 100       81 unless ($bytehash{musicundef34} =~ /^[|\s]$/) {
3908 2         6 push @warningstoreturn, ("008: Byte 34 (006/17), Music-Undef34 has bad characters ($bytehash{musicundef34}).");
3909             } #Music 34
3910              
3911             } # Music and Sound Recordings
3912              
3913 20         77 return @warningstoreturn;
3914            
3915             } # _check_music_bytes
3916              
3917             =head2 _check_visual_material_bytes($mattype, $biblvl, $bytes)
3918              
3919             Internal sub to check 008 bytes 18-34 or 006 bytes 01-17 for Visual Materials.
3920              
3921             Receives material type, bibliographic level, and a 17-byte string to be validated. The bytes should be bytes 18-34 of the 008, or bytes 01-17 of the 006.
3922              
3923             =cut
3924              
3925             sub _check_visual_material_bytes {
3926              
3927 18     18   25 my $mattype = shift;
3928 18         19 my $biblvl = shift;
3929 18         21 my $material_specific_bytes = shift;
3930              
3931 18         28 my %bytehash = ();
3932 18         24 my @warningstoreturn = ();
3933              
3934             ########################################
3935             ########################################
3936             ########################################
3937             #### Visual Materials bytes 18-34 ####
3938             ########################################
3939             ########################################
3940             ########################################
3941              
3942             #visual materials
3943 18 50       61 if ($mattype =~ /^[gkor]$/) {
3944              
3945             #Running time for motion pictures and videorecordings (byte[18/1]-[20/3])
3946 18         36 $bytehash{runningtime} = substr($material_specific_bytes, 0, 3);
3947 18 100       65 unless ($bytehash{runningtime} =~ /^([|\d]{3}|\-{3}|n{3})$/) {
3948 2         8 push @warningstoreturn, ("008: Bytes 18-20 (006/01-03), Visual materials-Runningtime has bad characters ($bytehash{runningtime}).")
3949             } #Visual materials 18-20
3950              
3951             #Undefined (byte[21/4])
3952 18         34 $bytehash{visualmatundef21} = substr($material_specific_bytes, 3, 1);
3953 18 100       60 unless ($bytehash{visualmatundef21} =~ /^[|\s]$/) {
3954 2         7 push @warningstoreturn, ("008: Byte 21 (006/04), Visual materials-Undef21 has bad characters ($bytehash{visualmatundef21}).");
3955             } #Visual materials 21
3956              
3957             #Target audience (byte[22/5])
3958 18         36 $bytehash{audience} = substr($material_specific_bytes, 4, 1);
3959 18 100       53 unless ($bytehash{audience} =~ /^[abcdefgj|\s]$/) {
3960 2         7 push @warningstoreturn, ("008: Byte 22 (006/05), Visual materials-Audience has bad characters ($bytehash{audience}).");
3961             } #Visual materials 22
3962              
3963             #Undefined (byte[23/6]-[27/10])
3964 18         37 $bytehash{visualmatundef23to27} = substr($material_specific_bytes, 5, 5);
3965 18 100       52 unless ($bytehash{visualmatundef23to27} =~ /^[|\s]{5}$/) {
3966 2         7 push @warningstoreturn, ("008: Bytes 23-27 (006/06-10), Visual materials-Undef23to27 has bad characters ($bytehash{visualmatundef23to27}).");
3967             } #Visual materials 23-27
3968              
3969             #Government publication (byte[28/11])
3970 18         33 $bytehash{govtpub} = substr($material_specific_bytes, 10, 1);
3971 18 100       51 unless ($bytehash{govtpub} =~ /^[acfilmosuz|\s]$/) {
3972 2         7 push @warningstoreturn, ("008: Byte 28 (006/11), Visual materials-Govt publication has bad characters ($bytehash{govtpub}).");
3973             } #Visual materials 28
3974              
3975             #Form of item (byte[29/12])
3976 18         39 $bytehash{formofitem} = substr($material_specific_bytes, 11, 1);
3977 18 100       57 unless ($bytehash{formofitem} =~ /^[abcdfoqrs|\s]$/) {
3978 2         8 push @warningstoreturn, ("008: Byte 29 (006/12), Visual materials-Form of item has bad characters ($bytehash{formofitem}).");
3979             } #Visual materials 29
3980              
3981             #Undefined (byte[30/13]-[32/15])
3982 18         33 $bytehash{visualmatundef30to32} = substr($material_specific_bytes, 12, 3);
3983 18 100       51 unless ($bytehash{visualmatundef30to32} =~ /^[|\s]{3}$/) {
3984 2         8 push @warningstoreturn, ("008: Bytes 30-32 (006/13-15), Visual materials-Undef30to32 has bad characters ($bytehash{visualmatundef30to32}).");
3985             } #Visual materials 30-32
3986              
3987             #Type of visual material (byte[33/16])
3988 18         41 $bytehash{typevisualmaterial} = substr($material_specific_bytes, 15, 1);
3989 18 100       54 unless ($bytehash{typevisualmaterial} =~ /^[abcdfgiklmnopqrstvwz|]$/) {
3990 2         8 push @warningstoreturn, ("008: Byte 33 (006/16), Visual materials-Type of visual material has bad characters ($bytehash{typevisualmaterial}).");
3991             }
3992              
3993             #Technique (byte[34/17])
3994 18         30 $bytehash{technique} = substr($material_specific_bytes, 16, 1);
3995 18 100       51 unless ($bytehash{technique} =~ /^[aclnuz|]$/) { push @warningstoreturn, ("008: Byte 34 (006/17), Visual materials-Technique has bad characters ($bytehash{technique}).");
  2         8  
3996             } #Visual materials 34
3997              
3998             } #Visual Materials
3999              
4000 18         74 return @warningstoreturn;
4001            
4002             } # _check_visual_material_bytes
4003              
4004             =head2 _check_mixed_material_bytes($mattype, $biblvl, $bytes)
4005              
4006             Internal sub to check 008 bytes 18-34 or 006 bytes 01-17 for Mixed Materials.
4007              
4008             Receives material type, bibliographic level, and a 17-byte string to be validated. The bytes should be bytes 18-34 of the 008, or bytes 01-17 of the 006.
4009              
4010             =cut
4011              
4012             sub _check_mixed_material_bytes {
4013              
4014 6     6   8 my $mattype = shift;
4015 6         8 my $biblvl = shift;
4016 6         8 my $material_specific_bytes = shift;
4017              
4018 6         9 my %bytehash = ();
4019 6         10 my @warningstoreturn = ();
4020              
4021             ########################################
4022             ########################################
4023             ########################################
4024             #### Mixed Materials bytes 18-34 ####
4025             ########################################
4026             ########################################
4027             ########################################
4028              
4029             #mixed materials
4030 6 50       20 if ($mattype =~ /^[p]$/) {
4031              
4032             #Undefined (byte[18/1]-[22/5])
4033 6         19 $bytehash{mixedundef18to22} = substr($material_specific_bytes, 0, 5);
4034 6 100       23 unless ($bytehash{mixedundef18to22} =~ /^[|\s]{5}$/) {
4035 2         8 push @warningstoreturn, ("008: Bytes 18-22 (006/01-05), Mixed materials-Undef18to22 has bad characters ($bytehash{mixedundef18to22}).");
4036             } #Mixed materials 18-22
4037              
4038             #Form of item (byte[23/6])
4039 6         11 $bytehash{formofitem} = substr($material_specific_bytes, 5, 1);
4040 6 100       21 unless ($bytehash{formofitem} =~ /^[abcdfoqrs|\s]$/) {
4041 2         8 push @warningstoreturn, ("008: Byte 23 (006/06), Mixed materials-Form of item has bad characters ($bytehash{formofitem}).");
4042             } #Mixed materials 23
4043              
4044             #Undefined (byte[24/7]-[34/17])
4045 6         12 $bytehash{mixedundef24to34} = substr($material_specific_bytes, 6, 11);
4046 6 100       19 unless ($bytehash{mixedundef24to34} =~ /^[|\s]{11}$/) {
4047 2         7 push @warningstoreturn, ("008: Bytes 24-34 (006/07-17), Mixed materials-Undef24to34 has bad characters ($bytehash{mixedundef24to34}).");
4048             } #Mixed materials 24-30
4049              
4050             } #Mixed Materials
4051              
4052              
4053             #########################################
4054             #########################################
4055             #########################################
4056             #########################################
4057              
4058 6         20 return @warningstoreturn;
4059            
4060             } # _check_mixed_material_bytes
4061              
4062             sub _reword_008 {
4063 53     53   94 my @warnings = @_;
4064              
4065 53         98 foreach (@warnings) {
4066 53         366 $_ =~ s/^(008: Byte[ s] ?[\-0-9]+) \(006\/[\-0-9]+\)/$1/;
4067             } #foreach warning
4068              
4069 53         141 return @warnings;
4070              
4071             } #_reword_008
4072              
4073             sub _reword_006 {
4074              
4075 52     52   66 my @warnings = @_;
4076              
4077 52         73 foreach (@warnings) {
4078 52         347 $_ =~ s/^(008: Byte[ s] ?[\-0-9]+) \(006\/([\-0-9]+)\)/006: Byte(s) $2/;
4079              
4080             } #foreach warning
4081              
4082 52         137 return @warnings;
4083              
4084             } #_reword_006
4085              
4086             #########################################
4087             #########################################
4088             #########################################
4089             #########################################
4090              
4091             =head2 _get_current_date()
4092              
4093             Internal sub for use with validate008($field008, $mattype, $biblvl) (actually with parse008date($field008string)). Returns the current year-month-day, in the form yyyymmdd.
4094              
4095             Also used by check_010($record).
4096              
4097             =cut
4098              
4099             sub _get_current_date {
4100 88     88   2611 my ($sec,$min,$hour,$mday,$mon,$year) = localtime();
4101            
4102 88         182 $year += 1900;
4103             #add 1 to month to account for 0-base
4104 88         107 $mon++;
4105              
4106 88         399 return sprintf("%0.4d%0.2d%0.2d",$year,$mon,$mday);
4107              
4108             } #_get_current_date()
4109              
4110             #########################################
4111             #########################################
4112             #########################################
4113             #########################################
4114              
4115             #########################################
4116             #########################################
4117             #########################################
4118             #########################################
4119              
4120             =head1 CHANGES/VERSION HISTORY
4121              
4122             Version 1.18: Updated Oct. 8, 2012 to June 22, 2013. Released , 2013.
4123              
4124             -Updated _check_music_bytes for MARC Update 16 (Sept. 2012), adding 'l' as valid for 008/20.
4125              
4126             Version 1.17: Updated Oct. 8, 2012 to June 22, 2013. Released June 23, 2013.
4127              
4128             -Updated check_490vs8xx($record) to look only for 800, 810, 811, 830 rather than any 8XX.
4129             -Added functionality to deal with RDA records.
4130             -Updated parse008vs300b($illcodes, $field300subb, $record_is_RDA) to pass 3rd variable, "$record_is_RDA".
4131             -Updated _check_music_bytes for MARC Update 15 (Sept. 2012), adding 'k' as valid for 008/20.
4132              
4133             Version 1.16: Updated May 16-Nov. 14, 2011. Released .
4134              
4135             -Turned off check_fieldlength($record) in check_all_subs()
4136             -Turned off checking of floating hyphens in 520 fields in findfloatinghyphens($record)
4137             -Updated validate008 subs (and 006) related to 008/24-27 (Books and Continuing Resources) for MARC Update no. 10, Oct. 2009 and Update no. 11, 2010; no. 12, Oct. 2010; and no. 13, Sept. 2011.
4138             -Updated %ldrbytes with leader/18 'c' and redefinition of 'i' per MARC Update no. 12, Oct. 2010.
4139              
4140             Version 1.15: Updated June 24-August 16, 2009. Released , 2009.
4141              
4142             -Updated checks related to 300 to better account for electronic resources.
4143             -Revised wording in validate008($field008, $mattype, $biblvl) language code (008/35-37) for ' '/zxx.
4144             -Updated validate008 subs (and 006) related to 008/24-27 (Books and Continuing Resources) for MARC Update no. 9, Oct. 2008.
4145             -Updated validate008 sub (and 006) for Books byte 33, Literary form, invalidating code 'c' and referring it to 008/24-27 value 'c' .
4146             -Updated video007vs300vs538($record) to allow Blu-ray in 538 and 's' in 07/04.
4147              
4148             Version 1.14: Updated Oct. 21, 2007, Jan. 21, 2008, May 20, 2008. Released May 25, 2008.
4149              
4150             -Updated %ldrbytes with leader/19 per Update no. 8, Oct. 2007. Check for validity of leader/19 not yet implemented.
4151             -Updated _check_book_bytes with code '2' ('Offprints') for 008/24-27, per Update no. 8, Oct. 2007.
4152             -Updated check_245ind1vs1xx($record) with TODO item and comments
4153             -Updated check_bk008_vs_300($record) to allow "leaves of plates" (as opposed to "leaves", when no p. or v. is present), "leaf", and "column"(s).
4154              
4155             Version 1.13: Updated Aug. 26, 2007. Released Oct. 3, 2007.
4156              
4157             -Uncommented valid MARC 21 leader values in %ldrbytes to remove local practice. Libraries wishing to restrict leader values should comment out individual bytes to enable errors when an unwanted value is encountered.
4158             -Added ldrvalidate.t.pl and ldrvalidate.t tests.
4159             -Includes version 1.18 of MARC::Lint::CodeData.
4160              
4161             Version 1.12: Updated July 5-Nov. 17, 2006. Released Feb. 25, 2007.
4162              
4163             -Updated check_bk008_vs_300($record) to look for extra p. or v. after parenthetical qualifier.
4164             -Updated check_bk008_vs_300($record) to look for missing period after 'col' in subfield 'b'.
4165             -Replaced $field-tag() with $tag in error message reporting in check_nonpunctendingfields($record).
4166             -Turned off 50-field limit check in check_fieldlength($record).
4167             -Updated parse008vs300b($illcodes, $field300subb) to look for /map[ \,s]/ rather than just 'map' when 008 is coded 'b'.
4168             -Updated check_bk008_vs_bibrefandindex($record) to look for spacing on each side of parenthetical pagination.
4169             -Updated check_internal_spaces($record) to report 10 characters on either side of each set of multiple internal spaces.
4170             -Uncommented level-5 and level-7 leader values as acceptable. Level-3 is still commented out, but could be uncommented for libraries that allow it.
4171             -Includes version 1.14 of MARC::Lint::CodeData.
4172              
4173             Version 1.11: Updated June 5, 2006. Released June 6, 2006.
4174              
4175             -Implemented check_006($record) to validate 006 (currently only does length check).
4176             --Revised validate008($field008, $mattype, $biblvl) to use internal sub for material specific bytes (18-34)
4177             -Revised validate008($field008, $mattype, $biblvl) language code (008/35-37) to report new 'zxx' code availability when ' ' is the code in the record.
4178             -Added 'mgmt.' to %abbexceptions for check_nonpunctendingfields($record).
4179              
4180             Version 1.10: Updated Sept. 5-Jan. 2, 2006. Released Jan. 2, 2006.
4181              
4182             -Revised validate008($field008, $mattype, $biblvl) to use internal subs for material specific byte checking.
4183             --Added:
4184             ---_check_cont_res_bytes($mattype, $biblvl, $bytes),
4185             ---_check_book_bytes($mattype, $biblvl, $bytes),
4186             ---_check_electronic_resources_bytes($mattype, $biblvl, $bytes),
4187             ---_check_cartographic_bytes($mattype, $biblvl, $bytes),
4188             ---_check_music_bytes($mattype, $biblvl, $bytes),
4189             ---_check_visual_material_bytes($mattype, $biblvl, $bytes),
4190             ---_check_mixed_material_bytes,
4191             ---_reword_008(@warnings), and
4192             ---_reword_006(@warnings).
4193             --Updated Continuing resources byte 20 from ISSN center to Undefined per MARC 21 update of Oct. 2003.
4194             -Updated wording in findfloatinghyphens($record) to report 10 chars on either side of floaters and check_floating_punctuation($record) to report some context if the field in question has more than 80 chars.
4195             -check_bk008_vs_bibrefandindex($record) updated to check for 'p. ' following bibliographical references when pagination is present.
4196             -check_5xxendingpunctuation($record) reports question mark or exclamation point followed by period as error.
4197             -check_5xxendingpunctuation($record) now checks 505.
4198             -Updated check_nonpunctendingfields($record) to account for initialisms with interspersed periods.
4199             -Added check_floating_punctuation($record) looking for unwanted spaces before periods, commas, and other punctuation marks.
4200             -Renamed findfloatinghyphens($record) to fix spelling.
4201             -Revised check_bk008_vs_300($record) to account for textual materials on CD-ROM.
4202             -Added abstract to name.
4203              
4204             Version 1.09: Updated July 18, 2005. Released July 19, 2005 (Aug. 14, 2005 to CPAN).
4205              
4206             -Added check_010.t (and check_010.t.pl) tests for check_010($record).
4207             -check_010($record) revisions.
4208             --Turned off validation of 8-digit LCCN years. Code commented-out.
4209             --Modified parsing of numbers to check spacing for 010a with valid non-digits after valid numbers.
4210             --Validation of 10-digit LCCN years is based on current year.
4211             -Fixed bug of uninitialized values for matchpubdates($record) 050 and 260 dates.
4212             -Corrected comparison for year entered < 1980.
4213             -Removed AutoLoader (which was a remnant of the initial module creation process)
4214              
4215             Version 1.08: Updated Feb. 15-July 11, 2005. Released July 16, 2005.
4216              
4217             -Added 008errorchecks.t (and 008errorchecks.t.txt) tests for 008 validation
4218             -Added check of current year, month, day vs. 008 creation date, reporting error if creation date appears to be later than local time. Assumes 008 dates of 00mmdd to 70mmdd represent post-2000 dates.
4219             --This is a change from previous range, which gave dates as 00-06 as 200x, 80-99 as 19xx, and 07-79 as invalid.
4220             -Added _get_current_date() internal sub to assist with check of creation date vs. current date.
4221             -findemptysubfields($record) also reports error if period(s) and/or space(s) are the only data in a subfield.
4222             -Revised wording of error messages for validate008($field008, $mattype, $biblvl)
4223             -Revised parse008date($field008string) error message wording and bug fix.
4224             -Bug fix in video007vs300vs538($record) for gathering multiple 538 fields.
4225             -added check in check_5xxendingpunctuation($record) for space-semicolon-space-period at the end of 5xx fields.
4226             -added field count check for more than 50 fields to check_fieldlength($record)
4227             -added 'webliography' as acceptable 'bibliographical references' term in check_bk008_vs_bibrefandindex($record), even though it is discouraged. Consider adding an error message indicating that the term should be 'bibliographical references'?
4228             -Code indenting changed from tabs to 4 spaces per tab.
4229             -Misc. bug fixes including changing '==' to 'eq' for tag numbers, bytes in 008, and indicators.
4230              
4231             Version 1.07: Updated Dec. 11-Feb. 2005. Released Feb. 13, 2005.
4232              
4233             -check_double_periods() skips field 856, where multiple punctuation is possible for URIs.
4234             -added code in check_internal_spaces() to account for spaces between angle brackets in open dates in field 260c.
4235             -Updated various subs to verify that 008 exists (and quietly return if not. check_008 will report the error).
4236             -Changed #! line, removed -w, replaced with use warnings.
4237             -Added error message to check_bk008_vs_bibrefandindex($record) if 008 book
4238             index byte is not 0 or 1. This will result in duplicate errors if check_008 is
4239             also called on the record.
4240              
4241             Version 1.05 and 1.06: Updated Dec. 6-7. Released Dec. 6-7, 2004.
4242              
4243             -CPAN distribution fix.
4244              
4245             Version 1.04: Updated Nov. 4-Dec. 4, 2004. Released Dec. 5, 2004.
4246              
4247             -Updated validate008() to use MARC::Lint::CodeData.
4248             -Removed DATA section, since this is now in MARC::Lint::CodeData.
4249             -Updated check_008() to use the new validate008().
4250             -Revised bib. refs. check to require 'reference' to be followed by optional 's', optional period, and word boundary (to catch things like 'referenced'.
4251              
4252              
4253             Version 1.03: Updated Aug. 30-Oct. 16, 2004. Released Oct. 17. First CPAN version.
4254              
4255             -Moved subs to MARC::QBIerrorchecks
4256             --check_003($record)
4257             --check_CIP_for_stockno($record)
4258             --check_082count($record)
4259             -Fixed bug in check_5xxendingpunctuation for first 10 characters.
4260             -Moved validate008() and parse008date() from MARC::BBMARC (to make MARC::Errorchecks more self-contained).
4261             -Moved readcodedata() from BBMARC (used by validate008)
4262             -Moved DATA from MARC::BBMARC for use in readcodedata()
4263             -Remove dependency on MARC::BBMARC
4264             -Added duplicate comma check in check_double_periods($record)
4265             -Misc. bug fixes
4266             Planned (future versions):
4267             -Account for undetermined dates in matchpubdates($record).
4268             -Cleanup of validate008
4269             --Standardization of error reporting
4270             --Material specific byte checking (bytes 18-34) abstracted to allow 006 validation.
4271            
4272             Version 1.02: Updated Aug. 11-22, 2004. Released Aug. 22, 2004.
4273              
4274             -Implemented VERSION (uncommented)
4275             -Added check for presence of 040 (check_040present($record)).
4276             -Added check for presence of 2 082s in full-level, 1 082 in CIP-level records (check_082count($record)).
4277             -Added temporary (test) check for trailing punctuation in 240, 586, 440, 490, 246 (check_nonpunctendingfields($record))
4278             --which should not end in punctuation except when the data ends in such.
4279             -Added check_fieldlength($record) to report fields longer than 1870 bytes.
4280             --This should be rewritten to use the length in the directory of the raw MARC.
4281             -Fixed workaround in check_bk008_vs_bibrefandindex($record) (Thanks again to Rich Ackerman).
4282            
4283             Version 1.01: Updated July 20-Aug. 7, 2004. Released Aug. 8, 2004.
4284              
4285             -Temporary (or not) workaround for check_bk008_vs_bibrefandindex($record) and bibliographies.
4286             -Removed variables from some error messages and cleanup of messages.
4287             -Code readability cleanup.
4288             -Added subroutines:
4289             --check_240ind1vs1xx($record)
4290             --check_041vs008lang($record)
4291             --check_5xxendingpunctuation($record)
4292             --findfloatinghypens($record)
4293             --video007vs300vs538($record)
4294             --ldrvalidate($record)
4295             --geogsubjvs043($record)
4296             ---has list of exceptions (e.g. English-speaking countries)
4297             --findemptysubfields($record)
4298             -Changed subroutines:
4299             --check_bk008_vs_300($record):
4300             ---added cross-checking for codes a, b, c, g (ill., map(s), port(s)., music)
4301             ---added checking for 'p. ' or 'v. ' or 'leaves ' in subfield 'a'
4302             ---added checking for 'cm.', 'mm.', 'in.' in subfield 'c'
4303             --parse008vs300b
4304             ---revised check for 'm', phono. (which our catalogers don't currently use)
4305             --Added check in check_bk008_vs_bibrefandindex($record) for 'Includes index.' (or indexes) in 504
4306             ---This has a workaround I would like to figure out how to fix
4307            
4308             Version 1.00 (update to 0.95): First release July 18, 2004.
4309              
4310             -Fixed bugs causing check_003 and check_010 subroutines to fail (Thanks to Rich Ackerman)
4311             -Added to documentation
4312             -Misc. cleanup
4313             -Added skip of 787 fields to check_internal_spaces
4314             -Added subroutines:
4315             --check_end_punct_300($record)
4316             --check_bk008_vs_300($record)
4317             ---parse008vs300b
4318             --check_490vs8xx($record)
4319             --check_245ind1vs1xx($record)
4320             --matchpubdates($record)
4321             --check_bk008_vs_bibrefandindex($record)
4322              
4323             Version 1 (original version (actually version 0.95)): First release, June 22, 2004
4324              
4325             =head1 SEE ALSO
4326              
4327             MARC::Record -- Required for this module to work.
4328              
4329             MARC::Lint -- In the MARC::Record distribution and basis for this module.
4330              
4331             MARC::Lintadditons -- Extension of MARC::Lint for checks involving individual tags.
4332             (vs. cross-field checking covered in this module).
4333             Available at http://home.inwave.com/eija (and may be merged into MARC::Lint).
4334              
4335             MARC pages at the Library of Congress (http://www.loc.gov/marc)
4336              
4337             Anglo-American Cataloging Rules, 2nd ed., 2002 revision, plus updates.
4338              
4339             Library of Congress Rule Interpretations to AACR2.
4340              
4341             MARC Report (http://www.marcofquality.com) -- More full-featured commercial program for validating MARC records.
4342              
4343             =head1 LICENSE
4344              
4345             This code may be distributed under the same terms as Perl itself.
4346              
4347             Please note that this module is not a product of or supported by the
4348             employers of the various contributors to the code.
4349              
4350             =head1 AUTHOR
4351              
4352             Bryan Baldus
4353             eijabb@cpan.org
4354              
4355             Copyright (c) 2003-2013
4356              
4357             =cut
4358              
4359             1;
4360              
4361             __END__