File Coverage

blib/lib/Business/LCCN.pm
Criterion Covered Total %
statement 72 74 97.3
branch 18 20 90.0
condition 6 9 66.6
subroutine 18 18 100.0
pod n/a
total 114 121 94.2


line stmt bran cond sub pod time code
1             package Business::LCCN;
2 5     5   25742 use 5.6.1;
  5         17  
  5         243  
3 5     5   29 use Carp qw( carp );
  5         9  
  5         312  
4 5     5   7725 use Moose;
  5         3351684  
  5         45  
5 5     5   40568 use Moose::Util::TypeConstraints;
  5         14  
  5         65  
6 5     5   12022 use Scalar::Util qw( blessed );
  5         14  
  5         333  
7 5     5   6427 use URI;
  5         26675  
  5         169  
8 5     5   49 use strict;
  5         10  
  5         163  
9 5     5   30 use warnings;
  5         11  
  5         669  
10              
11             =head1 NAME
12              
13             Business::LCCN - Work with Library of Congress Control Number (LCCN) codes
14              
15             =head1 VERSION
16              
17             Version 1.01
18              
19             =cut
20              
21             our $VERSION = '1.01';
22              
23             =head1 SYNOPSIS
24              
25             Work with Library of Congress Control Number (LCCN) codes.
26              
27             use Business::LCCN;
28              
29             my $lccn = Business::LCCN->new('he 68001993 /HE/r692');
30             if ($lccn) {
31              
32             # parse LCCN (common fields)
33             print 'Prefix ', $lccn->prefix, "\n"; # "he"
34             print 'Prefix field ', $lccn->prefix_encoded, "\n"; # "he "
35             print 'Year cataloged ', $lccn->year_cataloged, "\n"; # 1968
36             print 'Year field ', $lccn->year_encoded, "\n"; # "68"
37             print 'Serial ', $lccn->serial, "\n"; # "001993"
38              
39             # stringify LCCN:
40              
41             # canonical format: "he 68001993 /HE/r692"
42             print 'Canonical ', $lccn->canonical, "\n";
43              
44             # simple normalized format: "he68001993"
45             print 'Normalized ', $lccn->normalized,"\n";
46              
47             # info: URI: "info:lccn:he68001993"
48             print 'Info URI ', $lccn->info_uri, "\n";
49              
50             # lccn.loc.gov permalink: "http://lccn.loc.gov/he68001993"
51             print 'Permalink ', $lccn->permalink,"\n";
52              
53             # parse LCCN (uncommon fields)
54             print 'LCCN Type ', $lccn->lccn_structure, "\n"; # "A" or "B"
55             print 'Suffix field ', $lccn->suffix_encoded, \n"; # "/HE"
56             print 'Suffix parts ', $lccn->suffix_alphabetic_identifiers,
57             "\n"; # ("HE")
58             print 'Rev year', $lccn->revision_year, "\n"; # 1969
59             print 'Rev year field ',$lccn->revision_year_encoded,
60             "\n"; # "69"
61             print 'Rev number ', $lccn->revision_number,"\n"; # 2
62              
63             } else {
64             print " Error : Invalid LCCN \n ";
65             }
66              
67             =cut
68              
69             use overload
70 5         53 '==' => \&_overload_equality,
71             'eq' => \&_overload_equality,
72 5     5   30 '""' => \&_overload_string;
  5         12  
73              
74             subtype 'LCCN_Year' => as 'Int' => where { $_ >= 1898 };
75             subtype 'LCCN_Serial' => as 'Str' => where {m/^\d{6}$/};
76             enum 'LCCN_Structure' => qw( A B );
77              
78             # normalize syntax at http://www.loc.gov/marc/lccn-namespace.html
79             subtype 'LCCN_Normalized' => as 'Str' =>
80             where {m/^(?:[a-z](?:[a-z](?:[a-z]|\d{2})?|\d\d)?|\d\d)?\d{8}$/};
81             subtype 'URI' => as 'Object' => where { $_->isa('URI') };
82             coerce 'URI' => from 'Str' => via { URI->new($_) };
83              
84             has 'original' => ( is => 'ro', isa => 'Maybe[Str]', required => 1 );
85             has 'lccn_structure' =>
86             ( is => 'ro', isa => 'LCCN_Structure', required => 1 );
87             has 'year_encoded' => ( is => 'ro', isa => 'Str', required => 1 );
88             has 'year_cataloged' =>
89             ( is => 'ro', isa => 'Maybe[LCCN_Year]', required => 0 );
90             has 'prefix' => ( is => 'ro', isa => 'Str', required => 1 );
91             has 'prefix_encoded' => ( is => 'ro', isa => 'Str', required => 1 );
92             has 'serial' => ( is => 'ro', isa => 'LCCN_Serial', required => 1 );
93             has 'suffix_encoded' =>
94             ( is => 'ro', isa => 'Str', required => 1, default => '' );
95             has 'suffix_alphabetic_identifiers' => (
96             is => 'ro',
97             isa => 'ArrayRef[Str]',
98             lazy => 1,
99             default => sub { _suffix_alphabetic_identifiers(@_) },
100             );
101             has 'revision_year' => ( is => 'ro', isa => 'Maybe[Int]', required => 0 );
102             has 'revision_year_encoded' =>
103             ( is => 'ro', isa => 'Str', required => 1, default => '' );
104             has 'revision_number' => ( is => 'ro', isa => 'Maybe[Int]', required => 0 );
105             has 'canonical' => ( is => 'ro',
106             isa => 'Str',
107             lazy => 1,
108             default => sub { _canonical(@_) },
109             );
110             has 'normalized' => ( is => 'ro',
111             isa => 'LCCN_Normalized',
112             lazy => 1,
113             default => sub { _normalized(@_) },
114             );
115             has 'permalink' => ( is => 'ro',
116             isa => 'URI',
117             lazy => 1,
118             default => sub { _permalink(@_) }
119             );
120             has 'info_uri' => ( is => 'ro',
121             isa => 'URI',
122             lazy => 1,
123             default => sub { _info_uri(@_) }
124             );
125              
126             around 'new' => sub {
127             my ( $next, $self, $input, $options ) = @_;
128              
129             unless ( $options and ref $options and ref $options eq 'HASH' ) {
130             $options = {};
131             }
132             my $emit_warnings = !$options->{no_warnings};
133              
134             if ( !defined $input ) {
135             carp q{Received an undefined value as LCCN input.} if $emit_warnings;
136             return;
137             } elsif ( !length $input ) {
138             carp q{Received an empty string as LCCN input.} if $emit_warnings;
139             return;
140             } else {
141             my %out = ( original => $input );
142              
143             # clean up any leading or trailing whitespace
144             $input =~ s/^\s+|\s+$//g;
145              
146             # accept permalinks
147             $input =~ s{^http://lccn.loc.gov/}{};
148              
149             # accept info: uris
150             $input =~ s{^info:lccn/}{};
151              
152             # try LCCN structure B
153             if ($input =~ m{
154             ^
155             ([a-zA-Z\s]{0,2}) # 2-letter alphabetic prefix
156             \s? # space, not officially allowed
157             ([2-9]\d\d\d) # 4-letter year
158             (?:
159             -(\d{1,6}) # hyphen plus 1-6 digit serial number
160             | # or...
161             (\d{6}) # 6 digit serial number
162             )
163             $ }x
164             ) {
165             $out{lccn_structure} = 'B';
166             $out{prefix_encoded} = $1;
167             $out{year_encoded} = $2;
168             $out{serial} = ( defined $3 ? $3 : $4 );
169              
170             $out{year_cataloged} = $out{year_encoded};
171              
172             # try LCCN structure A
173             } elsif (
174             $input =~ m{
175             ^
176             ([a-zA-Z\s]{0,3}) # 3-letter alphabetic prefix
177             (\d\d) # 2-letter year
178             (?:
179             -(\d{1,6}) # hyphen plus 1-6 digit serial number
180             | # or...
181             (\d{6}) # 6 digit serial number
182             )
183             (?:
184             (?:\s|(?!\d)) # blank for supplement
185             (/[A-Z]{1,3})* # suffix/alphabetic identifiers
186             (?://?
187             r(\d\d) # revision year encoded
188             (\d*))? # revision number
189             )?
190             $ }x
191             ) {
192              
193             $out{lccn_structure} = 'A';
194             $out{prefix_encoded} = $1;
195             $out{year_encoded} = $2;
196             $out{serial} = ( defined $3 ? $3 : $4 );
197             $out{suffix_encoded} = ( defined($5) ? $5 : '' );
198             $out{revision_year_encoded} = $6;
199             $out{revision_number} = ( $7 || undef );
200              
201             # per http://www.loc.gov/marc/marbi/dp/dp84.html and
202             # http://en.wikipedia.org/wiki/Library_of_Congress_Control_Number,
203             # the first LCCNs were assigned in 1898, and there were fewer than
204             # 8000 LCCns issued each of those years
205              
206             if ( $out{year_encoded} eq '98' ) {
207             if ( $out{serial} < 3000 ) {
208             $out{year_cataloged} = 1898;
209             } else {
210             $out{year_cataloged} = 1998;
211             }
212             } elsif ( $out{year_encoded} eq '99' ) {
213             if ( $out{serial} < 6000 ) {
214             $out{year_cataloged} = 1899;
215             } else {
216             $out{year_cataloged} = 1999;
217             }
218             } elsif ( $out{year_encoded} eq '00' ) {
219             if ( $out{serial} < 8000 ) {
220             $out{year_cataloged} = 1900;
221             } else {
222             $out{year_cataloged} = 2000;
223             }
224             } elsif ( $out{year_encoded} eq '50' ) {
225             $out{lccn_externally_created_flag} = 1; # zzz
226             } elsif ( $out{year_encoded} =~ m/^7\d$/ ) {
227             if ( _verify_7_checksum( $out{year_encoded}, $out{serial} ) )
228             {
229             $out{lccn_structure_series} = 7;
230             } else {
231             $out{year_cataloged} = $out{year_encoded} + 1900;
232             }
233             } else {
234             $out{year_cataloged} = $out{year_encoded} + 1900;
235             }
236              
237             if ( defined $out{revision_year_encoded}
238             and length $out{revision_year_encoded} ) {
239             if ( $out{revision_year_encoded} == 98
240             or $out{revision_year_encoded} == 99 ) {
241             $out{revision_year} = $out{revision_year_encoded} + 1800;
242             } else {
243             $out{revision_year} = $out{revision_year_encoded} + 1900;
244             }
245             }
246              
247             } else {
248             if ( $input !~ m/\d\d/ ) {
249             carp
250             qq{LCCN input "$input" doesn't contain enough numbers. Please check the input and try again.}
251             if $emit_warnings;
252             } elsif ( $input =~ m/^\s*(0(?:01|10))\b/ ) {
253             carp
254             qq{LCCN input "$input" starts with "$1", suggesting you've copied in part of a MARC record. Please remove MARC record formatting from the LCCN.}
255             if $emit_warnings;
256             } elsif ( $input =~ m/^\s*(\$[ab])\b/ ) {
257             carp
258             qq{LCCN $input "input" starts with "$1", suggesting you've copied in part of a MARC record. Please remove MARC record formatting from the LCCN.}
259             if $emit_warnings;
260             } elsif ( $input =~ m/#/ ) {
261             carp
262             qq{LCCN input "$input" contains "#" characters, which are sometimes used as placeholders for spaces Please remove the "#" characters from the LCCN input.}
263             if $emit_warnings;
264             } elsif ( $input =~ m/^\s*(_[a-z])\b\s*/ ) {
265             carp
266             qq{LCCN input "$input" starts with "$1", which may be MARC formatting. Please remove any such formatting from the LCCN.}
267             if $emit_warnings;
268             } else {
269             carp qq{LCCN input "$input" cannot be parsed.}
270             if $emit_warnings;
271             }
272              
273             return;
274             }
275              
276             my $req_prefix_length = ( $out{lccn_structure} eq 'A' ? 3 : 2 );
277              
278             # fixup serial
279             $out{serial} = sprintf '%06i', $out{serial};
280              
281             # fixup prefix
282             if ( defined $out{prefix_encoded} ) {
283             $out{prefix_encoded} =~ s/^\s+|\s+$//;
284             $out{prefix_encoded} = lc $out{prefix_encoded};
285             unless ( length $out{prefix_encoded} == $req_prefix_length ) {
286             $out{prefix_encoded} .= ' '
287             x ( $req_prefix_length - length $out{prefix_encoded} );
288             }
289              
290             $out{prefix} = $out{prefix_encoded};
291             $out{prefix} =~ s/\s+//g;
292             }
293              
294             # fixup suffix
295             if ( !defined $out{suffix_encoded} ) {
296             $out{suffix_encoded} = '';
297             }
298              
299             # fixup revision year
300             if ( !defined $out{revision_year_encoded} ) {
301             $out{revision_year_encoded} = '';
302             }
303              
304             $next->( $self, \%out );
305             }
306             };
307              
308             sub _canonical {
309 62     62   107 my $self = shift;
310 62 100       2280 if ( $self->lccn_structure eq 'B' ) {
    50          
311             return
312 19         622 sprintf( "%- 2s%4i%06i",
313             $self->prefix, $self->year_encoded, $self->serial );
314             } elsif ( $self->lccn_structure eq 'A' ) {
315 43         1568 my $string =
316             sprintf( "%- 3s%02i%06i %s",
317             $self->prefix, $self->year_encoded,
318             $self->serial, $self->suffix_encoded
319             );
320              
321 43 100       1722 if ( length $self->revision_year_encoded ) {
322 5 100       170 if ( !length $self->suffix_encoded ) {
323 2         5 $string .= '/';
324             }
325 5         177 $string .= '/r' . $self->revision_year_encoded;
326 5 100       173 if ( $self->revision_number ) {
327 2         66 $string .= $self->revision_number;
328             }
329             }
330              
331 43         1678 return $string;
332             } else { # should never get here
333 0         0 return '';
334             }
335             }
336              
337 5     5   21101 no Moose; # remove Moose keywords
  5         19  
  5         89  
338              
339             # normalize documented at http://www.loc.gov/marc/lccn-namespace.html
340             # and http://lccn.loc.gov/lccnperm-faq.html
341             sub _normalized {
342 526     526   830 my $self = shift;
343 526         18993 my $string = join '', $self->prefix, $self->year_encoded, $self->serial;
344 526         1933 $string =~ s/[\s-]//g;
345 526         23147 return $string;
346             }
347              
348             # permalink syntax documented at http://lccn.loc.gov/lccnperm-faq.html
349             sub _permalink {
350 1     1   3 my $self = shift;
351 1         31 return URI->new( 'http://lccn.loc.gov/' . $self->normalized );
352             }
353              
354             # info: uri syntax documented at http://www.loc.gov/standards/uri/info.html
355             sub _info_uri {
356 1     1   2 my $self = shift;
357 1         76 return URI->new( 'info:lccn/' . $self->normalized );
358             }
359              
360             sub _overload_string {
361 62     62   106224 my $self = shift;
362 62         2399 return $self->canonical;
363             }
364              
365             sub _overload_equality {
366 464     464   359867 my ( $self, $other ) = @_;
367              
368 464         685 my $other_lccn;
369 464 100 66     4827 if ( ref($other) and blessed($other) and $other->isa('Business::LCCN') ) {
      66        
370 232         432 $other_lccn = $other;
371             } else {
372 232         1444 $other_lccn = new Business::LCCN($other);
373             }
374              
375 464 50       469645 if ( !defined $other_lccn ) {
376 0         0 return 0;
377             } else {
378 464         19346 return ( $self->normalized eq $other_lccn->normalized );
379             }
380             }
381              
382             # returns a list of all the suffix alphabetic identifiers
383             sub _suffix_alphabetic_identifiers {
384 62     62   121 my $self = shift;
385 62 100       216 if ( length $self->{suffix_encoded} ) {
386 3         123 my @identifiers = $self->suffix_encoded =~ m{\b([A-Z]+)\b};
387 3         125 return \@identifiers;
388             } else {
389 59         2245 return [];
390             }
391             }
392              
393             sub _verify_7_checksum {
394 407     407   829 my ( $year_encoded, $serial ) = @_;
395 407 100 66     3370 unless ( $year_encoded =~ m/^\d{2}$/
396             and $serial =~ m/^\d{6}$/ ) {
397 41         141 return 0;
398             }
399              
400 366         3111 my @year_digits = split //, $year_encoded;
401 366         1619 my @serial_digits = split //, $serial;
402              
403 366         2464 my $product
404             = $year_digits[0] * 7
405             + $year_digits[1] * 8
406             + $serial_digits[0] * 4
407             + $serial_digits[1] * 6
408             + $serial_digits[2] * 3
409             + $serial_digits[3] * 5
410             + $serial_digits[4] * 2
411             + $serial_digits[5] * 1;
412              
413 366 100       1049 if ( $product % 11 == 0 ) {
414 2         12 return 1;
415             } else {
416 364         1584 return 0;
417             }
418             }
419              
420             =head1 INTERFACE
421              
422             =head2 Methods
423              
424             =head3 C<new>
425              
426             The new method takes a single encoded LCCN string, in a variety of
427             formats -- with or without hyphens, with proper spacing or without.
428             Examples:
429              
430             "89-1234", "89-001234", "89001234", "2002-1234", "2002-001234",
431             "2002001234", " 89001234 ", " 2002001234", "a89-1234",
432             "a89-001234", "a89001234", "a2002-1234", "a2002-001234",
433             "a2002001234", "a 89001234 ", "a 2002001234", "ab98-1234",
434             "ab98-001234", "ab98001234", "ab2002-1234", "ab2002-001234",
435             "ab2002001234", "ab 98001234 ", "ab 2002001234", "abc89-1234",
436             "abc89-001234", "abc89001234", "abc89001234 ", permalinks URLs
437             like "http://lccn.loc.gov/2002001234" and info URIs like
438             "info:lccn/2002001234"
439              
440             Returns a Business::LCCN object, or undef if the string can't be
441             parsed as a valid LCCN. If the string can't be parsed, C<new> will
442             warn with a diagnostic message explaining why the string was invalid.
443              
444             C<new> can also take an optional hashref of options as a second
445             parameter. The only option supported is C<no_warnings>, which will
446             disable any diagnostic warnings explaining why a candidate LCCN string
447             was invalid:
448              
449             # returns undef, issues warning about input not containing any digits
450             $foo = LCCN->new('x');
451              
452             # returns undef, but does not issue any additional warning
453             $bar = LCCN->new( 'x', { no_warnings => 1 } );
454              
455             =head3 LCCN attributes
456              
457             =head3 C<lccn_structure>
458              
459             LCCN structure type, either "A" (issued 1898-2000) or "B" (issued
460             2001-).
461              
462             =head3 C<prefix>
463              
464             LCCN's alphabetic prefix, 1-3 characters long. Returns an empty string
465             if LCCN has no prefix.
466              
467             =head3 C<prefix_encoded>
468              
469             The prefix as encoded, either two (structure A) or three (structure B)
470             characters long, space-padded.
471              
472             =head3 C<year_cataloged>
473              
474             The year a book was cataloged. Returns an undef in cases where the
475             cataloging year in unclear. For example, LCCN S<" 75425165 //r75">
476             has a cataloged year of 1975.
477              
478             =head3 C<year_encoded>
479              
480             A two (structure A) or four (structure B) digit string typically
481             representing the year the book was cataloged, but sometimes serving as
482             a checksum, or a source code. For example, LCCN S<" 75425165 //r75">
483             has an encoded year field of S<"75">.
484              
485             =head3 C<serial>
486              
487             A six-digit number zero-padded serial number. For example, LCCN
488             S<" 75425165 //r75"> has a serial number of S<"425165">.
489              
490             =head3 C<suffix_alphabetic_identifiers>
491              
492             Structure A LCCNs can include one or more 1-3 character
493             suffix/alphabetic identifiers. Returns a list of all identifiers
494             present. For example, for LCCN S<" 79139101 /AC/MN">,
495             suffix_alphabetic_identifiers returns ('AC', 'MN').
496              
497             =head3 C<suffix_encoded>
498              
499             The LCCN's suffix/alphabetic identifier field, as encoded in the LCCN.
500             Returns an empty string if no suffix present.
501              
502             =head3 C<revision_year>
503              
504             Structure A LCCNs can include a revision date in their
505             bibliographic records. Returns the four-digit year the record was
506             revised, or undef if not present. For example, LCCN
507             S<" 75425165 //r75"> has a revision year of 1975.
508              
509             =head3 C<revision_year_encoded>
510              
511             The two-letter revision date, as encoded in structure A LCCNs. Returns
512             an empty string if no revision year present. For example, LCCN
513             S<" 75425165 //r75"> has a revision year of C<"75">.
514              
515             =head3 C<revision_number>
516              
517             Some structure A LCCNs have a revision year and number,
518             representing the number of times the record has been revised. For
519             example, LCCN S<" 75425165 //r752"> has revision_number 2. Returns
520             undef if not present.
521              
522             =head3 LCCN representations
523              
524             =head3 C<canonical>
525              
526             Returns the canonical 12+ character default representation of an
527             LCCN. For example, S<" 85000002 "> is the canonical representation of
528             S<"85000002">, S<"85-000002">, S<"85-2">, S<" 85000002">.
529              
530             =head3 C<normalized>
531              
532             Returns the normalized 9-12 character representation of an LCCN.
533             Normalized LCCNs are often used in URIs and Internet-era
534             representations. For example, S<"n2001050268"> is the normalized
535             representation of S<"n 85-000002 ">, S<"n85-2">, S<"n 85-0000002">.
536              
537             =head3 C<info_uri>
538              
539             Returns the info: URI for an LCCN. For example, the URI for LCCN
540             S<"n 85-000002 "> is S<"info:lccn/n85000002">.
541              
542             =head3 C<original>
543              
544             Returns the original representation of the LCCN, as passed to C<new>.
545              
546             =head3 C<permalink>
547              
548             Returns the Library of Congress permalink URL for an LCCN. For
549             example, the permalink URL for LCCN S<"n 85-000002 "> is
550             S<"http://lccn.loc.gov/n85000002">.
551              
552             =head2 Operator overloading
553              
554             =head3 C<"">
555              
556             In string context, Business::LCCN objects stringify as the
557             canonical representation of the LCCN.
558              
559             =head3 C<eq>, C<==>
560              
561             Business::LCCN objects can be compared to other Business::LCCN
562             objects or LCCN strings.
563              
564             =head1 SEE ALSO
565              
566             L<Business::ISBN>, L<http://www.loc.gov/marc/lccn_structure.html>,
567             L<http://lccn.loc.gov/>,
568             L<http://www.loc.gov/standards/uri/info.html>,
569             L<http://en.wikipedia.org/wiki/Library_of_Congress_Control_Number>
570              
571             =head1 DIAGNOSTICS
572              
573             Running C<new> on invalid input may generate warnings, unless the
574             C<no_warnings> option is set.
575              
576             =head1 AUTHOR
577              
578             Anirvan Chatterjee, C<< <anirvan at cpan.org> >>
579              
580             =head1 BUGS
581              
582             Please report any bugs or feature requests to
583             C<bug-business-lccn at rt.cpan.org>, or through the web interface at
584             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Business-LCCN>. I
585             will be notified, and then you'll automatically be notified of
586             progress on your bug as I make changes.
587              
588             =head1 SUPPORT
589              
590             You can find documentation for this module with the perldoc command.
591              
592             perldoc Business::LCCN
593              
594             You can also look for information at:
595              
596             =over 4
597              
598             =item * RT: CPAN's request tracker
599              
600             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Business-LCCN>
601              
602             =item * AnnoCPAN: Annotated CPAN documentation
603              
604             L<http://annocpan.org/dist/Business-LCCN>
605              
606             =item * CPAN Ratings
607              
608             L<http://cpanratings.perl.org/d/Business-LCCN>
609              
610             =item * Search CPAN
611              
612             L<http://search.cpan.org/dist/Business-LCCN>
613              
614             =back
615              
616             =head1 COPYRIGHT & LICENSE
617              
618             Copyright 2008 Anirvan Chatterjee, all rights reserved.
619              
620             This program is free software; you can redistribute it and/or modify it
621             under the same terms as Perl itself.
622              
623             =cut
624              
625             1; # End of Business::LCCN
626              
627             # Local Variables:
628             # mode: perltidy
629             # End: