File Coverage

blib/lib/Library/CallNumber/LC.pm
Criterion Covered Total %
statement 125 131 95.4
branch 45 54 83.3
condition 8 13 61.5
subroutine 16 16 100.0
pod 9 9 100.0
total 203 223 91.0


line stmt bran cond sub pod time code
1             package Library::CallNumber::LC;
2              
3 3     3   32057 use warnings;
  3         6  
  3         105  
4 3     3   13 use strict;
  3         3  
  3         96  
5 3     3   2918 use Math::BigInt;
  3         63529  
  3         16  
6              
7             =head1 NAME
8              
9             Library::CallNumber::LC - Deal with Library-of-Congress call numbers
10              
11             =head1 VERSION
12              
13             Version 0.23;
14              
15             =cut
16              
17             our $VERSION = '0.23';
18              
19              
20             =head1 SYNOPSIS
21              
22             Utility functions to deal with Library of Congress Call Numbers
23              
24             use Library::CallNumber::LC;
25             my $a = Library::CallNumber::LC->new('A 123.4 .c11');
26             print $a->normalize; # normalizes for string comparisons.
27             # gives 'A01234 C11'
28             print $a->start_of_range; # same as "normalize"
29             my $b = Library::CallNumber::LC->new('B11 .c13 .d11');
30             print $b->normalize;
31             # gives 'B0011 C13 D11'
32             my @range = ($a->start_of_range, $b->end_of_range);
33             # end of range is 'B0011 C13 D11~'
34            
35             # Get components suitable for printing (e.g., number and decimal joined, leading dot on first cutter)
36             @comps = Library::CallNumber::LC->new('A 123.4 .c11')->components()
37            
38             # Same thing, but return empty strings for missing components (e.g., the cutters)
39             @comps = Library::CallNumber::LC->new('A 123.4 .c11')->components('true');
40              
41             =head1 ABSTRACT
42              
43             Library::CallNumber::LC is mostly designed to do call number normalization, with the following goals:
44              
45             =over 4
46              
47             =item * The normalized call numbers are comparable with each other, for proper sorting
48              
49             =item * The normalized call number is a short as possible, so left-anchored wildcard searches are possible (e.g., searching on "A11*" should give you all the A11 call numbers)
50              
51             =item * A range defined by start_of_range and end_of_range should be correct, assuming that the string given for the end of the range is, in fact, a left prefix.
52              
53             =back
54              
55             That last point needs some explanation. The idea is that if someone gives a range of, say, A-AZ, what they really mean is A - AZ9999.99. The end_of_range method generates a key which lies immediately beyond the last possible key for a given starting point. There is no attempt to make end_of_range normalization correspond to anything in real life.
56              
57             =cut
58              
59             # Set up the prefix mapping for longints
60             my %intmap;
61             my $i = 0;
62             foreach my $prefix (qw(a aa ab abc ac ae ag ah ai al am an anl ao ap aq arx as at aug aw awo ay az b bc bd bf bg bh bj bl bm bn bp bq br bs bt bu bv bx c cb cc cd ce cg cis cj cmh cmm cn cr cs ct cz d da daa daw db dc dd de df dff dg dh dj djk dk dkj dl doc dp dq dr ds dt dth du dx e ea eb ec ed ee ek ep epa ex f fb fc fem fg fj fnd fp fsd ft ful g ga gb gc gda ge gf gh gn gr gs gt gv h ha hb hc hcg hd he hf hfs hg hh hhg hj hjc hm hmk hn hq hs ht hv hx i ia ib iid ill ilm in ioe ip j ja jan jb jc jf jg jh jhe jj jk jkc jl jln jn jq js jv jx jz k kb kbm kbp kbq kbr kbu kc kd kdc kde kdg kdk kds kdz ke kea keb kem ken keo keq kes kf kfa kfc kfd kff kfg kfh kfi kfk kfl kfm kfn kfo kfp kfr kfs kft kfu kfv kfw kfx kfz kg kga kgb kgc kgd kge kgf kgg kgh kgj kgk kgl kgn kgq kgs kgt kgv kgx kh kha khc khd khf khh khk khp khq khu khw kit kj kja kjc kje kjg kjj kjk kjm kjn kjp kjq kjr kjs kjt kjv kjw kk kka kkb kkc kke kkf kkg kkh kki kkj kkm kkn kkp kkq kkr kks kkt kkv kkw kkx kky kkz kl kla klb kld kle klf klg klh klm kln klp klr kls klt klv klw km kmc kme kmf kmh kmj kmk kml kmm kmn kmo kmp kmq kmt kmu kmv kmx kn knc knd kne knf kng knh knk knl knm knn knp knq knr kns knt knu knw knx kny kp kpa kpc kpe kpf kpg kph kpj kpk kpl kpm kpp kps kpt kpv kpw kq kqc kqe kqg kqj kqk kqp kqw krb krc krg krm krn krp krr krs kru krv krx ks ksa ksc ksh ksj ksk ksl ksp kss kst ksv ksw ksx ksy kta ktd ktg ktj ktk ktl ktq ktr ktt ktu ktv ktw ktx kty ktz ku kuc kuq kvc kvf kvm kvn kvp kvq kvr kvs kvw kwc kwg kwh kwl kwp kwr kww kwx kz kza kzd l la law lb lc ld le lf lg lh lj ll ln lrm lt lv m may mb mc me mf mh mkl ml mpc mr ms mt my n na nat nax nb nc nd nda nds ne ner new ng nh nk nl nmb nn no nt nv nx ok onc p pa pb pc pcr pd pe pf pg ph phd pj pjc pk pl pm pn pnb pp pq pr ps pt pz q qa qb qc qd qe qh qk ql qm qp qr qry qu qv r ra rb rbw rc rcc rd re ref res rf rg rh rj rk rl rm rn rp rs rt rv rx rz s sb sd see sf sfk sgv sh sk sn sql sw t ta tc td tdd te tf tg tgg th tj tk tl tn tnj to tp tr ts tt tx tz u ua ub uc ud ue uf ug uh un use v va vb vc vd ve vf vg vk vla vm w wq x xp xx y yh yl yy z za zhn zz zzz)) {
63             $intmap{$prefix} = $i;
64             $i++;
65             }
66              
67             # Regexp constants to deal with matching LC and variants
68              
69             my $lcregex = qr/^
70             \s*
71             (?:VIDEO-D)? # for video stuff
72             (?:DVD-ROM)? # DVDs, obviously
73             (?:CD-ROM)? # CDs
74             (?:TAPE-C)? # Tapes
75             \s*
76             ([A-Z]{1,3}) # alpha
77             \s*
78             (?: # optional numbers with optional decimal point
79             (\d+)
80             (?:\s*?\.\s*?(\d+))?
81             )?
82             \s*
83             (\d+[stndrh]*)? # optional extra numbering including suffixes (1st, 2nd, etc.)
84             \s*
85             (?: # optional cutter
86             (\.)? \s* # optional decimal
87             ([A-Z]) # cutter letter
88             \s*
89             (\d+ | \Z)? # cutter numbers
90             )?
91             \s*
92             (?: # optional cutter
93             \.? \s*
94             ([A-Z]) # cutter letter
95             \s*
96             (\d+ | \Z)? # cutter numbers
97             )?
98             \s*
99             (?: # optional cutter
100             \.? \s*
101             ([A-Z]) # cutter letter
102             \s*
103             (\d+ | \Z)? # cutter numbers
104             )?
105             (\s+.+?)? # everthing else
106             \s*$
107             /x;
108              
109              
110              
111             my $weird = qr/
112             ^
113             \s*[A-Z]+\s*\d+\.\d+\.\d+
114             /x;
115              
116             # Class variables for top/bottom sort chars
117             my $Topper = ' '; # must sort before 'A'
118             my $Bottomer = '~'; # must sort after 'Z' and '9'
119              
120              
121             =head1 CONSTRUCTORS
122              
123             =head2 new([call_number_text, [topper_character, [bottomer_character]]]) -- create a new object, optionally passing in the initial string, a "topper", and a "bottomer" (explained below)
124              
125             =cut
126              
127             sub new {
128 7     7 1 6516 my $proto = shift;
129 7   33     41 my $class = ref($proto) || $proto;
130 7   100     26 my $lc = shift || '';
131 7         10 my $topper = shift;
132 7 100       22 $topper = $Topper if !defined($topper); # ZERO is false but might be a reasonable value, so we need this more specific check
133 7   66     24 my $bottomer = shift || $Bottomer;
134 7         26 my $self = {
135             callno => $lc,
136             topper => $topper,
137             bottomer => $bottomer
138             };
139 7         19 bless $self, $class;
140 7         22 return $self;
141             }
142              
143              
144             =head1 BASIC ACCESSORS
145              
146             =head2 call_number([call_number_text])
147              
148             The text of the call number we are dealing with.
149              
150             =cut
151              
152             sub call_number {
153 2     2 1 4 my $self = shift;
154 2 100       7 if (@_) { $self->{callno} = uc(shift) }
  1         3  
155 2         5 return $self->{callno};
156             }
157              
158             =head2 topper([character])
159              
160             Specify which character occupies the 'always-sort-to-the-top' slots in the sort keys. Defaults to the SPACE character, but can reasonably be anything with an ASCII value lower than 48 (i.e. the 'zero' character, '0'). This can function as either a class or instance method depending on need.
161              
162             =cut
163              
164             sub topper {
165 402     402 1 376 my $self = shift;
166 402         333 my $topper = shift;
167 402 100       637 if (ref $self) {
168 397 100       615 $self->{topper} = $topper if $topper; # just myself
169 397         764 return $self->{topper};
170             } else {
171 5 100       11 $Topper = $topper if $topper; # whole class
172 5         6 return $Topper;
173             }
174             }
175              
176             =head2 bottomer([character])
177              
178             Specify which character occupies the 'always-sort-to-the-bottom' slots in the sort keys. Defaults to the TILDE character, but can reasonably be anything with an ASCII value higher than 90 (i.e. 'Z'). This can function as either a class or instance method depending on need.
179              
180             =cut
181              
182             sub bottomer {
183 153     153 1 173 my $self = shift;
184 153         144 my $bottomer = shift;
185 153 100       227 if (ref $self) {
186 150 100       261 $self->{bottomer} = $bottomer if $bottomer; # just myself
187 150         256 return $self->{bottomer};
188             } else {
189 3 100       6 $Bottomer = $bottomer if $bottomer; # whole class
190 3         4 return $Bottomer;
191             }
192             }
193              
194             =head1 OTHER METHODS
195              
196             =head2 components(boolean returnAll = false)
197              
198             @comps = Library::CallNumber::LC->new('A 123.4 .c11')->components($returnAll)
199              
200             Returns an array of the individual components of the call number (or undef if it doesn't look like a call number).
201             Components are:
202              
203             =over 4
204              
205             =item * alpha
206              
207             =item * number (numeric.decimal)
208              
209             =item * cutter1
210              
211             =item * cutter2
212              
213             =item * cutter3
214              
215             =item * "extra" (anything after the cutters)
216              
217             =back
218              
219             The optional argument (false by default) determines whether or not empty components (e.g.,
220             extra cutters) get a slot in the returned list.
221              
222             =cut
223              
224             sub components {
225 4     4 1 4 my $self = shift;
226 4         8 my $returnAll = shift;
227 4         10 my $lc = $self->{callno};
228              
229 4 50       37 return undef if ($lc =~ $weird);
230 4 50       81 return undef unless ($lc =~ $lcregex);
231              
232              
233 4         31 my ($alpha, $num, $dec, $othernum, $c1dec, $c1alpha, $c1num, $c2alpha, $c2num, $c3alpha, $c3num, $extra) = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
234              
235             #combine stuff if need be
236            
237 4 100       10 if ($dec) {
238 2         5 $num .= '.' . $dec;
239             }
240            
241 3     3   38146 no warnings;
  3         5  
  3         241  
242 4         9 my $c1 = join('', $c1alpha, $c1num);
243 4         6 my $c2 = join('', $c2alpha, $c2num);
244 4         9 my $c3 = join('', $c3alpha, $c3num);
245            
246 3     3   15 use warnings;
  3         5  
  3         626  
247              
248 4 100       11 $c1 = '.' . $c1 if $c1dec;
249              
250 4         6 my @return;
251 4         9 foreach my $comp ($alpha, $num, $othernum, $c1, $c2, $c3, $extra) {
252 28 100       44 $comp = '' unless (defined $comp);
253 28 100 66     91 next unless ($comp =~ /\S/ or $returnAll);
254 18         46 $comp =~ m/^\s*(.*?)\s*$/;
255 18         30 $comp = $1;
256 18         31 push @return, $comp;
257             }
258 4         84 return @return;
259             }
260              
261             =head2 _normalize(call_number_text)
262              
263             Base function to perform normalization.
264              
265             =cut
266              
267             sub _normalize {
268 255     255   243 my $self = shift;
269 255         333 my $lc = uc(shift);
270              
271 255         366 my $topper = $self->topper;
272              
273             # return undef if ($lc =~ $weird);
274 255 50       3534 return undef unless ($lc =~ $lcregex);
275            
276 255         1266 my ($alpha, $num, $dec, $othernum, $c1dec, $c1alpha, $c1num, $c2alpha, $c2num, $c3alpha, $c3num, $extra) = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
277              
278 3     3   21 no warnings;
  3         3  
  3         1984  
279 255         289 my $class = $alpha;
280 255 100       825 $class .= sprintf('%04s', $num) if $num;
281 255 100       401 $class .= $dec if $dec;
282 255         324 my $c1 = $c1alpha.$c1num;
283 255         291 my $c2 = $c2alpha.$c2num;
284 255         289 my $c3 = $c3alpha.$c3num;
285              
286             # normalize extra (most commonly years/numbering, benefits from padding)
287             # this could be reduced to a four digit pad, as very, very few numbers
288             # reach 10000, but we'll be conservative here (for now)
289 255         390 $extra =~ s/^\s+//g;
290 255         264 $extra =~ s/\.\s+/./g;
291 255         236 $extra =~ s/(\d)\s*-\s*(\d)/$1-$2/g;
292 255         305 $extra =~ s/(\d+)/sprintf("%05s", $1)/ge;
  48         144  
293 255 100       535 $extra = $topper . $extra if ($extra ne ''); # give the extra less 'weight' for falling down the list
294            
295             # pad out othernum (again, conservatively)
296 255         322 $othernum =~ s/(\d+)/sprintf("%05s", $1)/ge;
  25         74  
297              
298 255         358 return join($topper, grep {/\S/} ($class, $othernum, $c1, $c2, $c3, $extra));
  1530         3406  
299             }
300              
301             =head2 normalize([call_number_text])
302              
303             Normalize the stored or passed call number as a sortable string
304              
305             =cut
306              
307             sub normalize {
308 249     249 1 51391 my $self = shift;
309 249         270 my $lc = shift;
310 249 100       472 $lc = $lc? uc($lc) : $self->{callno};
311 249         434 return $self->_normalize($lc)
312             }
313              
314             =head2 start_of_range([call_number_text])
315              
316             Alias for normalize
317              
318             =cut
319              
320             sub start_of_range {
321 2     2 1 5 my $self = shift;
322 2         3 return $self->normalize(@_);
323             }
324              
325             =head2 end_of_range([call_number_text])
326              
327             Downshift an lc number so it represents the end of a range
328              
329             =cut
330              
331             sub end_of_range {
332 6     6 1 11 my $self = shift;
333 6         6 my $lc = shift;
334 6 100       13 $lc = $lc? uc($lc) : $self->{callno};
335 6         11 my $bottomer = $self->bottomer;
336 6         10 return $self->_normalize($lc) . $bottomer;
337             }
338              
339             =head2 toLongInt(call_number_text, num_digits)
340              
341             Attempt to turn a call number into an integer value. Possibly useful for fast range checks, although obviously not perfectly accurate. Optional argument I<$num_digits> can be used to control the number of digits used, and therefore the precision of the results.
342              
343             =cut
344              
345             my $minval = new Math::BigInt('0'); # set to zero until this code matures
346             my $minvalstring = $minval->bstr;
347              
348             # this is a work in progress, with room for improvement in both exception
349             # logic and overall economy of bits
350             sub toLongInt {
351 142     142 1 5427 my $self = shift;
352 142         143 my $lc = shift;
353 142   50     729 my $num_digits = shift || 19; # 19 is a max-fit for 64-bits within our scope
354              
355 142         242 my $topper = $self->topper;
356 142         233 my $bottomer = $self->bottomer;
357              
358             #print "$lc\n";
359 142         166 my $topper_ord = ord($topper);
360 142         225 my $long = $self->normalize($lc);
361              
362 142 50       270 return $minvalstring unless ($long);
363              
364 142         125 my ($alpha, $num, $rest);
365 142 100       552 if ($long =~ /^([A-Z]+)(\d{4})(.*)$/) { # we have a 'full' call number
    50          
366 131         345 ($alpha, $num, $rest) = (lc($1), $2, $3);
367             } elsif ($long =~ /^([A-Z]+)(.*)$/) { # numberless class; generally invalid, but let it slide for now
368 11         29 ($alpha, $rest) = (lc($1), $2);
369 11 50       52 if ($rest =~ /^$bottomer/) { # bottomed-out class
370 0         0 $num = '9999';
371             } else {
372 11         21 $num = '0000';
373             }
374             }
375 142         160 my $class_int_string = '';
376 142 50       325 if (defined($intmap{$alpha})) {
377 142         243 $class_int_string .= $intmap{$alpha} . $num;
378             } else {
379 0         0 warn "Unknown prefix '$alpha'\n";
380 0         0 return $minvalstring;
381             }
382 142         126 my $rest_int_string = '';
383 142         121 my $bottomout;
384 142         497 foreach my $char (split('', $rest)) {
385 1103 50       1828 if ($char eq $bottomer) {
386 0         0 $bottomout = 1;
387 0         0 last;
388             }
389 1103         1967 $rest_int_string .= sprintf('%02d', ord($char) - $topper_ord);
390             }
391              
392 142         338 $rest_int_string = substr($rest_int_string, 0, $num_digits - 7); # Reserve first seven digits for $alpha and $num
393 142 50       201 if ($bottomout) {
394 0         0 $rest_int_string .= '9' x (($num_digits - 7) - length($rest_int_string)); # pad it if need be
395             } else {
396 142         256 $rest_int_string .= '0' x (($num_digits - 7) - length($rest_int_string)); # pad it if need be
397             }
398              
399             # print " $long => ", join('', @rv), "\n";
400 142         547 my $longint = Math::BigInt->new($class_int_string . $rest_int_string);
401 142         8935 $longint->badd($minval);
402             # warn "\n\n".$self->_normalize($lc)." = ".$longint->bstr." ( $class_int_string + $rest_int_string) \n\n";
403 142         6575 return $longint->bstr;
404            
405             }
406              
407              
408              
409             =head1 AUTHOR
410              
411             Current Maintainer: Dan Wells, C<< >>
412             Original Author: Bill Dueber, C<< >>
413              
414             =head1 BUGS
415              
416             Please report any bugs or feature requests through the web interface at
417             L. I will be
418             notified, and then you'll automatically be notified of progress on your bug as
419             I make changes.
420              
421              
422             =head1 SUPPORT
423              
424             You can find documentation for this module with the perldoc command.
425              
426             perldoc Library::CallNumber::LC
427              
428              
429             You can also look for information at the Google Code page:
430              
431             http://code.google.com/p/library-callnumber-lc/
432              
433              
434             =head1 COPYRIGHT & LICENSE
435              
436             Copyright 2009 Bill Dueber, all rights reserved.
437             Copyright 2011 Dan Wells, all rights reserved.
438              
439             This program is free software; you can redistribute it and/or modify it
440             under the same terms as Perl itself and also under the new BSD license
441             as described at http://www.opensource.org/licenses/bsd-license.php
442              
443              
444             =cut
445              
446             1; # End of Library::CallNumber::LC