File Coverage

blib/lib/Text/Shoebox/Entry.pm
Criterion Covered Total %
statement 139 174 79.8
branch 27 48 56.2
condition 19 27 70.3
subroutine 30 35 85.7
pod 23 23 100.0
total 238 307 77.5


line stmt bran cond sub pod time code
1              
2             require 5;
3             package Text::Shoebox::Entry;
4 3     3   17 use strict;
  3         8  
  3         150  
5 3     3   15 use vars qw(@ISA $Debug $VERSION);
  3         6  
  3         197  
6 3     3   16 use integer;
  3         6  
  3         22  
7 3     3   86 use Text::Shoebox 1.02 ();
  3         59  
  3         82  
8 3     3   17 use Carp ();
  3         5  
  3         125  
9              
10             $Debug = 0 unless defined $Debug;
11             BEGIN {
12 3     3   8265 $VERSION = "1.02";
13             }
14             my $Array_Class;
15              
16             unless($Text::Shoebox::Lexicon::VERSION) { require Text::Shoebox::Lexicon; }
17              
18             ###########################################################################
19              
20             =head1 NAME
21              
22             Text::Shoebox::Entry - class for Shoebox SF lexicon entries
23              
24             =head1 SYNOPSIS
25              
26             use Text::Shoebox::Lexicon;
27             my $lex = Text::Shoebox::Lexicon->read_file( "haida.sf" );
28              
29             foreach my $entry ($lex->entries) {
30             #
31             # Each $entry is a Text::Shoebox::Entry object
32             #
33             my %e = $entry->as_list;
34             print "Looky, stuff! [", %e, "]!\n";
35             }
36              
37             =head1 DESCRIPTION
38              
39             An object of this class represents an entry in an SF lexicon
40             (L).
41              
42             An entry consists of a number of fields. Each field has two scalars,
43             a key, and a value. The first field in an entry is considered the
44             headword field, and its key must occur there and only there in that
45             entry. There is no requirement on uniqueness of keys in the rest of
46             the entry.
47              
48              
49             =head1 METHODS
50              
51              
52             =over
53              
54             =item $entry = Text::Shoebox::Entry->new();
55              
56             =item $entry = Text::Shoebox::Entry->new( 'foo' => 'bar', 'baz' => 'quux' );
57              
58             The C method creates a new Text::Shoebox::Entry method. If you
59             provide parameters, as in the second example, those are used as the
60             contents of the new object.
61              
62             Normally you don't need to expressly create objects of this class,
63             as Text::Shoebox::Lexicon will create them as needed when you call
64             a Text::Shoebox::Lexicon C or C method.
65              
66             =item $entry2 = $entry->copy
67              
68             This returns a copy of the object in $entry.
69              
70             =cut
71              
72             sub new { # Text::Shoebox is free to not use this, for speed's sake
73 22     22 1 3916 my $class = shift;
74 22   33     186 $class = ref($class) || $class; # be an object or a class method
75 22 50       47 print "New object in class $class\n" if $Debug;
76 22 50 33     83 if(@_ == 1 and ref($_[0]) eq 'ARRAY') {
77             # listref form -- call as: Text::Shoebox::Entry->new([foo => 'bar']);
78 0         0 return bless $_[0], $class;
79             } else {
80             # list form -- call as: Text::Shoebox::Entry->new(foo => 'bar');
81 22         158 return bless [@_], $class;
82             }
83             }
84              
85             sub copy {
86 1     1 1 2 my $original = $_[0];
87 1 50       5 Carp::croak("Text::Shoebox::entry is strictly an object method.")
88             unless ref $original;
89 1         7 return bless( [@$original], ref($original) );
90             # bless into the same class as the original
91             # presumably a deep copy isn't necessary!
92             }
93              
94             #--------------------------------------------------------------------------
95              
96             =item @keys = $entry->keys;
97              
98             This returns the names of all the keys (a/k/a fieldnames) in this entry.
99             For example, if $entry is
100             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
101             then $entry->keys will return the list C<('hw', 'ex', 'ex')>.
102              
103             =item @values = $entry->values;
104              
105             This returns the values of all fields in this entry.
106             For example, if $entry is
107             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
108             then $entry->values will return the list C<('foo', 'Things', 'Stuff')>.
109              
110             =cut
111              
112             sub keys {
113 2     2 1 436 my @out;
114 2         7 for(my $i = 0; $i < @{$_[0]}; $i += 2) { push @out, $_[0][$i] }
  7         24  
  5         13  
115 2         26 return @out;
116             }
117              
118             sub values {
119 1     1 1 616 my @out;
120 1         3 for(my $i = 1; $i < @{$_[0]}; $i += 2) { push @out, $_[0][$i] }
  4         12  
  3         16  
121 1         5 return @out;
122             }
123              
124             #--------------------------------------------------------------------------
125              
126             =item $headword = $entry->headword;
127              
128             This returns the headword value of this entry. This is basically
129             a shortcut for C<< ($entry->values)[0] >>
130              
131             =item $headword_field = $entry->headword_field;
132              
133             This returns the fieldname of this entry's headword field. This
134             is basically a shortcut for C<< ($entry->keys)[0] >>
135              
136             =item $value = $entry->_(I)
137              
138             Yes, this method really is called _ !
139              
140             This gets all the values of the pairs that have the key I. How
141             it returns those values (of which there may be none, one, or many) depends
142             on context:
143              
144             In list context, this simply returns the list of found values.
145             E.g., if $entry is
146             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
147             then C<< $entry->_('hw') >> returns the one-item list C<('foo')>,
148             then C<< $entry->_('zorp') >> returns the zero-item list C<()>,
149             and C<< $entry->_('ex') >> returns the two-item list C<('Things', 'Stuff')>.
150              
151             In scalar context, this returns undef if no values were found.
152             Otherwise it returns a magical arrayref containing the list of
153             (one or more) found values. What's special about these arrayrefs
154             is that if you treat one as a plain string, you get the useful value
155             C, instead of nonsense like
156             "ARRAY(0x1555294)".
157              
158             (Internally, this is implemented just like L, which see.)
159              
160             =cut
161              
162              
163 1 50   1 1 3 sub headword { @{$_[0]} ? $_[0][1] : undef } # simply the first value
  1         7  
164 1 50   1 1 199 sub headword_field { @{$_[0]} ? $_[0][0] : undef } # simply the first key
  1         8  
165              
166             sub Text::Shoebox::Entry::_ {
167 19     19   6312 my($self, $key) = @_;
168 19 50       50 return unless defined $key;
169              
170 19         24 my @out;
171 19         55 for(my $i = 0; $i < @$self; $i += 2) {
172 54 100       200 push @out, $self->[$i+1] if $key eq $self->[$i];
173             }
174 19 50       42 if(wantarray) {
175 0         0 return @out;
176             } else {
177 19 100       54 return unless @out;
178 15         121 return bless \@out, $Array_Class;
179             }
180             }
181              
182              
183              
184             =item my $num_pairs = $entry->pair_count;
185              
186             This returns an integer expressing the number of pairs in this entry.
187             It's basically the same as scalar($entry->keys).
188              
189              
190             =item ($key, $val) = $entry->pair($n)
191              
192             This returns the key and value of pair number $n for this entry.
193             E.g., if $entry is
194             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
195             then $entry->pair(1) is the list C<('ex', 'Things')>.
196              
197              
198             =item ($key, $val, $k2, $v2) = $entry->pairs($n, $m)
199              
200             This returns the key and value of pair number $n
201             and the key and value of pair number $m for this entry.
202             E.g., if $entry is
203             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
204             then $entry->pair(0,2) is the list C<('hw' => 'foo', 'ex', "Stuff")>.
205              
206             (Actually, C<< $entry->pair(...) >> is just an alias to
207             C<< $entry->pairs(...) >>.)
208              
209              
210             =cut
211              
212 1     1 1 2 sub pair_count { return @{$_[0]} / 2; }
  1         5  
213              
214 4     4 1 13 sub pair { (shift)->pairs(@_) } #alias
215              
216             sub pairs { # also good for accessing one pair, or none!
217             # get pair #3 (assuming counting from 0) : ($k,$v) = $e->pairs(3);
218 4     4 1 8 my $o = shift;
219 4         9 map { @{$o}[$_ * 2, $_ * 2 + 1] } @_;
  4         7  
  4         27  
220             # map to slices. Better be legal offsets!
221             # e.g., 3 maps to @{$o}[6,7]
222             }
223              
224             #--------------------------------------------------------------------------
225              
226             =item $true_or_false = $entry->are_keys_unique;
227              
228             This returns true iff every keyname in this entry appears only once.
229              
230             =item $entry->assert_keys_unique;
231              
232             This dies unless C<< $entry->are_keys_unique >> is true.
233              
234             =cut
235              
236             sub are_keys_unique {
237             # returns true iff the keys are unique in this entry.
238             # i.e., if no headword occurs twice (or more)
239 7 100   7 1 7 return 1 if @{$_[0]} < 2; # can't have collisions with just one key!
  7         25  
240              
241 6         8 my %seen;
242 6         10 for(my $i = 0; $i < @{$_[0]}; $i += 2) {
  19         46  
243 15 100       61 return 0 if $seen{$_[0][$i]}++;
244             }
245 4         29 return 1;
246             }
247              
248             sub assert_keys_unique {
249 0 0   0 1 0 return $_[0] if $_[0]->are_keys_unique;
250 0         0 my $e = shift;
251 0         0 my @k = $e->keys;
252 0         0 my %seen;
253 0         0 for my $k (@k) { ++$seen{$k} }
  0         0  
254 0 0       0 for my $k (@k) { $k = uc $k if $seen{$k} > 1 }
  0         0  
255 0         0 Carp::croak "Entry $e \"$$e[1]\" has duplicate keys: [@k]\nAborting";
256             }
257              
258              
259             =item $true_or_false = $entry->is_null
260              
261             This returns true iff this entry is empty. This is basically the same
262             as C<< 0 == $entry->pair_count >>.
263              
264             =item $true_or_false = $entry->is_sane
265              
266             This returns true iff this entry is non-null, contains no
267             references, and if no keyname is undef or zero-length.
268              
269             =item $entry->scrunch;
270              
271             For all values in this entry, this compacts all whitespace, deletes
272             leading and trailing whitespace, and deletes any pairs where the
273             value is blank. (Where "blank" means undef, zero-length, or
274             is all-whitespace.)
275              
276             =item $entry->dump;
277              
278             This prints (not returns!) a representation of this object's contents.
279              
280             =cut
281              
282              
283 3     3 1 477 sub is_null { return( @{$_[0]} == 0 ) }
  3         15  
284              
285             sub is_sane {
286 7     7 1 11 my $e = $_[0];
287 7 100       17 return 0 unless @$e; # empty entries are not sane
288 6         10 for(my $i = 0; $i < @{$_[0]}; $i += 2) { # scan keys
  11         26  
289 10 100 100     92 return 0 unless defined $e->[$i] and length $e->[$i];
290             # all keys have to be defined and be non-null
291 6 100 66     36 return 0 if ref $e->[$i] or ref $e->[$i+1];
292             # no references anywhere!
293             }
294 1         5 return 1;
295             }
296              
297             #--------------------------------------------------------------------------
298              
299             sub scrunch {
300 3     3 1 487 my $e = $_[0];
301 3         14 for(my $i = 1; $i < @$e; $i += 2) { # scan keys
302 8 100 66     78 unless( defined $e->[$i] and $e->[$i] =~ m/\S/ ) {
303 2         8 splice @$e, $i-1, 2; # nix K=>V where V is null or all-whitespace
304 2         5 $i-=2;
305             }
306 8         23 $e->[$i] =~ s/^\s+//s;
307 8         25 $e->[$i] =~ s/\s+$//s;
308 8         56 $e->[$i] =~ s/[ \t]*[\n\r]+[ \t]*/ /g;
309             # remove newlines and any whitespace around them
310             }
311 3         13 return $e;
312             }
313              
314             #--------------------------------------------------------------------------
315              
316             sub dump {
317 0     0 1 0 my $e = $_[0];
318              
319 0         0 print "Entry $e contains:\n";
320              
321 0         0 my $safe;
322 0         0 my $toggle = 0;
323 0         0 foreach my $v (@$e) {
324 0         0 ($safe = $v) =~
325             s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E\xA1-\xFE])>
326 0         0 <$Text::Shoebox::p{$1}>eg;
327 0 0       0 print(
328             ($toggle ^= 1) ? qq{ $safe = } : qq{"$safe"\n}
329             );
330             }
331 0         0 print "\n";
332 0         0 return $e;
333             }
334              
335             #--------------------------------------------------------------------------
336              
337             =item @it = $entry->as_list
338              
339             This returns a list expressing the contents of $entry.
340             For example, if $entry is
341             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
342             then this returns just that,
343             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>.
344              
345             =item $them = $entry->as_arrayref
346              
347             This returns an arrayref (probably blessed) to the contents
348             of $entry. For example, if $entry is
349             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
350             then this will return an arrayred to just that list.
351              
352             Note that this (and as_HoLS) is like the other C>
353             methods, in that this doesn't return any sort of copy; it returns
354             a reference to the entry's array itself -- if you do $them->[1] = 'x',
355             then $entry's contents change to
356             C<< ('hw' => 'x', 'ex' => 'Things', 'ex' => "Stuff") >>.)
357              
358             (Internally, this method is implemented by simply returning
359             $entry itself, since I,
360             $entry I just a blessed arrayref to the C<(k,v,k,v,...)>
361             list it contains.)
362              
363             =item $h = $entry->as_hashref
364              
365             This returns a hashref expressing the contents of $entry
366             as a C<< {key => value,...} >> hash, I
367             For example, if $entry is
368             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
369             then this returns
370             C<< { 'hw' => 'foo', 'ex' => "Stuff" } >>.
371              
372             =cut
373              
374 8     8 1 203 sub as_list { return @{$_[0]} }
  8         45  
375 1     1 1 4 sub as_arrayref { return $_[0] }
376 1     1 1 2 sub as_hashref { return {@{$_[0]}} }
  1         8  
377              
378              
379              
380              
381             =item $hol = $entry->as_HoL
382              
383             This returns a reference to a hash-of-lists expressing
384             the contents of this entry, i.e., a reference to a hash where
385             every value is an arrayref. Note that this doesn't destroy
386             duplicates.
387              
388             For example, if $entry is
389             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
390             then this returns
391             C<< { 'hw' => ['foo'], 'ex' => ['Things', 'Stuff'] } >>.
392              
393             And there's a useful bit of magic added -- the arrayrefs aren't just
394             plain arrayrefs, they're special arrayrefs (implemented
395             just like L) such that if you treat one
396             as a plain string, you get the useful value
397             C, instead of nonsense like
398             "ARRAY(0x1555294)".
399              
400             =cut
401              
402              
403             sub as_HoL {
404 2     2 1 5 my $e = $_[0];
405 2         4 my %h;
406 2         26 for(my $i = 0; $i < @$e; $i += 2) {
407 6   100     7 push @{ $h{ $e->[$i] } ||= []}, $e->[$i+1];
  6         44  
408             }
409 2         7 foreach my $v (CORE::values %h) { bless $v, $Array_Class }
  5         65  
410 2         8 \%h;
411             }
412              
413              
414             =item $hol = $entry->as_HoLS
415              
416             This returns a hashref where every value is a reference to an array
417             of scalar-refs to the value-slots in $entry. This is so you can
418             alter $entry. (This and $entry->as_arrayref are really the only ways
419             to alter an entry objects's content.)
420              
421             This sounds (and is) very circuitous, but it's like this:
422             If $entry is
423             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
424             then this returns
425             C<< { 'hw' => [$fooslot], 'ex' => [$thingsslot, $stuffslot] } >>,
426             where if you do C<$$thingslot = 'gack'>, then $entry then becomes
427             C<< ('hw' => 'foo', 'ex' => B<'gack'>, 'ex' => "Stuff") >>.
428              
429             =cut
430              
431             sub as_HoLS { # ref to a hash of list of refs to each of the value slots
432 1     1 1 7 my $e = $_[0];
433 1         2 my %h;
434 1         7 for(my $i = 0; $i < @$e; $i += 2) {
435 3   100     24 push @{
436 3         10 $h{ $e->[$i] } ||= []
437             }, \$e->[$i + 1];
438             }
439 1         5 \%h;
440             }
441              
442              
443              
444             =item $hol = $entry->as_doublets
445              
446             This returns this entry as a list of "doublets" -- i.e.,
447             as a list of two-item arrayrefs.
448              
449             For example, if $entry is
450             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
451             then this returns the list
452             C<< (['hw','foo'], ['ex','Things'], ['ex','Stuff']) >>.
453              
454             =cut
455              
456             sub as_doublets {
457             # returns this entry...
458             # (hw => 'shash', english => 'bear')
459             # as this...
460             # ([hw => 'shash'], [english => 'bear'])
461 1     1 1 8 my @out;
462 1         5 for(my $i = 0; $i < @{$_[0]}; $i += 2) {
  4         11  
463 3         5 push @out, [ @{ $_[0] }[$i, $i+1] ];
  3         10  
464             }
465 1         6 return @out;
466             }
467              
468              
469              
470             =item $xml_source = $entry->as_xml()
471              
472             =item $xml_source = $entry->as_xml( I )
473              
474             This returns an XML representation of this entry's contents. In short,
475             For example, if $entry is
476             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
477             then this returns this string:
478              
479             foo
480             Things
481             Stuff
482              
483             The only details arise from the problem of how to turn $entry's keynames
484             into XML tag names. For each key, if it's present in the optional TagNameHash
485             hashref parameter, then that value (C<< $tagnamehash->{$keyname} >>) is used;
486             otherwise, $keyname itself is used, stripped of characters other than
487             C, colon, underscore, period, and dash.
488              
489              
490              
491             =item $xml_source = $entry->as_xml_pairs()
492              
493             =item $xml_source = $entry->as_xml_pairs( I)
494              
495             This returns an XML representation of this entry's contents. In short,
496             For example, if $entry is
497             C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>,
498             then this returns this string:
499              
500            
501            
502            
503              
504             This avoids the problem of how to turn keynames into XML tagnames.
505             If you don't like the choice of the pair tagname (by default, "pair")
506             or the key attribute name (by default, "key"),
507             or the value attribute name (by default, "value"), then you can
508             specify new values as the parameters. So if you call
509             C<<
510             $entry->as_xml_pairs( 'fee' , 'fie', 'Foe:Fum')
511             >>, the return value is:
512              
513            
514            
515            
516              
517             =cut
518              
519             sub as_xml {
520             # Yes, VERY simpleminded. And note that the result is NOT wrapped
521             # in an ... or anything.
522              
523             # returns this entry...
524             # (hw => 'shash', english => 'bear')
525             # as this...
526             # " shash\nbear\n"
527              
528             # Consider this entry more as a suggestion, and as a debugging tool, than
529             # anything else.
530              
531             # Optional first parameter: a reference to a hash mapping key names
532             # to tags. E.g., $e->as_xml({hw => 'headword', english => 'gloss'})
533             # will give you this:
534             # " shash\nbear\n"
535              
536 1 50   1 1 82 my $map = ref($_[1]) ? $_[1] : {};
537              
538 1         3 my(@out, $k, $v);
539 1         3 for(my $i = 0; $i < @{$_[0]}; $i += 2) {
  4         15  
540 3         5 ($k,$v) = @{$_[0]}[$i, 1 + $i];
  3         10  
541              
542 3 50       9 if(exists $map->{$k}){
543 0         0 $k = $map->{$k};
544             } else {
545             # spiff up the key name so it's an okay GI (tag name)
546 3         71 $k =~ tr<-._:a-zA-Z0-9><_>cd; # Yes, this is conservative
547 3 50       10 if(length $k) {
548 3 50       14 $k = '_' . $k unless $k =~ m<^[_:a-zA-Z]>s;
549             # prefix unsafe things.
550             } else { # to avoid a null GI
551 0         0 $k = 'NULL';
552             }
553             }
554              
555 3         8 $v =~ s/&/&/g;
556 3         96 $v =~ s/
557 3         5 $v =~ s/>/>/g ;
558 3         12 push @out, " <$k>$v\n";
559             }
560 1         8 return join '', @out;
561             }
562              
563             sub as_xml_pairs {
564             # A bit less pointless. And note that the result is still not wrapped
565             # in an ... or anything.
566              
567             # Returns this entry...
568             # (hw => 'shash', english => 'bear')
569             # as this...
570             # " \n"
571              
572             # Consider this entry more as a suggestion, and as a debugging tool, than
573             # anything else.
574              
575             # Calling format: $e->as_xml_pairs(TAGNAME, KEYNAME, VALUENAME)
576             # TAGNAME defaults to 'pair'.
577             # KEYNAME defaults to 'key'.
578             # VALUENAME defaults to 'value'.
579              
580 2     2 1 9 my($o, $gi, $key_name, $value_name) = @_;
581 2   100     12 $gi ||= 'pair' ;
582 2   100     9 $key_name ||= 'key' ;
583 2   100     8 $value_name ||= 'value' ;
584              
585 2         3 my(@out, $k, $v);
586 2         21 for(my $i = 0; $i < @$o; $i += 2) {
587 6         10 ($k,$v) = @{$o}[$i, 1 + $i];
  6         17  
588 6         93 foreach my $x ($k, $v) {
589             # NB: Doesn't entitify apostrophes. No point, really.
590 12         26 $x =~ s/&/&/g;
591 12         15 $x =~ s/"/"/g;
592 12         14 $x =~ s/
593 12         14 $x =~ s/>/>/g;
594 12         36 $x =~ s<([\n\t\cm\cj])>
595 0         0 <'&#'.(ord($1)).';'>seg;
596             # turn newlines into character references
597             }
598 6         32 push @out, " <$gi $key_name=\"$k\" $value_name=\"$v\" />\n"
599             }
600              
601 2         24 return join '', @out;
602             }
603              
604             ###########################################################################
605             {
606             # Basically just the guts of Array::Autojoin:
607              
608             package Text::Shoebox::Entry::_Autojoin;
609             $Array_Class = __PACKAGE__;
610              
611             use overload(
612              
613 36     36   1813 '""' => sub { join '; ', @{$_[0]}},
  36         152  
614              
615 0   0 0   0 '0+' => sub {0 + ( $_[0][0] || 0 ) },
616             # stringifies and then just numerifies, but feh.
617              
618             'fallback' => 1, # turn on cleverness
619              
620             'bool', => sub { # true iff there's any true items in it
621 0 0   0   0 for (@{$_[0]}) { return 1 if $_ };
  0         0  
  0         0  
622 0         0 return '';
623             },
624              
625             '.=' => sub { # sure, why not.
626 0 0   0   0 if(@{$_[0]}) { $_[0][-1] .= $_[1] } else { push @{$_[0]}, $_[1] }
  0         0  
  0         0  
  0         0  
  0         0  
627 0         0 $_[0];
628             }, # but can't overload ||= or the like
629              
630 3     3   6759 );
  3         3786  
  3         48  
631             }
632             ###########################################################################
633             1;
634              
635             __END__