line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
require 5; |
3
|
|
|
|
|
|
|
package Text::Shoebox::Entry; |
4
|
3
|
|
|
3
|
|
17
|
use strict; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
150
|
|
5
|
3
|
|
|
3
|
|
15
|
use vars qw(@ISA $Debug $VERSION); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
197
|
|
6
|
3
|
|
|
3
|
|
16
|
use integer; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
22
|
|
7
|
3
|
|
|
3
|
|
86
|
use Text::Shoebox 1.02 (); |
|
3
|
|
|
|
|
59
|
|
|
3
|
|
|
|
|
82
|
|
8
|
3
|
|
|
3
|
|
17
|
use Carp (); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
125
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
$Debug = 0 unless defined $Debug; |
11
|
|
|
|
|
|
|
BEGIN { |
12
|
3
|
|
|
3
|
|
8265
|
$VERSION = "1.02"; |
13
|
|
|
|
|
|
|
} |
14
|
|
|
|
|
|
|
my $Array_Class; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
unless($Text::Shoebox::Lexicon::VERSION) { require Text::Shoebox::Lexicon; } |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
########################################################################### |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 NAME |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
Text::Shoebox::Entry - class for Shoebox SF lexicon entries |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
use Text::Shoebox::Lexicon; |
27
|
|
|
|
|
|
|
my $lex = Text::Shoebox::Lexicon->read_file( "haida.sf" ); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
foreach my $entry ($lex->entries) { |
30
|
|
|
|
|
|
|
# |
31
|
|
|
|
|
|
|
# Each $entry is a Text::Shoebox::Entry object |
32
|
|
|
|
|
|
|
# |
33
|
|
|
|
|
|
|
my %e = $entry->as_list; |
34
|
|
|
|
|
|
|
print "Looky, stuff! [", %e, "]!\n"; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 DESCRIPTION |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
An object of this class represents an entry in an SF lexicon |
40
|
|
|
|
|
|
|
(L). |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
An entry consists of a number of fields. Each field has two scalars, |
43
|
|
|
|
|
|
|
a key, and a value. The first field in an entry is considered the |
44
|
|
|
|
|
|
|
headword field, and its key must occur there and only there in that |
45
|
|
|
|
|
|
|
entry. There is no requirement on uniqueness of keys in the rest of |
46
|
|
|
|
|
|
|
the entry. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 METHODS |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=over |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=item $entry = Text::Shoebox::Entry->new(); |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item $entry = Text::Shoebox::Entry->new( 'foo' => 'bar', 'baz' => 'quux' ); |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
The C method creates a new Text::Shoebox::Entry method. If you |
59
|
|
|
|
|
|
|
provide parameters, as in the second example, those are used as the |
60
|
|
|
|
|
|
|
contents of the new object. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Normally you don't need to expressly create objects of this class, |
63
|
|
|
|
|
|
|
as Text::Shoebox::Lexicon will create them as needed when you call |
64
|
|
|
|
|
|
|
a Text::Shoebox::Lexicon C or C method. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item $entry2 = $entry->copy |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
This returns a copy of the object in $entry. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=cut |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub new { # Text::Shoebox is free to not use this, for speed's sake |
73
|
22
|
|
|
22
|
1
|
3916
|
my $class = shift; |
74
|
22
|
|
33
|
|
|
186
|
$class = ref($class) || $class; # be an object or a class method |
75
|
22
|
50
|
|
|
|
47
|
print "New object in class $class\n" if $Debug; |
76
|
22
|
50
|
33
|
|
|
83
|
if(@_ == 1 and ref($_[0]) eq 'ARRAY') { |
77
|
|
|
|
|
|
|
# listref form -- call as: Text::Shoebox::Entry->new([foo => 'bar']); |
78
|
0
|
|
|
|
|
0
|
return bless $_[0], $class; |
79
|
|
|
|
|
|
|
} else { |
80
|
|
|
|
|
|
|
# list form -- call as: Text::Shoebox::Entry->new(foo => 'bar'); |
81
|
22
|
|
|
|
|
158
|
return bless [@_], $class; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub copy { |
86
|
1
|
|
|
1
|
1
|
2
|
my $original = $_[0]; |
87
|
1
|
50
|
|
|
|
5
|
Carp::croak("Text::Shoebox::entry is strictly an object method.") |
88
|
|
|
|
|
|
|
unless ref $original; |
89
|
1
|
|
|
|
|
7
|
return bless( [@$original], ref($original) ); |
90
|
|
|
|
|
|
|
# bless into the same class as the original |
91
|
|
|
|
|
|
|
# presumably a deep copy isn't necessary! |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=item @keys = $entry->keys; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
This returns the names of all the keys (a/k/a fieldnames) in this entry. |
99
|
|
|
|
|
|
|
For example, if $entry is |
100
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
101
|
|
|
|
|
|
|
then $entry->keys will return the list C<('hw', 'ex', 'ex')>. |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item @values = $entry->values; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
This returns the values of all fields in this entry. |
106
|
|
|
|
|
|
|
For example, if $entry is |
107
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
108
|
|
|
|
|
|
|
then $entry->values will return the list C<('foo', 'Things', 'Stuff')>. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=cut |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub keys { |
113
|
2
|
|
|
2
|
1
|
436
|
my @out; |
114
|
2
|
|
|
|
|
7
|
for(my $i = 0; $i < @{$_[0]}; $i += 2) { push @out, $_[0][$i] } |
|
7
|
|
|
|
|
24
|
|
|
5
|
|
|
|
|
13
|
|
115
|
2
|
|
|
|
|
26
|
return @out; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub values { |
119
|
1
|
|
|
1
|
1
|
616
|
my @out; |
120
|
1
|
|
|
|
|
3
|
for(my $i = 1; $i < @{$_[0]}; $i += 2) { push @out, $_[0][$i] } |
|
4
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
16
|
|
121
|
1
|
|
|
|
|
5
|
return @out; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item $headword = $entry->headword; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This returns the headword value of this entry. This is basically |
129
|
|
|
|
|
|
|
a shortcut for C<< ($entry->values)[0] >> |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=item $headword_field = $entry->headword_field; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
This returns the fieldname of this entry's headword field. This |
134
|
|
|
|
|
|
|
is basically a shortcut for C<< ($entry->keys)[0] >> |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item $value = $entry->_(I) |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Yes, this method really is called _ ! |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
This gets all the values of the pairs that have the key I. How |
141
|
|
|
|
|
|
|
it returns those values (of which there may be none, one, or many) depends |
142
|
|
|
|
|
|
|
on context: |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
In list context, this simply returns the list of found values. |
145
|
|
|
|
|
|
|
E.g., if $entry is |
146
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
147
|
|
|
|
|
|
|
then C<< $entry->_('hw') >> returns the one-item list C<('foo')>, |
148
|
|
|
|
|
|
|
then C<< $entry->_('zorp') >> returns the zero-item list C<()>, |
149
|
|
|
|
|
|
|
and C<< $entry->_('ex') >> returns the two-item list C<('Things', 'Stuff')>. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
In scalar context, this returns undef if no values were found. |
152
|
|
|
|
|
|
|
Otherwise it returns a magical arrayref containing the list of |
153
|
|
|
|
|
|
|
(one or more) found values. What's special about these arrayrefs |
154
|
|
|
|
|
|
|
is that if you treat one as a plain string, you get the useful value |
155
|
|
|
|
|
|
|
C, instead of nonsense like |
156
|
|
|
|
|
|
|
"ARRAY(0x1555294)". |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
(Internally, this is implemented just like L, which see.) |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=cut |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
|
163
|
1
|
50
|
|
1
|
1
|
3
|
sub headword { @{$_[0]} ? $_[0][1] : undef } # simply the first value |
|
1
|
|
|
|
|
7
|
|
164
|
1
|
50
|
|
1
|
1
|
199
|
sub headword_field { @{$_[0]} ? $_[0][0] : undef } # simply the first key |
|
1
|
|
|
|
|
8
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub Text::Shoebox::Entry::_ { |
167
|
19
|
|
|
19
|
|
6312
|
my($self, $key) = @_; |
168
|
19
|
50
|
|
|
|
50
|
return unless defined $key; |
169
|
|
|
|
|
|
|
|
170
|
19
|
|
|
|
|
24
|
my @out; |
171
|
19
|
|
|
|
|
55
|
for(my $i = 0; $i < @$self; $i += 2) { |
172
|
54
|
100
|
|
|
|
200
|
push @out, $self->[$i+1] if $key eq $self->[$i]; |
173
|
|
|
|
|
|
|
} |
174
|
19
|
50
|
|
|
|
42
|
if(wantarray) { |
175
|
0
|
|
|
|
|
0
|
return @out; |
176
|
|
|
|
|
|
|
} else { |
177
|
19
|
100
|
|
|
|
54
|
return unless @out; |
178
|
15
|
|
|
|
|
121
|
return bless \@out, $Array_Class; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item my $num_pairs = $entry->pair_count; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
This returns an integer expressing the number of pairs in this entry. |
187
|
|
|
|
|
|
|
It's basically the same as scalar($entry->keys). |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item ($key, $val) = $entry->pair($n) |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
This returns the key and value of pair number $n for this entry. |
193
|
|
|
|
|
|
|
E.g., if $entry is |
194
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
195
|
|
|
|
|
|
|
then $entry->pair(1) is the list C<('ex', 'Things')>. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item ($key, $val, $k2, $v2) = $entry->pairs($n, $m) |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
This returns the key and value of pair number $n |
201
|
|
|
|
|
|
|
and the key and value of pair number $m for this entry. |
202
|
|
|
|
|
|
|
E.g., if $entry is |
203
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
204
|
|
|
|
|
|
|
then $entry->pair(0,2) is the list C<('hw' => 'foo', 'ex', "Stuff")>. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
(Actually, C<< $entry->pair(...) >> is just an alias to |
207
|
|
|
|
|
|
|
C<< $entry->pairs(...) >>.) |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
211
|
|
|
|
|
|
|
|
212
|
1
|
|
|
1
|
1
|
2
|
sub pair_count { return @{$_[0]} / 2; } |
|
1
|
|
|
|
|
5
|
|
213
|
|
|
|
|
|
|
|
214
|
4
|
|
|
4
|
1
|
13
|
sub pair { (shift)->pairs(@_) } #alias |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub pairs { # also good for accessing one pair, or none! |
217
|
|
|
|
|
|
|
# get pair #3 (assuming counting from 0) : ($k,$v) = $e->pairs(3); |
218
|
4
|
|
|
4
|
1
|
8
|
my $o = shift; |
219
|
4
|
|
|
|
|
9
|
map { @{$o}[$_ * 2, $_ * 2 + 1] } @_; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
27
|
|
220
|
|
|
|
|
|
|
# map to slices. Better be legal offsets! |
221
|
|
|
|
|
|
|
# e.g., 3 maps to @{$o}[6,7] |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item $true_or_false = $entry->are_keys_unique; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
This returns true iff every keyname in this entry appears only once. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=item $entry->assert_keys_unique; |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
This dies unless C<< $entry->are_keys_unique >> is true. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=cut |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
sub are_keys_unique { |
237
|
|
|
|
|
|
|
# returns true iff the keys are unique in this entry. |
238
|
|
|
|
|
|
|
# i.e., if no headword occurs twice (or more) |
239
|
7
|
100
|
|
7
|
1
|
7
|
return 1 if @{$_[0]} < 2; # can't have collisions with just one key! |
|
7
|
|
|
|
|
25
|
|
240
|
|
|
|
|
|
|
|
241
|
6
|
|
|
|
|
8
|
my %seen; |
242
|
6
|
|
|
|
|
10
|
for(my $i = 0; $i < @{$_[0]}; $i += 2) { |
|
19
|
|
|
|
|
46
|
|
243
|
15
|
100
|
|
|
|
61
|
return 0 if $seen{$_[0][$i]}++; |
244
|
|
|
|
|
|
|
} |
245
|
4
|
|
|
|
|
29
|
return 1; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub assert_keys_unique { |
249
|
0
|
0
|
|
0
|
1
|
0
|
return $_[0] if $_[0]->are_keys_unique; |
250
|
0
|
|
|
|
|
0
|
my $e = shift; |
251
|
0
|
|
|
|
|
0
|
my @k = $e->keys; |
252
|
0
|
|
|
|
|
0
|
my %seen; |
253
|
0
|
|
|
|
|
0
|
for my $k (@k) { ++$seen{$k} } |
|
0
|
|
|
|
|
0
|
|
254
|
0
|
0
|
|
|
|
0
|
for my $k (@k) { $k = uc $k if $seen{$k} > 1 } |
|
0
|
|
|
|
|
0
|
|
255
|
0
|
|
|
|
|
0
|
Carp::croak "Entry $e \"$$e[1]\" has duplicate keys: [@k]\nAborting"; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=item $true_or_false = $entry->is_null |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
This returns true iff this entry is empty. This is basically the same |
262
|
|
|
|
|
|
|
as C<< 0 == $entry->pair_count >>. |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=item $true_or_false = $entry->is_sane |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
This returns true iff this entry is non-null, contains no |
267
|
|
|
|
|
|
|
references, and if no keyname is undef or zero-length. |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=item $entry->scrunch; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
For all values in this entry, this compacts all whitespace, deletes |
272
|
|
|
|
|
|
|
leading and trailing whitespace, and deletes any pairs where the |
273
|
|
|
|
|
|
|
value is blank. (Where "blank" means undef, zero-length, or |
274
|
|
|
|
|
|
|
is all-whitespace.) |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=item $entry->dump; |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
This prints (not returns!) a representation of this object's contents. |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=cut |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
3
|
|
|
3
|
1
|
477
|
sub is_null { return( @{$_[0]} == 0 ) } |
|
3
|
|
|
|
|
15
|
|
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub is_sane { |
286
|
7
|
|
|
7
|
1
|
11
|
my $e = $_[0]; |
287
|
7
|
100
|
|
|
|
17
|
return 0 unless @$e; # empty entries are not sane |
288
|
6
|
|
|
|
|
10
|
for(my $i = 0; $i < @{$_[0]}; $i += 2) { # scan keys |
|
11
|
|
|
|
|
26
|
|
289
|
10
|
100
|
100
|
|
|
92
|
return 0 unless defined $e->[$i] and length $e->[$i]; |
290
|
|
|
|
|
|
|
# all keys have to be defined and be non-null |
291
|
6
|
100
|
66
|
|
|
36
|
return 0 if ref $e->[$i] or ref $e->[$i+1]; |
292
|
|
|
|
|
|
|
# no references anywhere! |
293
|
|
|
|
|
|
|
} |
294
|
1
|
|
|
|
|
5
|
return 1; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub scrunch { |
300
|
3
|
|
|
3
|
1
|
487
|
my $e = $_[0]; |
301
|
3
|
|
|
|
|
14
|
for(my $i = 1; $i < @$e; $i += 2) { # scan keys |
302
|
8
|
100
|
66
|
|
|
78
|
unless( defined $e->[$i] and $e->[$i] =~ m/\S/ ) { |
303
|
2
|
|
|
|
|
8
|
splice @$e, $i-1, 2; # nix K=>V where V is null or all-whitespace |
304
|
2
|
|
|
|
|
5
|
$i-=2; |
305
|
|
|
|
|
|
|
} |
306
|
8
|
|
|
|
|
23
|
$e->[$i] =~ s/^\s+//s; |
307
|
8
|
|
|
|
|
25
|
$e->[$i] =~ s/\s+$//s; |
308
|
8
|
|
|
|
|
56
|
$e->[$i] =~ s/[ \t]*[\n\r]+[ \t]*/ /g; |
309
|
|
|
|
|
|
|
# remove newlines and any whitespace around them |
310
|
|
|
|
|
|
|
} |
311
|
3
|
|
|
|
|
13
|
return $e; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub dump { |
317
|
0
|
|
|
0
|
1
|
0
|
my $e = $_[0]; |
318
|
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
0
|
print "Entry $e contains:\n"; |
320
|
|
|
|
|
|
|
|
321
|
0
|
|
|
|
|
0
|
my $safe; |
322
|
0
|
|
|
|
|
0
|
my $toggle = 0; |
323
|
0
|
|
|
|
|
0
|
foreach my $v (@$e) { |
324
|
0
|
|
|
|
|
0
|
($safe = $v) =~ |
325
|
|
|
|
|
|
|
s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E\xA1-\xFE])> |
326
|
0
|
|
|
|
|
0
|
<$Text::Shoebox::p{$1}>eg; |
327
|
0
|
0
|
|
|
|
0
|
print( |
328
|
|
|
|
|
|
|
($toggle ^= 1) ? qq{ $safe = } : qq{"$safe"\n} |
329
|
|
|
|
|
|
|
); |
330
|
|
|
|
|
|
|
} |
331
|
0
|
|
|
|
|
0
|
print "\n"; |
332
|
0
|
|
|
|
|
0
|
return $e; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=item @it = $entry->as_list |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
This returns a list expressing the contents of $entry. |
340
|
|
|
|
|
|
|
For example, if $entry is |
341
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
342
|
|
|
|
|
|
|
then this returns just that, |
343
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item $them = $entry->as_arrayref |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
This returns an arrayref (probably blessed) to the contents |
348
|
|
|
|
|
|
|
of $entry. For example, if $entry is |
349
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
350
|
|
|
|
|
|
|
then this will return an arrayred to just that list. |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Note that this (and as_HoLS) is like the other C> |
353
|
|
|
|
|
|
|
methods, in that this doesn't return any sort of copy; it returns |
354
|
|
|
|
|
|
|
a reference to the entry's array itself -- if you do $them->[1] = 'x', |
355
|
|
|
|
|
|
|
then $entry's contents change to |
356
|
|
|
|
|
|
|
C<< ('hw' => 'x', 'ex' => 'Things', 'ex' => "Stuff") >>.) |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
(Internally, this method is implemented by simply returning |
359
|
|
|
|
|
|
|
$entry itself, since I, |
360
|
|
|
|
|
|
|
$entry I just a blessed arrayref to the C<(k,v,k,v,...)> |
361
|
|
|
|
|
|
|
list it contains.) |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=item $h = $entry->as_hashref |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
This returns a hashref expressing the contents of $entry |
366
|
|
|
|
|
|
|
as a C<< {key => value,...} >> hash, I |
367
|
|
|
|
|
|
|
For example, if $entry is |
368
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
369
|
|
|
|
|
|
|
then this returns |
370
|
|
|
|
|
|
|
C<< { 'hw' => 'foo', 'ex' => "Stuff" } >>. |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
373
|
|
|
|
|
|
|
|
374
|
8
|
|
|
8
|
1
|
203
|
sub as_list { return @{$_[0]} } |
|
8
|
|
|
|
|
45
|
|
375
|
1
|
|
|
1
|
1
|
4
|
sub as_arrayref { return $_[0] } |
376
|
1
|
|
|
1
|
1
|
2
|
sub as_hashref { return {@{$_[0]}} } |
|
1
|
|
|
|
|
8
|
|
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=item $hol = $entry->as_HoL |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
This returns a reference to a hash-of-lists expressing |
384
|
|
|
|
|
|
|
the contents of this entry, i.e., a reference to a hash where |
385
|
|
|
|
|
|
|
every value is an arrayref. Note that this doesn't destroy |
386
|
|
|
|
|
|
|
duplicates. |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
For example, if $entry is |
389
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
390
|
|
|
|
|
|
|
then this returns |
391
|
|
|
|
|
|
|
C<< { 'hw' => ['foo'], 'ex' => ['Things', 'Stuff'] } >>. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
And there's a useful bit of magic added -- the arrayrefs aren't just |
394
|
|
|
|
|
|
|
plain arrayrefs, they're special arrayrefs (implemented |
395
|
|
|
|
|
|
|
just like L) such that if you treat one |
396
|
|
|
|
|
|
|
as a plain string, you get the useful value |
397
|
|
|
|
|
|
|
C, instead of nonsense like |
398
|
|
|
|
|
|
|
"ARRAY(0x1555294)". |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=cut |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub as_HoL { |
404
|
2
|
|
|
2
|
1
|
5
|
my $e = $_[0]; |
405
|
2
|
|
|
|
|
4
|
my %h; |
406
|
2
|
|
|
|
|
26
|
for(my $i = 0; $i < @$e; $i += 2) { |
407
|
6
|
|
100
|
|
|
7
|
push @{ $h{ $e->[$i] } ||= []}, $e->[$i+1]; |
|
6
|
|
|
|
|
44
|
|
408
|
|
|
|
|
|
|
} |
409
|
2
|
|
|
|
|
7
|
foreach my $v (CORE::values %h) { bless $v, $Array_Class } |
|
5
|
|
|
|
|
65
|
|
410
|
2
|
|
|
|
|
8
|
\%h; |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=item $hol = $entry->as_HoLS |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
This returns a hashref where every value is a reference to an array |
417
|
|
|
|
|
|
|
of scalar-refs to the value-slots in $entry. This is so you can |
418
|
|
|
|
|
|
|
alter $entry. (This and $entry->as_arrayref are really the only ways |
419
|
|
|
|
|
|
|
to alter an entry objects's content.) |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
This sounds (and is) very circuitous, but it's like this: |
422
|
|
|
|
|
|
|
If $entry is |
423
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
424
|
|
|
|
|
|
|
then this returns |
425
|
|
|
|
|
|
|
C<< { 'hw' => [$fooslot], 'ex' => [$thingsslot, $stuffslot] } >>, |
426
|
|
|
|
|
|
|
where if you do C<$$thingslot = 'gack'>, then $entry then becomes |
427
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => B<'gack'>, 'ex' => "Stuff") >>. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=cut |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub as_HoLS { # ref to a hash of list of refs to each of the value slots |
432
|
1
|
|
|
1
|
1
|
7
|
my $e = $_[0]; |
433
|
1
|
|
|
|
|
2
|
my %h; |
434
|
1
|
|
|
|
|
7
|
for(my $i = 0; $i < @$e; $i += 2) { |
435
|
3
|
|
100
|
|
|
24
|
push @{ |
436
|
3
|
|
|
|
|
10
|
$h{ $e->[$i] } ||= [] |
437
|
|
|
|
|
|
|
}, \$e->[$i + 1]; |
438
|
|
|
|
|
|
|
} |
439
|
1
|
|
|
|
|
5
|
\%h; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=item $hol = $entry->as_doublets |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
This returns this entry as a list of "doublets" -- i.e., |
447
|
|
|
|
|
|
|
as a list of two-item arrayrefs. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
For example, if $entry is |
450
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
451
|
|
|
|
|
|
|
then this returns the list |
452
|
|
|
|
|
|
|
C<< (['hw','foo'], ['ex','Things'], ['ex','Stuff']) >>. |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=cut |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
sub as_doublets { |
457
|
|
|
|
|
|
|
# returns this entry... |
458
|
|
|
|
|
|
|
# (hw => 'shash', english => 'bear') |
459
|
|
|
|
|
|
|
# as this... |
460
|
|
|
|
|
|
|
# ([hw => 'shash'], [english => 'bear']) |
461
|
1
|
|
|
1
|
1
|
8
|
my @out; |
462
|
1
|
|
|
|
|
5
|
for(my $i = 0; $i < @{$_[0]}; $i += 2) { |
|
4
|
|
|
|
|
11
|
|
463
|
3
|
|
|
|
|
5
|
push @out, [ @{ $_[0] }[$i, $i+1] ]; |
|
3
|
|
|
|
|
10
|
|
464
|
|
|
|
|
|
|
} |
465
|
1
|
|
|
|
|
6
|
return @out; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=item $xml_source = $entry->as_xml() |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=item $xml_source = $entry->as_xml( I ) |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
This returns an XML representation of this entry's contents. In short, |
475
|
|
|
|
|
|
|
For example, if $entry is |
476
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
477
|
|
|
|
|
|
|
then this returns this string: |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
foo |
480
|
|
|
|
|
|
|
Things |
481
|
|
|
|
|
|
|
Stuff |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
The only details arise from the problem of how to turn $entry's keynames |
484
|
|
|
|
|
|
|
into XML tag names. For each key, if it's present in the optional TagNameHash |
485
|
|
|
|
|
|
|
hashref parameter, then that value (C<< $tagnamehash->{$keyname} >>) is used; |
486
|
|
|
|
|
|
|
otherwise, $keyname itself is used, stripped of characters other than |
487
|
|
|
|
|
|
|
C, colon, underscore, period, and dash. |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=item $xml_source = $entry->as_xml_pairs() |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=item $xml_source = $entry->as_xml_pairs( I) |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
This returns an XML representation of this entry's contents. In short, |
496
|
|
|
|
|
|
|
For example, if $entry is |
497
|
|
|
|
|
|
|
C<< ('hw' => 'foo', 'ex' => 'Things', 'ex' => "Stuff") >>, |
498
|
|
|
|
|
|
|
then this returns this string: |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
This avoids the problem of how to turn keynames into XML tagnames. |
505
|
|
|
|
|
|
|
If you don't like the choice of the pair tagname (by default, "pair") |
506
|
|
|
|
|
|
|
or the key attribute name (by default, "key"), |
507
|
|
|
|
|
|
|
or the value attribute name (by default, "value"), then you can |
508
|
|
|
|
|
|
|
specify new values as the parameters. So if you call |
509
|
|
|
|
|
|
|
C<< |
510
|
|
|
|
|
|
|
$entry->as_xml_pairs( 'fee' , 'fie', 'Foe:Fum') |
511
|
|
|
|
|
|
|
>>, the return value is: |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub as_xml { |
520
|
|
|
|
|
|
|
# Yes, VERY simpleminded. And note that the result is NOT wrapped |
521
|
|
|
|
|
|
|
# in an ... or anything. |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# returns this entry... |
524
|
|
|
|
|
|
|
# (hw => 'shash', english => 'bear') |
525
|
|
|
|
|
|
|
# as this... |
526
|
|
|
|
|
|
|
# " shash\nbear\n" |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
# Consider this entry more as a suggestion, and as a debugging tool, than |
529
|
|
|
|
|
|
|
# anything else. |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
# Optional first parameter: a reference to a hash mapping key names |
532
|
|
|
|
|
|
|
# to tags. E.g., $e->as_xml({hw => 'headword', english => 'gloss'}) |
533
|
|
|
|
|
|
|
# will give you this: |
534
|
|
|
|
|
|
|
# " shash\nbear\n" |
535
|
|
|
|
|
|
|
|
536
|
1
|
50
|
|
1
|
1
|
82
|
my $map = ref($_[1]) ? $_[1] : {}; |
537
|
|
|
|
|
|
|
|
538
|
1
|
|
|
|
|
3
|
my(@out, $k, $v); |
539
|
1
|
|
|
|
|
3
|
for(my $i = 0; $i < @{$_[0]}; $i += 2) { |
|
4
|
|
|
|
|
15
|
|
540
|
3
|
|
|
|
|
5
|
($k,$v) = @{$_[0]}[$i, 1 + $i]; |
|
3
|
|
|
|
|
10
|
|
541
|
|
|
|
|
|
|
|
542
|
3
|
50
|
|
|
|
9
|
if(exists $map->{$k}){ |
543
|
0
|
|
|
|
|
0
|
$k = $map->{$k}; |
544
|
|
|
|
|
|
|
} else { |
545
|
|
|
|
|
|
|
# spiff up the key name so it's an okay GI (tag name) |
546
|
3
|
|
|
|
|
71
|
$k =~ tr<-._:a-zA-Z0-9><_>cd; # Yes, this is conservative |
547
|
3
|
50
|
|
|
|
10
|
if(length $k) { |
548
|
3
|
50
|
|
|
|
14
|
$k = '_' . $k unless $k =~ m<^[_:a-zA-Z]>s; |
549
|
|
|
|
|
|
|
# prefix unsafe things. |
550
|
|
|
|
|
|
|
} else { # to avoid a null GI |
551
|
0
|
|
|
|
|
0
|
$k = 'NULL'; |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
|
555
|
3
|
|
|
|
|
8
|
$v =~ s/&/&/g; |
556
|
3
|
|
|
|
|
96
|
$v =~ s/</g ; |
557
|
3
|
|
|
|
|
5
|
$v =~ s/>/>/g ; |
558
|
3
|
|
|
|
|
12
|
push @out, " <$k>$v$k>\n"; |
559
|
|
|
|
|
|
|
} |
560
|
1
|
|
|
|
|
8
|
return join '', @out; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
sub as_xml_pairs { |
564
|
|
|
|
|
|
|
# A bit less pointless. And note that the result is still not wrapped |
565
|
|
|
|
|
|
|
# in an ... or anything. |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
# Returns this entry... |
568
|
|
|
|
|
|
|
# (hw => 'shash', english => 'bear') |
569
|
|
|
|
|
|
|
# as this... |
570
|
|
|
|
|
|
|
# " \n" |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
# Consider this entry more as a suggestion, and as a debugging tool, than |
573
|
|
|
|
|
|
|
# anything else. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
# Calling format: $e->as_xml_pairs(TAGNAME, KEYNAME, VALUENAME) |
576
|
|
|
|
|
|
|
# TAGNAME defaults to 'pair'. |
577
|
|
|
|
|
|
|
# KEYNAME defaults to 'key'. |
578
|
|
|
|
|
|
|
# VALUENAME defaults to 'value'. |
579
|
|
|
|
|
|
|
|
580
|
2
|
|
|
2
|
1
|
9
|
my($o, $gi, $key_name, $value_name) = @_; |
581
|
2
|
|
100
|
|
|
12
|
$gi ||= 'pair' ; |
582
|
2
|
|
100
|
|
|
9
|
$key_name ||= 'key' ; |
583
|
2
|
|
100
|
|
|
8
|
$value_name ||= 'value' ; |
584
|
|
|
|
|
|
|
|
585
|
2
|
|
|
|
|
3
|
my(@out, $k, $v); |
586
|
2
|
|
|
|
|
21
|
for(my $i = 0; $i < @$o; $i += 2) { |
587
|
6
|
|
|
|
|
10
|
($k,$v) = @{$o}[$i, 1 + $i]; |
|
6
|
|
|
|
|
17
|
|
588
|
6
|
|
|
|
|
93
|
foreach my $x ($k, $v) { |
589
|
|
|
|
|
|
|
# NB: Doesn't entitify apostrophes. No point, really. |
590
|
12
|
|
|
|
|
26
|
$x =~ s/&/&/g; |
591
|
12
|
|
|
|
|
15
|
$x =~ s/"/"/g; |
592
|
12
|
|
|
|
|
14
|
$x =~ s/</g; |
593
|
12
|
|
|
|
|
14
|
$x =~ s/>/>/g; |
594
|
12
|
|
|
|
|
36
|
$x =~ s<([\n\t\cm\cj])> |
595
|
0
|
|
|
|
|
0
|
<''.(ord($1)).';'>seg; |
596
|
|
|
|
|
|
|
# turn newlines into character references |
597
|
|
|
|
|
|
|
} |
598
|
6
|
|
|
|
|
32
|
push @out, " <$gi $key_name=\"$k\" $value_name=\"$v\" />\n" |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
2
|
|
|
|
|
24
|
return join '', @out; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
########################################################################### |
605
|
|
|
|
|
|
|
{ |
606
|
|
|
|
|
|
|
# Basically just the guts of Array::Autojoin: |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
package Text::Shoebox::Entry::_Autojoin; |
609
|
|
|
|
|
|
|
$Array_Class = __PACKAGE__; |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
use overload( |
612
|
|
|
|
|
|
|
|
613
|
36
|
|
|
36
|
|
1813
|
'""' => sub { join '; ', @{$_[0]}}, |
|
36
|
|
|
|
|
152
|
|
614
|
|
|
|
|
|
|
|
615
|
0
|
|
0
|
0
|
|
0
|
'0+' => sub {0 + ( $_[0][0] || 0 ) }, |
616
|
|
|
|
|
|
|
# stringifies and then just numerifies, but feh. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
'fallback' => 1, # turn on cleverness |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
'bool', => sub { # true iff there's any true items in it |
621
|
0
|
0
|
|
0
|
|
0
|
for (@{$_[0]}) { return 1 if $_ }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
622
|
0
|
|
|
|
|
0
|
return ''; |
623
|
|
|
|
|
|
|
}, |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
'.=' => sub { # sure, why not. |
626
|
0
|
0
|
|
0
|
|
0
|
if(@{$_[0]}) { $_[0][-1] .= $_[1] } else { push @{$_[0]}, $_[1] } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
627
|
0
|
|
|
|
|
0
|
$_[0]; |
628
|
|
|
|
|
|
|
}, # but can't overload ||= or the like |
629
|
|
|
|
|
|
|
|
630
|
3
|
|
|
3
|
|
6759
|
); |
|
3
|
|
|
|
|
3786
|
|
|
3
|
|
|
|
|
48
|
|
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
########################################################################### |
633
|
|
|
|
|
|
|
1; |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
__END__ |