line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Tie::Hash::Abbrev::BibRefs; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Tie::Hash::Abbrev::BibRefs - match bibliographic references to the original titles |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Tie::Hash::Abbrev::BibRefs; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
tie my %hash, 'Tie::Hash::Abbrev::BibRefs', |
12
|
|
|
|
|
|
|
preprocess => sub { s/\s+[[:upper:]]:.*// }, |
13
|
|
|
|
|
|
|
stopwords => [ qw( a and de del der des di |
14
|
|
|
|
|
|
|
et for für i if in la las |
15
|
|
|
|
|
|
|
of on part Part Pt. Sect. |
16
|
|
|
|
|
|
|
the to und ) ], |
17
|
|
|
|
|
|
|
exceptions => { jpn => 'japan', |
18
|
|
|
|
|
|
|
natl => 'national' }; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
$hash{'Physical Review B'} = '0163-1829'; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
print $hash{'Phys. Rev. B: Condens. Matter Mater. Phys.'}; |
23
|
|
|
|
|
|
|
# will print '0163-1829' |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 DESCRIPTION |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
This module is an attempt to ease the mapping of often abbreviated |
28
|
|
|
|
|
|
|
bibliographical references to the original titles. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
To achieve this, it simplifies the title according to parameterizable rules and |
31
|
|
|
|
|
|
|
stores it as a I<normalized key>. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
When accessing the hash, the key given is also L<normalized|/"KEY NORMALIZATION"> |
34
|
|
|
|
|
|
|
and compared to the normalized version of the original title. |
35
|
|
|
|
|
|
|
In addition, each word (words are separated by whitespace) may be abbreviated by |
36
|
|
|
|
|
|
|
specifying only the first few letters. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
If more than one matching hash entry is found, the values of all matching |
39
|
|
|
|
|
|
|
entries are compared; as long as they are all |
40
|
|
|
|
|
|
|
equal (or all undefined), the |
41
|
|
|
|
|
|
|
lookup is still considered to be successful. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 KEY NORMALIZATION |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
The process of normalization is implemented as follows: |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=over 4 |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=item 1. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
execute any preprocessing code (see L</preprocess>), which is |
52
|
|
|
|
|
|
|
expected to operate on C<$_>. |
53
|
|
|
|
|
|
|
You can use subroutine references or strings here; strings will be |
54
|
|
|
|
|
|
|
L<eval|perlfunc/eval>ed. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item 2. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
split the key into parts (at whitespace). |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item 3. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
remove any parts contained in the list of stopwords |
63
|
|
|
|
|
|
|
(see L</stopwords>). |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item 4. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
replace any parts contained in the list of exceptions |
68
|
|
|
|
|
|
|
by their corresponding value. |
69
|
|
|
|
|
|
|
If the value is L<undef|perlfunc/undef>, the entire part will be removed. |
70
|
|
|
|
|
|
|
(In the L</SYNOPSIS>, "Jpn" would be replaced by "japan".) |
71
|
|
|
|
|
|
|
This lookup is done case-insensitively. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=item 5. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
remove any non-word characters at the end of each part or followed by a dash |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=back |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
80
|
|
|
|
|
|
|
|
81
|
1
|
|
|
1
|
|
27468
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
46
|
|
82
|
1
|
|
|
1
|
|
6
|
use vars '$VERSION'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
61
|
|
83
|
|
|
|
|
|
|
|
84
|
1
|
|
|
1
|
|
7
|
use Carp 'croak'; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
93
|
|
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
$VERSION = 0.02; |
87
|
|
|
|
|
|
|
|
88
|
1
|
|
|
1
|
|
7
|
use constant DATA => 0; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
59
|
|
89
|
1
|
|
|
1
|
|
7
|
use constant I => 1; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
59
|
|
90
|
1
|
|
|
1
|
|
6
|
use constant PREPROCESS => 2; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
51
|
|
91
|
1
|
|
|
1
|
|
7
|
use constant STOPWORDS => 3; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
84
|
|
92
|
1
|
|
|
1
|
|
7
|
use constant EXCEPTIONS => 4; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
60
|
|
93
|
1
|
|
|
1
|
|
6
|
use constant DEBUG => 5; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2689
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# TIEHASH - constructor called by tie().
#
# Expects the class (or an existing object to take the class from) followed
# by option => argument pairs. Recognized options are "debug",
# "exception(s)", "preprocess" and "stopword(s)"; anything else croaks.
# Returns the new object, a blessed array whose DATA slot starts out empty.
sub TIEHASH {

    # @_ holds the invocant plus the option pairs, so it must be odd-sized.
    croak 'Odd number of arguments.' unless @_ & 1;

    my ( $invocant, @options ) = @_;
    my $class = length( ref $invocant ) ? ref $invocant : $invocant;

    my $self = bless [], $class;
    $self->[DATA] = [];

    while (@options) {
        my ( $option, $argument ) = splice @options, 0, 2;
        if ( $option eq 'debug' ) {
            $self->debug($argument);
        }
        elsif ( $option =~ /^exceptions?\z/ ) {
            $self->exceptions($argument);
        }
        elsif ( $option eq 'preprocess' ) {
            $self->preprocess($argument);
        }
        elsif ( $option =~ /^stopwords?\z/ ) {

            # A reference is flattened into a list of stopwords.
            $self->stopwords( ref $argument ? @$argument : $argument );
        }
        else {
            croak qq(Unknown TIEHASH option "$option"!);
        }
    }

    return $self;
}
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# FETCH - tie interface: look up the value for a (possibly abbreviated) key.
# Returns the stored value, or undef when find() reports no match.
sub FETCH {
    my ( $self, $key ) = @_;
    my $index = $self->find($key);
    return defined $index ? $self->[DATA][$index] : undef;
}
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# STORE - tie interface: store a value under the given title.
#
# Entries live in DATA as consecutive triples (normalized key, value,
# original key). If the original key already sits at the position computed
# for its normalized form, only the value is overwritten; otherwise a new
# triple is inserted at that position.
sub STORE {
    my ( $self, $key, $value ) = @_;

    my $normkey = $self->normalize($key);    # scalar context: key only
    my $pos     = $self->pos($normkey);

    if ( defined $self->exact( $key, $pos ) ) {
        $self->[DATA][ $pos + 1 ] = $value;
    }
    else {
        splice @{ $self->[DATA] }, $pos, 0, $normkey, $value, $key;
    }
}
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# EXISTS - tie interface: true (1) if find() locates a value for the key,
# otherwise the empty string.
sub EXISTS {
    my ( $self, $key ) = @_;
    return defined $self->find($key) ? 1 : '';
}
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# DELETE - tie interface: remove the entry stored under exactly this key.
#
# The key must match an original key case-insensitively (see exact()).
# Returns the deleted value, or undef when no such entry exists. A
# successful deletion also resets the hash iterator.
sub DELETE {
    my ( $self, $key ) = @_;

    my $pos = $self->pos( scalar $self->normalize($key) );
    return undef unless defined $self->exact( $key, $pos );

    # Drop the whole (normalized key, value, original key) triple.
    my ( $old_normkey, $value, $old_key ) = splice @{ $self->[DATA] }, $pos, 3;
    $self->startover;
    return $value;
}
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# CLEAR - tie interface: reset the iterator and empty the hash.
sub CLEAR {
    my $self = shift;
    $self->startover;
    @{ $self->[DATA] } = ();
}
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# FIRSTKEY - tie interface: start a new iteration over the hash.
#
# Returns the original (un-normalized) key of the first entry, or undef when
# the hash is empty. Entries are stored in DATA as consecutive triples
# (normalized key, value, original key), so the first original key sits at
# index 2; that index is remembered in the iterator slot I for NEXTKEY.
sub FIRSTKEY {
    my ($self) = @_;
    return undef unless @{ $self->[DATA] };

    # The key has to be read from the DATA array. Indexing the object itself
    # would return slot 2 of $self, which is the PREPROCESS chain.
    $self->[DATA][ $self->[I] = 2 ];
}
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# NEXTKEY - tie interface: advance the iterator by one entry (three array
# slots) and return the original key found there. Once the end of DATA is
# passed, the iterator is reset and undef is returned.
sub NEXTKEY {
    my ( $self, $lastkey ) = @_;    # $lastkey is part of the interface only

    my $index = $self->[I] += 3;
    return $self->[DATA][$index] if $index <= $#{ $self->[DATA] };

    $self->startover;
    return undef;
}
172
|
|
|
|
|
|
|
|
173
|
0
|
|
|
0
|
|
0
|
# UNTIE - tie interface hook; deliberately does nothing.
sub UNTIE { return }
174
|
|
|
|
|
|
|
|
175
|
1
|
|
|
1
|
|
853
|
# DESTROY - reset the iterator when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->startover;
}
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head1 ADDITIONAL METHODS |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head2 debug |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
turn debug mode on (when given a true value as argument) or off |
182
|
|
|
|
|
|
|
(when given a false value). |
183
|
|
|
|
|
|
|
Returns the (possibly new) value. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
In debug mode, the L</find> method will print debug messages to STDERR. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=cut |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Accessor for the debug flag: stores the first argument if one is given
# and returns the (possibly new) value.
sub debug {
    my ( $self, @args ) = @_;
    $self->[DEBUG] = $args[0] if @args;
    return $self->[DEBUG];
}
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 delete_abbrev |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
my @deleted = tied(%hash)->delete_abbrev('foo','bar'); |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Will delete all elements on the basis of all unambiguous abbreviations given as |
200
|
|
|
|
|
|
|
arguments and return a (possibly empty) list of all deleted values. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=cut |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Deletes every entry matched by each unambiguous abbreviation given as
# argument and returns the list of deleted values (possibly empty). The
# hash iterator is reset if anything was deleted.
sub delete_abbrev {
    my $self = shift;
    my @deleted;

    for my $abbrev (@_) {
        my $pos = $self->pos($abbrev);

        # NOTE(review): no "valid" method is defined in the visible part of
        # this module; presumably it returns the position of the last
        # matching entry (or undef) - confirm where it is supposed to come
        # from.
        my $last = $self->valid( $abbrev, $pos );
        next unless defined $last;

        my @removed = splice @{ $self->[DATA] }, $pos, 3 + $last - $pos;

        # Entries are (normalized key, value, original key) triples, so the
        # values are at offsets 1, 4, 7, ... of the removed slice. Taking
        # every odd offset would also pick up keys of later entries.
        push @deleted, @removed[ grep { $_ % 3 == 1 } 0 .. $#removed ];
    }

    $self->startover if @deleted;
    @deleted;
}
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=head2 exceptions |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
get or set the exceptions table for the hash. |
222
|
|
|
|
|
|
|
Expects hash references or L<undef|perlfunc/undef>, which clears the table. |
223
|
|
|
|
|
|
|
Returns a reference to the new exception table. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Get or extend the exceptions table.
#
# Each argument is either a hash reference, whose pairs are merged into the
# table (keys and defined values lower-cased), or undef, which clears the
# table. Returns a reference to the resulting table (an empty hash
# reference if none has been set up yet).
sub exceptions {
    my $self = shift;

    for my $table (@_) {
        if ( defined $table ) {
            for my $abbrev ( keys %$table ) {
                my $replacement = $table->{$abbrev};

                # An undef value means "drop this part entirely" and must
                # stay undef: normalize() tests it with defined(), and lc()
                # would turn it into an empty string.
                $self->[EXCEPTIONS]{ lc $abbrev } =
                  defined $replacement ? lc $replacement : undef;
            }
        }
        else { $self->[EXCEPTIONS] = {} }
    }

    $self->[EXCEPTIONS] || {};
}
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head2 preprocess |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
set up the preprocessing code chain for the hash. |
243
|
|
|
|
|
|
|
Any code references or strings will be added to the chain, |
244
|
|
|
|
|
|
|
an L<undef|perlfunc/undef> will clear the chain. |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=cut |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# Get or modify the preprocessing chain.
#
# Every defined argument (code reference or string) is appended to the
# chain; an undef argument empties it. Returns the resulting chain as a
# list.
sub preprocess {
    my $self = shift;

    for my $step (@_) {
        if ( defined $step ) { push @{ $self->[PREPROCESS] }, $step }

        # Clear with an empty list. Assigning [] here would leave one
        # array-reference element in the chain, which normalize() would
        # then try to call as code.
        else { @{ $self->[PREPROCESS] } = () }
    }

    @{ $self->[PREPROCESS] || [] };
}
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 stopwords |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
get or set the stopwords for the hash. |
260
|
|
|
|
|
|
|
Any arguments given will be added to the list of stopwords. |
261
|
|
|
|
|
|
|
An L<undef|perlfunc/undef> as argument will clear the list of stopwords. |
262
|
|
|
|
|
|
|
The method returns the new list of stopwords (in an unsorted manner). |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=cut |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Get or modify the set of stopwords.
#
# Defined arguments are added to the set, an undef argument empties it.
# Returns the current stopwords in no particular order.
sub stopwords {
    my ( $self, @words ) = @_;

    for my $word (@words) {
        if ( defined $word ) {
            $self->[STOPWORDS]{$word} = undef;    # only the key matters
        }
        else {
            $self->[STOPWORDS] = {};
        }
    }

    return keys %{ $self->[STOPWORDS] || {} };
}
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
The following methods should usually not be called "from the outside"; |
278
|
|
|
|
|
|
|
the main intention of documenting them is that the author still wants to |
279
|
|
|
|
|
|
|
understand his own module in case changes will be necessary later. :o) |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head2 exact |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
expects a key as first and a L<position|/pos> as second argument. |
284
|
|
|
|
|
|
|
Returns the position if the given key equals (case-insensitively) the real key |
285
|
|
|
|
|
|
|
stored at that position or undef if not. |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=cut |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# Returns $pos if the original key stored with the entry starting at $pos
# equals $key case-insensitively, undef otherwise (including when $pos
# points at or beyond the last element of DATA).
sub exact {
    my ( $self, $key, $pos ) = @_;
    my $data = $self->[DATA];

    return undef unless $pos < $#$data;

    # Slot $pos + 2 of a triple holds the original key.
    return undef unless lc( $data->[ $pos + 2 ] ) eq lc($key);

    return $pos;
}
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head2 find |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
This is the central method for lookups, used by C<FETCH> and |
301
|
|
|
|
|
|
|
C<EXISTS>. |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
It expects a key as its only argument. |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Upon success, the method returns an array index at which the corresponding value |
306
|
|
|
|
|
|
|
can be found, or undef otherwise. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=cut |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# Central lookup routine.
#
# Normalizes $key, jumps to the first entry whose normalized key is not
# less than the key's first word (the prefix) and walks forward while
# entries still start with that prefix. An entry whose original key equals
# $key case-insensitively wins immediately. Otherwise the first entry
# matching the abbreviation pattern is remembered; a later matching entry
# with a different value makes the lookup fail. Returns the DATA index of
# the value, or undef / an empty list on failure. With debug mode on, the
# search is traced on STDERR.
sub find {
    my ( $self, $key ) = @_;
    my $debug = $self->debug;
    my ( $prefix, $pattern, $normkey ) = $self->normalize($key);

    print STDERR <<_ if $debug;
--------------------------------------------------------------------------------
Key: <$key>
Prefix: <$prefix>
Pattern: <$pattern>
NormKey: <$normkey>
_

    my $pos = $self->pos($prefix);
    return undef unless defined $pos;

    my $data = $self->[DATA];

    if ($debug) {
        my $tail =
          $pos
          ? qq(; the key before that would be: "$data->[$pos-3]"\n)
          : ".\n";
        print STDERR 'Starting search at entry #' . ( $pos / 3 ) . $tail;
    }

    my $found;
    do {
        my $candidate = $data->[$pos];    # normalized key of this entry

        print STDERR 'Examining entry #' . ( $pos / 3 ) . qq(: "$candidate"... )
          if $debug;

        if ( $candidate =~ $pattern ) {

            # The original key itself was given: no ambiguity possible.
            if ( lc( $data->[ $pos + 2 ] ) eq lc($key) ) {
                print STDERR "exact match.\n" if $debug;
                return $pos + 1;
            }

            if ( !defined $found ) {
                $found = $pos + 1;
                print STDERR qq( matches, value: "$data->[$found]"\n)
                  if $debug;
            }
            else {
                my $first = $data->[$found];
                my $other = $data->[ $pos + 1 ];

                # Two matches are compatible only if both values are
                # undefined or both are the same string.
                my $differs =
                  defined $first
                  ? ( !defined $other || $other ne $first )
                  : defined $other;

                if ($differs) {
                    print STDERR
qq( also matches, but has a different value: "$data->[$pos+1]"\n)
                      if $debug;
                    return;
                }
            }
        }
        else {
            print STDERR "does not match.\n" if $debug;
        }
      } while ( $pos += 3 ) < $#$data
      && $prefix eq substr $data->[$pos], 0, length $prefix;

    if ($debug) {
        my $why_stopped =
          $pos > $#$data
          ? "Last element reached.\n"
          : qq("$data->[$pos]" has a different prefix.\n);
        my $outcome =
          defined $found
          ? "Search was successful.\n"
          : "Search was NOT successful.\n";
        print STDERR $why_stopped, $outcome;
    }

    $found;
}
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head2 normalize |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Given a key as its only argument, |
371
|
|
|
|
|
|
|
this method will return the normalized key in scalar |
372
|
|
|
|
|
|
|
and a three element list in array context, consisting of |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=over 4 |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=item 0. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
the prefix, |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=item 1. |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
the "search pattern" and |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item 2. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
the "normalized key". |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=back |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=cut |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# Normalizes a key.
#
# Runs the preprocessing chain on a local copy of the key in $_, splits it
# at whitespace and dashes, drops stopwords, applies the exceptions table
# (looked up in lower case; an undefined replacement removes the part),
# lower-cases the rest and strips non-word characters at the end of each
# part. In scalar context only the normalized key is returned; in list
# context the result is (prefix, search pattern, normalized key).
sub normalize {
    my ( $self, $key ) = @_;
    my $exceptions = $self->[EXCEPTIONS];
    my $stopwords  = $self->[STOPWORDS];

    local $_ = $key;
    for my $step ( $self->preprocess ) {

        # Code references are called with the current @_; strings are
        # evaluated as Perl code. Both are expected to modify $_.
        if   ( ref $step ) { &$step }
        else               { eval $step }
    }

    my @parts = grep { !exists $stopwords->{$_} } split /\s+|-/;

    my @replaced = map {
        my $part = lc;
        exists $exceptions->{$part}
          ? ( defined $exceptions->{$part} ? $exceptions->{$part} : () )
          : $part
    } @parts;

    my $normkey = join ' ', @replaced;
    $normkey =~ s/\W+(?=\s|-|$)//g;

    return $normkey unless wantarray;

    # The prefix is the first word; it is what pos() searches for.
    my ($prefix) = $normkey =~ /^([^\s-]*)/;

    # Every word of the key may be an abbreviation of the stored word.
    my @words = split /\s+|-/, $normkey;
    my $pattern = '^' . join( ' ', map { quotemeta($_) . '\S*' } @words ) . '$';

    # Perls before 5.6 get the pattern as a plain string instead of qr//.
    my $matcher = $] < 5.006 ? $pattern : eval 'qr/$pattern/';

    return ( $prefix, $matcher, $normkey );
}
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=head2 pos |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
expects a (usually L<normalized|/"KEY NORMALIZATION">) key as (its only) argument |
419
|
|
|
|
|
|
|
and returns the position at which this key is stored (if it exists) |
420
|
|
|
|
|
|
|
or should be sorted (if it does not already exist). |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=cut |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# Binary search over the normalized keys in DATA (every third element).
# Returns the array index at which $key is stored or would have to be
# inserted to keep the entries sorted.
sub pos {
    my ( $self, $key ) = @_;
    my $data = $self->[DATA];

    my ( $low, $high ) = ( 0, scalar @$data );

    while ( $low < $high && $low < $#$data ) {

        # Midpoint, rounded down to the start of a triple.
        my $mid = 3 * int( ( ( $low + $high ) >> 1 ) / 3 );

        if   ( $data->[$mid] lt $key ) { $low  = $mid + 3 }
        else                           { $high = $mid }
    }

    return $low;
}
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=head2 startover |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
expects no arguments and simply resets the iterator for the hash, |
441
|
|
|
|
|
|
|
so that the next call to L<each|perlfunc/each> will return the first key/value |
442
|
|
|
|
|
|
|
pair again. |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# Resets the iterator slot so that iteration starts from the beginning.
sub startover {
    my $self = shift;
    $self->[I] = undef;
}
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 BUGS |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
None known so far. |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head1 AUTHOR |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Martin H. Sluka |
458
|
|
|
|
|
|
|
mailto:martin@sluka.de |
459
|
|
|
|
|
|
|
http://martin.sluka.de/ |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head1 THANKS TO |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Dr. Hermann Schier from the Max Planck Institute for Solid State Research |
464
|
|
|
|
|
|
|
in Stuttgart/Germany for initiating and underwriting the development of this |
465
|
|
|
|
|
|
|
module and for contributing a lot of ideas. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head1 COPYRIGHT |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This program is free software; you can redistribute |
470
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
The full text of the license can be found in the |
473
|
|
|
|
|
|
|
LICENSE file included with this module. |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=head1 SEE ALSO |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
L |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=cut |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
1 |