| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Tie::Hash::Abbrev::BibRefs; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Tie::Hash::Abbrev::BibRefs - match bibliographic references to the original titles |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Tie::Hash::Abbrev::BibRefs; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
tie my %hash, 'Tie::Hash::Abbrev::BibRefs', |
|
12
|
|
|
|
|
|
|
preprocess => sub { s/\s+[[:upper:]]:.*// }, |
|
13
|
|
|
|
|
|
|
stopwords => [ qw( a and de del der des di |
|
14
|
|
|
|
|
|
|
et for für i if in la las |
|
15
|
|
|
|
|
|
|
of on part Part Pt. Sect. |
|
16
|
|
|
|
|
|
|
the to und ) ], |
|
17
|
|
|
|
|
|
|
exceptions => { jpn => 'japan', |
|
18
|
|
|
|
|
|
|
natl => 'national' }; |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
$hash{'Physical Review B'} = '0163-1829'; |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
print $hash{'Phys. Rev. B: Condens. Matter Mater. Phys.'}; |
|
23
|
|
|
|
|
|
|
# will print '0163-1829' |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
This module is an attempt to ease the mapping of often abbreviated |
|
28
|
|
|
|
|
|
|
bibliographical references to the original titles. |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
To achieve this, it simplifies the title according to parameterizable rules and |
|
31
|
|
|
|
|
|
|
stores it as a I<normalized key>. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
When accessing the hash, the key given is also L<normalized|/"KEY NORMALIZATION"> |
|
34
|
|
|
|
|
|
|
and compared to the normalized version of the original title. |
|
35
|
|
|
|
|
|
|
In addition, each word (words are separated by whitespace) may be abbreviated by |
|
36
|
|
|
|
|
|
|
specifying only the first few letters. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
If more than one matching hash entry is found, the values of all matching |
|
39
|
|
|
|
|
|
|
entries are compared; as long as they are all |
|
40
|
|
|
|
|
|
|
equal (or all undefined), the |
|
41
|
|
|
|
|
|
|
lookup is still considered to be successful. |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 KEY NORMALIZATION |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
The process of normalization is implemented as follows: |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=over 4 |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=item 1. |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
execute any preprocessing code (see L</preprocess>), which is |
|
52
|
|
|
|
|
|
|
expected to operate on C<$_>. |
|
53
|
|
|
|
|
|
|
You can use subroutine references or strings here; strings will be |
|
54
|
|
|
|
|
|
|
L<eval|perlfunc/eval>ed. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item 2. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
split the key into parts (at whitespace). |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item 3. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
remove any parts contained in the list of stopwords |
|
63
|
|
|
|
|
|
|
(see L</stopwords>). |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item 4. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
replace any parts contained in the list of exceptions |
|
68
|
|
|
|
|
|
|
by their corresponding value. |
|
69
|
|
|
|
|
|
|
If the value is L<undef|perlfunc/undef>, the entire part will be removed. |
|
70
|
|
|
|
|
|
|
(In the L</SYNOPSIS>, "Jpn" would be replaced by "japan".) |
|
71
|
|
|
|
|
|
|
This lookup is done case-insensitively. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=item 5. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
remove any non-word characters at the end of each part or followed by a dash |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=back |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
|
80
|
|
|
|
|
|
|
|
|
81
|
1
|
|
|
1
|
|
27468
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
46
|
|
|
82
|
1
|
|
|
1
|
|
6
|
use vars '$VERSION'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
61
|
|
|
83
|
|
|
|
|
|
|
|
|
84
|
1
|
|
|
1
|
|
7
|
use Carp 'croak'; |
|
|
1
|
|
|
|
|
7
|
|
|
|
1
|
|
|
|
|
93
|
|
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
$VERSION = 0.02; |
|
87
|
|
|
|
|
|
|
|
|
88
|
1
|
|
|
1
|
|
7
|
use constant DATA => 0; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
59
|
|
|
89
|
1
|
|
|
1
|
|
7
|
use constant I => 1; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
59
|
|
|
90
|
1
|
|
|
1
|
|
6
|
use constant PREPROCESS => 2; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
51
|
|
|
91
|
1
|
|
|
1
|
|
7
|
use constant STOPWORDS => 3; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
84
|
|
|
92
|
1
|
|
|
1
|
|
7
|
use constant EXCEPTIONS => 4; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
60
|
|
|
93
|
1
|
|
|
1
|
|
6
|
use constant DEBUG => 5; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
2689
|
|
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Constructor used by tie(): builds the array-based object and applies the
# option/argument pairs that follow the class name.
#
# Recognized options: debug, exception(s), preprocess, stopword(s).
# Croaks on an odd number of option arguments or an unknown option name.
sub TIEHASH {

    # @_ holds the invocant plus option pairs, so it must have odd length.
    croak 'Odd number of arguments.' if @_ % 2 == 0;

    my ( $package, @options ) = @_;

    # Allow an existing object as invocant by falling back to its class.
    my $class = ref $package;
    $class = $package unless length $class;

    my $self = bless [], $class;
    $self->[DATA] = [];

    while ( my ( $option, $argument ) = splice @options, 0, 2 ) {
        if ( $option eq 'debug' ) {
            $self->debug($argument);
        }
        elsif ( $option =~ /^exceptions?\z/ ) {
            $self->exceptions($argument);
        }
        elsif ( $option eq 'preprocess' ) {
            $self->preprocess($argument);
        }
        elsif ( $option =~ /^stopwords?\z/ ) {

            # Stopwords may be given as an array reference or a single word.
            $self->stopwords( ref $argument ? @$argument : $argument );
        }
        else {
            croak qq(Unknown TIEHASH option "$option"!);
        }
    }

    return $self;
}
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Tied-hash read access: returns the value stored for $key (which may be an
# abbreviation), or undef when find() reports no usable match.
sub FETCH {
    my ( $self, $key ) = @_;

    # find() yields the index of the value inside the DATA array, or undef.
    my $index = $self->find($key);

    return defined $index ? $self->[DATA][$index] : undef;
}
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# Tied-hash write access. DATA is a flat list of triplets
# (normalized key, value, original key). If the entry at the insertion
# position carries the same original key (compared case-insensitively by
# exact()), its value is overwritten; otherwise a new triplet is inserted there.
sub STORE {
    my ( $self, $key, $value ) = @_;

    # Scalar context on purpose: normalize() then returns only the normalized key.
    my $normkey = $self->normalize($key);
    my $pos     = $self->pos($normkey);

    if ( defined $self->exact( $key, $pos ) ) {
        $self->[DATA][ $pos + 1 ] = $value;
    }
    else {
        splice @{ $self->[DATA] }, $pos, 0, $normkey, $value, $key;
    }
}
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Tied-hash exists(): true (1) when find() locates a usable match for $key,
# the empty string otherwise.
sub EXISTS {
    my ( $self, $key ) = @_;
    return defined( $self->find($key) ) ? 1 : '';
}
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# Tied-hash delete(): removes the entry whose original key equals $key
# (case-insensitively, as checked by exact()) and returns its value.
# Returns undef when there is no such entry. Abbreviations are not honored here.
sub DELETE {
    my ( $self, $key ) = @_;

    # Scalar context on purpose: normalize() then returns only the normalized key.
    my $normkey = $self->normalize($key);
    my $pos     = $self->pos($normkey);

    return undef unless defined $self->exact( $key, $pos );

    # Remove the whole (normalized key, value, original key) triplet.
    my @entry = splice @{ $self->[DATA] }, $pos, 3;

    # The stored positions shifted, so any running iteration is reset.
    $self->startover;

    return $entry[1];
}
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Tied-hash clear: resets the iterator and empties the DATA array in place.
sub CLEAR {
    my $self = shift;
    $self->startover;
    @{ $self->[DATA] } = ();
}
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Tied-hash iteration start: returns the first stored (original) key, or
# undef when the hash is empty.
#
# DATA is a flat list of triplets (normalized key, value, original key), so
# the first original key lives at index 2 of the DATA array. The iterator
# slot I remembers that index for NEXTKEY, which advances it by 3.
#
# Fix: the key is now read from $self->[DATA]; previously index 2 of the
# object itself (the PREPROCESS slot) was returned.
sub FIRSTKEY {
    my ($self) = @_;
    return undef unless @{ $self->[DATA] };
    $self->[I] = 2;
    return $self->[DATA][2];
}
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# Tied-hash iteration step: advances the iterator by one triplet and returns
# the original key found there. When the end of DATA is passed, the iterator
# is reset and undef is returned.
sub NEXTKEY {
    my ( $self, $lastkey ) = @_;    # $lastkey is part of the tie interface but unused

    # Original keys sit at every third index of DATA (2, 5, 8, ...).
    my $i = $self->[I] += 3;
    return $self->[DATA][$i] if $i <= $#{ $self->[DATA] };

    $self->startover;
    return undef;
}
|
172
|
|
|
|
|
|
|
|
|
173
|
0
|
|
|
0
|
|
0
|
# Called when the hash is untied; nothing needs to be done.
sub UNTIE {
    return;
}
|
174
|
|
|
|
|
|
|
|
|
175
|
1
|
|
|
1
|
|
853
|
# Destructor: resets the iterator slot via startover().
sub DESTROY {
    my ($self) = @_;
    $self->startover;
}
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head1 ADDITIONAL METHODS |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head2 debug |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
turn debug mode on (when given a true value as argument) or off |
|
182
|
|
|
|
|
|
|
(when given a false value). |
|
183
|
|
|
|
|
|
|
Returns the (possibly new) value. |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
In debug mode, the L</find> method will print debug messages to STDERR. |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=cut |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Accessor for the debug flag. With an argument, stores it as the new flag;
# in either case returns the current flag.
sub debug {
    my ( $self, @args ) = @_;
    $self->[DEBUG] = $args[0] if @args;
    return $self->[DEBUG];
}
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 delete_abbrev |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
my @deleted = tied(%hash)->delete_abbrev('foo','bar'); |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Will delete all elements on the basis of all unambiguous abbreviations given as |
|
200
|
|
|
|
|
|
|
arguments and return a (possibly empty) list of all deleted values. |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=cut |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Deletes all entries matched by each of the given abbreviations and returns
# the list of deleted values (possibly empty).
#
# For each abbreviation, pos() gives the first candidate position and
# valid() the position of the last matching entry (undef when the
# abbreviation is not usable); everything in between is removed.
# NOTE(review): valid() is not defined in the visible part of this file --
# confirm that it is provided elsewhere (e.g. by a parent class).
#
# Fix: DATA holds triplets (normalized key, value, original key), so the
# values are the elements at offsets 1, 4, 7, ... of the removed chunk. The
# former "every odd offset" selection returned keys instead of values as
# soon as more than one entry was removed.
sub delete_abbrev {
    my $self = shift;
    my @deleted;

    for my $abbrev (@_) {
        my $first = $self->pos($abbrev);
        my $last  = $self->valid( $abbrev, $first );
        next unless defined $last;

        my @removed = splice @{ $self->[DATA] }, $first, 3 + $last - $first;
        push @deleted, @removed[ grep { $_ % 3 == 1 } 0 .. $#removed ];
    }

    # Positions changed, so a running iteration must start over.
    $self->startover if @deleted;

    return @deleted;
}
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=head2 exceptions |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
get or set the exceptions table for the hash. |
|
222
|
|
|
|
|
|
|
Expects hash references or L<undef|perlfunc/undef>, which clears the table. |
|
223
|
|
|
|
|
|
|
Returns a reference to the new exception table. |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Gets or extends the exceptions table.
#
# Each defined argument must be a hash reference mapping a word to its
# replacement; words and replacements are stored lower-cased. An undef
# argument clears the table. Returns a reference to the resulting table
# (a fresh empty hash if none has been set yet).
#
# Fix: an undef replacement is now stored as undef. normalize() removes a
# part entirely when its exception value is undefined; lower-casing undef
# turned it into an empty string, so the part was replaced instead of removed.
sub exceptions {
    my $self = shift;

    for my $table (@_) {
        if ( defined $table ) {
            for my $word ( keys %$table ) {
                my $replacement = $table->{$word};
                $self->[EXCEPTIONS]{ lc $word } =
                  defined $replacement ? lc $replacement : undef;
            }
        }
        else {
            $self->[EXCEPTIONS] = {};
        }
    }

    return $self->[EXCEPTIONS] || {};
}
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head2 preprocess |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
set up the preprocessing code chain for the hash. |
|
243
|
|
|
|
|
|
|
Any code references or strings will be added to the chain, |
|
244
|
|
|
|
|
|
|
an L<undef|perlfunc/undef> will clear the chain. |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=cut |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# Gets or extends the chain of preprocessing steps.
#
# Every defined argument (a code reference or a string of Perl code) is
# appended to the chain; an undef argument empties the chain. Returns the
# resulting chain as a list.
#
# Fix: clearing now really empties the chain. It used to assign a list
# containing one empty array reference, which normalize() would then try to
# call as a code reference.
sub preprocess {
    my $self = shift;

    for my $step (@_) {
        if ( defined $step ) {
            push @{ $self->[PREPROCESS] }, $step;
        }
        else {
            @{ $self->[PREPROCESS] } = ();
        }
    }

    return @{ $self->[PREPROCESS] || [] };
}
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 stopwords |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
get or set the stopwords for the hash. |
|
260
|
|
|
|
|
|
|
Any arguments given will be added to the list of stopwords. |
|
261
|
|
|
|
|
|
|
An L<undef|perlfunc/undef> as argument will clear the list of stopwords. |
|
262
|
|
|
|
|
|
|
The method returns the new list of stopwords (in an unsorted manner). |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=cut |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Gets or extends the set of stopwords.
#
# Every defined argument is added to the set (stored as hash keys, so the
# comparison in normalize() is case-sensitive); an undef argument clears the
# set. Returns the resulting stopwords in no particular order.
sub stopwords {
    my ( $self, @words ) = @_;

    foreach my $word (@words) {
        if ( !defined $word ) {
            $self->[STOPWORDS] = {};
            next;
        }
        $self->[STOPWORDS]{$word} = undef;
    }

    return keys %{ $self->[STOPWORDS] || {} };
}
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
The following methods should usually not be called "from the outside"; |
|
278
|
|
|
|
|
|
|
the main intention of documenting them is that the author still wants to |
|
279
|
|
|
|
|
|
|
understand his own module in case changes will be necessary later. :o) |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head2 exact |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
expects a key as first and a L<position|/pos> as second argument. |
|
284
|
|
|
|
|
|
|
Returns the position if the given key equals (case-insensitively) the real key |
|
285
|
|
|
|
|
|
|
stored at that position or undef if not. |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=cut |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# Checks whether the entry starting at $pos was stored under exactly $key
# (compared case-insensitively against the original key kept at $pos + 2).
# Returns $pos on a match and undef otherwise, including when $pos lies
# beyond the stored data.
sub exact {
    my ( $self, $key, $pos ) = @_;

    return undef unless $pos < $#{ $self->[DATA] };
    return undef unless lc( $self->[DATA][ $pos + 2 ] ) eq lc($key);

    return $pos;
}
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=head2 find |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
This is the central method for lookups, used by C<FETCH> and |
|
301
|
|
|
|
|
|
|
C<EXISTS>. |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
It expects a key as its only argument. |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Upon success, the method returns an array index at which the corresponding value |
|
306
|
|
|
|
|
|
|
can be found, or undef otherwise. |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=cut |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# Central lookup: returns the index (within DATA) of the value belonging to
# $key, or undef when nothing usable is found.
#
# The key is normalized into a prefix (its first word), a search pattern and
# the normalized key. Starting at the position pos() reports for the prefix,
# all entries whose normalized key begins with that prefix are examined:
#   * an entry whose original key equals $key (case-insensitively) wins
#     immediately;
#   * otherwise the first entry matching the pattern is remembered;
#   * a further matching entry with a different value (or different
#     definedness) makes the lookup ambiguous and ends it without a result.
# In debug mode every step is reported on STDERR.
sub find {
    my ( $self, $key ) = @_;
    my $debug = $self->debug;
    my ( $prefix, $pattern, $normkey ) = $self->normalize($key);

    if ($debug) {
        print STDERR <<"_";
--------------------------------------------------------------------------------
Key: <$key>
Prefix: <$prefix>
Pattern: <$pattern>
NormKey: <$normkey>
_
    }

    my $pos = $self->pos($prefix);
    return undef unless defined $pos;

    my $data = $self->[DATA];

    if ($debug) {
        my $tail =
          $pos
          ? qq(; the key before that would be: "$data->[$pos-3]"\n)
          : ".\n";
        print STDERR 'Starting search at entry #' . ( $pos / 3 ) . $tail;
    }

    my $found;

    # The entry at the start position is always examined once; the walk then
    # continues while further entries share the prefix.
    while (1) {
        print STDERR 'Examining entry #'
          . ( $pos / 3 )
          . qq(: "$data->[$pos]"... )
          if $debug;

        if ( $data->[$pos] !~ $pattern ) {
            print STDERR "does not match.\n" if $debug;
        }
        elsif ( lc( $data->[ $pos + 2 ] ) eq lc($key) ) {
            print STDERR "exact match.\n" if $debug;
            return $pos + 1;
        }
        elsif ( !defined $found ) {
            $found = $pos + 1;
            print STDERR qq( matches, value: "$data->[$found]"\n)
              if $debug;
        }
        else {

            # Compare this candidate's value with the first match's value;
            # two undefined values count as equal.
            my ( $first, $current ) = @{$data}[ $found, $pos + 1 ];
            my $conflict =
              defined $first
              ? ( !defined $current || $current ne $first )
              : defined $current;

            if ($conflict) {
                print STDERR
                  qq( also matches, but has a different value: "$data->[$pos+1]"\n)
                  if $debug;
                return;
            }
        }

        $pos += 3;
        last
          unless $pos < $#$data
          && $prefix eq substr( $data->[$pos], 0, length $prefix );
    }

    if ($debug) {
        my $why_stopped =
          $pos > $#$data
          ? "Last element reached.\n"
          : qq("$data->[$pos]" has a different prefix.\n);
        my $outcome =
          defined $found
          ? "Search was successful.\n"
          : "Search was NOT successful.\n";
        print STDERR $why_stopped, $outcome;
    }

    return $found;
}
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head2 normalize |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Given a key as its only argument, |
|
371
|
|
|
|
|
|
|
this method will return the normalized key in scalar |
|
372
|
|
|
|
|
|
|
and a three element list in array context, consisting of |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=over 4 |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=item 0. |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
the prefix (the first word of the normalized key), |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=item 1. |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
the search pattern (a regular expression matching abbreviations of the key) and |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item 2. |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
the normalized key. |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=back |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=cut |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# Normalizes a key.
#
# Steps: run the preprocessing chain on a copy of the key held in $_, split
# it at whitespace and dashes, drop stopwords, apply the exceptions table
# (lower-casing everything else), join the parts with single spaces and strip
# trailing non-word characters from each part.
#
# Returns the normalized key in scalar context. In list context returns
# (prefix, search pattern, normalized key), where the prefix is the first
# word of the normalized key and the pattern matches any key whose words
# start with the corresponding words of this one.
sub normalize {
    my ( $self, $key ) = @_;
    my ( $exceptions, $stopwords ) = @{$self}[ EXCEPTIONS, STOPWORDS ];

    local $_ = $key;

    # Preprocessors operate on $_. Code references are invoked with the
    # current @_; strings are evaluated as Perl code in this scope.
    # NOTE(review): string preprocessors are eval'ed verbatim and can see the
    # lexicals declared above -- never pass untrusted input as a
    # preprocessing string.
    for my $pp ( $self->preprocess ) {
        if   ( ref $pp ) { &$pp }
        else             { eval $pp }
    }

    my @parts;
    for my $word ( split /\s+|-/, $_ ) {
        next if exists $stopwords->{$word};

        my $lower = lc $word;
        if ( !exists $exceptions->{$lower} ) {
            push @parts, $lower;
        }
        elsif ( defined $exceptions->{$lower} ) {
            push @parts, $exceptions->{$lower};
        }

        # An exception with an undefined value drops the part altogether.
    }

    my $normkey = join ' ', @parts;
    $normkey =~ s/\W+(?=\s|-|$)//g;

    return $normkey unless wantarray;

    my ($prefix) = $normkey =~ /^([^\s-]*)/;

    # Every word may be abbreviated: it only has to match the beginning of
    # the corresponding word in a stored key.
    my @word_patterns;
    for my $word ( split /\s+|-/, $normkey ) {
        push @word_patterns, quotemeta($word) . '\S*';
    }
    my $pattern = '^' . join( ' ', @word_patterns ) . '$';

    # qr// is hidden inside a string eval so the file still compiles on
    # perls older than 5.6, where the plain pattern string is used instead.
    my $compiled = $] < 5.006 ? $pattern : eval 'qr/$pattern/';

    return ( $prefix, $compiled, $normkey );
}
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=head2 pos |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
expects a (usually L<normalized|/normalize>) key as (its only) argument |
|
419
|
|
|
|
|
|
|
and returns the position at which this key is stored (if it exists) |
|
420
|
|
|
|
|
|
|
or should be sorted (if it does not already exist). |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=cut |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# Binary search over DATA, which is kept sorted by normalized key and laid
# out in triplets (normalized key, value, original key).
#
# Returns the index of the first triplet whose normalized key is not less
# than $key, i.e. the position where $key is stored or where it would have
# to be inserted. The result is always a multiple of 3.
sub pos {
    my ( $self, $key ) = @_;
    my $data = $self->[DATA];

    my ( $low, $high ) = ( 0, scalar @$data );

    while ( $low < $high && $low < $#$data ) {

        # Midpoint, rounded down to the start of a triplet.
        my $mid = 3 * int( ( ( $low + $high ) >> 1 ) / 3 );

        if ( $data->[$mid] lt $key ) {
            $low = $mid + 3;
        }
        else {
            $high = $mid;
        }
    }

    return $low;
}
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=head2 startover |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
expects no arguments and simply resets the iterator for the hash, |
|
441
|
|
|
|
|
|
|
so that the next call to L will return the first key/value |
|
442
|
|
|
|
|
|
|
pair again. |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# Resets the hash iterator by clearing the stored iteration index.
sub startover {
    my $self = shift;
    return $self->[I] = undef;
}
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 BUGS |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
None known so far. |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head1 AUTHOR |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Martin H. Sluka |
|
458
|
|
|
|
|
|
|
mailto:martin@sluka.de |
|
459
|
|
|
|
|
|
|
http://martin.sluka.de/ |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head1 THANKS TO |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Dr. Hermann Schier from the Max Planck Institute for Solid State Research |
|
464
|
|
|
|
|
|
|
in Stuttgart/Germany for initiating and underwriting the development of this |
|
465
|
|
|
|
|
|
|
module and for contributing a lot of ideas. |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This program is free software; you can redistribute |
|
470
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
The full text of the license can be found in the |
|
473
|
|
|
|
|
|
|
LICENSE file included with this module. |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
L |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=cut |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
1 |