line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package jsFind; |
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
182378
|
use 5.005; |
|
7
|
|
|
|
|
68
|
|
|
7
|
|
|
|
|
277
|
|
4
|
7
|
|
|
7
|
|
109
|
use strict; |
|
7
|
|
|
|
|
16
|
|
|
7
|
|
|
|
|
234
|
|
5
|
7
|
|
|
7
|
|
49
|
use warnings; |
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
1044
|
|
6
|
7
|
|
|
7
|
|
13935
|
use HTML::Entities; |
|
7
|
|
|
|
|
68942
|
|
|
7
|
|
|
|
|
979
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our $VERSION = '0.07_01'; |
9
|
|
|
|
|
|
|
|
10
|
7
|
|
|
7
|
|
73
|
use Exporter; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
243
|
|
11
|
7
|
|
|
7
|
|
38
|
use Carp; |
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
1001
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
BEGIN { |
16
|
7
|
|
|
7
|
|
13841
|
Exporter::import 'jsFind::Node'; |
17
|
|
|
|
|
|
|
} |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 NAME |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
jsFind - generate index for full text search engine in JavaScript |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 SYNOPSIS |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use jsFind; |
26
|
|
|
|
|
|
|
my $t = new jsFind(B => 4); |
27
|
|
|
|
|
|
|
my $f = 1; |
28
|
|
|
|
|
|
|
foreach my $k (qw{minima ut dolorem sapiente voluptatem}) { |
29
|
|
|
|
|
|
|
$t->B_search(Key => $k, |
30
|
|
|
|
|
|
|
Data => { |
31
|
|
|
|
|
|
|
"path" => { |
32
|
|
|
|
|
|
|
t => "word $k", |
33
|
|
|
|
|
|
|
f => $f }, |
34
|
|
|
|
|
|
|
}, |
35
|
|
|
|
|
|
|
Insert => 1, |
36
|
|
|
|
|
|
|
Append => 1, |
37
|
|
|
|
|
|
|
); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This module can be used to create index files for jsFind, powerful tool for |
43
|
|
|
|
|
|
|
adding a search engine to a CDROM archive or catalog without requiring the |
44
|
|
|
|
|
|
|
user to install anything. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
Main difference between this module and scripts delivered with jsFind are: |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=over 5 |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=item * |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
You don't need to use swish-e to create index |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=item * |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
you can programatically (and incrementaly) create index for jsFind |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=item * |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
you can create more than one index and search them using same C |
61
|
|
|
|
|
|
|
page |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=back |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
You can also examine examples which come as tests with this module, |
66
|
|
|
|
|
|
|
for example C or C. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 jsFind |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
jsFind search engine was written by Shawn Garbett from eLucid Software. |
71
|
|
|
|
|
|
|
The search engine itself is a small piece of JavaScript (1.2 with level 2 |
72
|
|
|
|
|
|
|
DOM). It is easily customizable to fit into a current set of HTML. This |
73
|
|
|
|
|
|
|
JavaScript searches an XML index dataset for the appropriate links, and can |
74
|
|
|
|
|
|
|
filter and sort the results. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
JavaScript code distributed with this module is based on version 0.0.3 which |
77
|
|
|
|
|
|
|
was current when this module development started. Various changes where done |
78
|
|
|
|
|
|
|
on JavaScript code to fix bugs, add features and remove warnings. For |
79
|
|
|
|
|
|
|
complete list see C file which comes with distribution. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
This module has been tested using C with following browsers: |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=over 5 |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=item Mozilla FireFox 0.8 to 1.0 |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
using DOM 2 C |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=item Internet Explorer 5.5 and 6.0 |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
using ActiveX C or C |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item Konqueror 3.3 |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
using DOM 2 C |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item Opera 7.54 (without Java) |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
using experimental iframe implementation which is much slower than other methods. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=back |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
If searching doesn't work for your combination of operating system and |
104
|
|
|
|
|
|
|
browser, please open C file and wait a while. It will search sample |
105
|
|
|
|
|
|
|
file included with distribution and report results. Reports with included |
106
|
|
|
|
|
|
|
test debugging are welcomed. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 jsFind methods |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
C is mode implementing methods which you, the user, are going to |
111
|
|
|
|
|
|
|
use to create indexes. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head2 new |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Create new tree. Arguments are C which is maximum numbers of keys in |
116
|
|
|
|
|
|
|
each node and optional C node. Each root node may have child nodes. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
All nodes are objects from C. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
my $t = new jsFind(B => 4); |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
my $DEBUG = 1; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
sub new { |
127
|
4
|
|
|
4
|
1
|
50
|
my $package = shift; |
128
|
4
|
|
|
|
|
18
|
my %ARGV = @_; |
129
|
4
|
50
|
|
|
|
21
|
croak "Usage: {$package}::new(B => number [, Root => root node ])" |
130
|
|
|
|
|
|
|
unless exists $ARGV{B}; |
131
|
4
|
50
|
|
|
|
19
|
if ($ARGV{B} % 2) { |
132
|
0
|
|
|
|
|
0
|
my $B = $ARGV{B} + 1; |
133
|
0
|
|
|
|
|
0
|
carp "B must be an even number. Using $B instead."; |
134
|
0
|
|
|
|
|
0
|
$ARGV{B} = $B; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
4
|
|
|
|
|
11
|
my $B = $ARGV{B}; |
138
|
4
|
50
|
|
|
|
43
|
my $Root = exists($ARGV{Root}) ? $ARGV{Root} : jsFind::Node->emptynode; |
139
|
4
|
|
|
|
|
28
|
bless { B => $B, Root => $Root } => $package; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 B_search |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Search, insert, append or replace data in B-Tree |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
$t->B_search( |
147
|
|
|
|
|
|
|
Key => 'key value', |
148
|
|
|
|
|
|
|
Data => { "path" => { |
149
|
|
|
|
|
|
|
"t" => "title of document", |
150
|
|
|
|
|
|
|
"f" => 99, |
151
|
|
|
|
|
|
|
}, |
152
|
|
|
|
|
|
|
}, |
153
|
|
|
|
|
|
|
Insert => 1, |
154
|
|
|
|
|
|
|
Append => 1, |
155
|
|
|
|
|
|
|
); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Semantics: |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
If key not found, insert it iff C argument is present. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
If key B found, replace existing data iff C argument |
162
|
|
|
|
|
|
|
is present or add new datum to existing iff C argument is present. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=cut |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub B_search { |
167
|
5927
|
|
|
5927
|
1
|
146132
|
my $self = shift; |
168
|
5927
|
|
|
|
|
22692
|
my %args = @_; |
169
|
5927
|
|
|
|
|
11714
|
my $cur_node = $self->root; |
170
|
5927
|
|
|
|
|
14139
|
my $k = $args{Key}; |
171
|
5927
|
|
|
|
|
7280
|
my $d = $args{Data}; |
172
|
5927
|
|
|
|
|
6038
|
my @path; |
173
|
|
|
|
|
|
|
|
174
|
5927
|
100
|
|
|
|
10785
|
if ($cur_node->is_empty) { # Special case for empty root |
175
|
3
|
50
|
|
|
|
12
|
if ($args{Insert}) { |
176
|
3
|
|
|
|
|
15
|
$cur_node->kdp_insert($k => $d); |
177
|
3
|
|
|
|
|
10
|
return $d; |
178
|
|
|
|
|
|
|
} else { |
179
|
0
|
|
|
|
|
0
|
return undef; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# Descend tree to leaf |
184
|
5924
|
|
|
|
|
8189
|
for (;;) { |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# Didn't hit bottom yet. |
187
|
|
|
|
|
|
|
|
188
|
11524
|
|
|
|
|
23396
|
my($there, $where) = $cur_node->locate_key($k); |
189
|
11524
|
100
|
|
|
|
21073
|
if ($there) { # Found it! |
190
|
3871
|
100
|
|
|
|
13320
|
if ($args{Replace}) { |
|
|
50
|
|
|
|
|
|
191
|
1
|
|
|
|
|
9
|
$cur_node->kdp_replace($where, $k => $d); |
192
|
|
|
|
|
|
|
} elsif ($args{Append}) { |
193
|
3870
|
|
|
|
|
8564
|
$cur_node->kdp_append($where, $k => $d); |
194
|
|
|
|
|
|
|
} |
195
|
3871
|
|
|
|
|
11403
|
return $cur_node->data($where); |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# Not here---must be in a subtree. |
199
|
|
|
|
|
|
|
|
200
|
7653
|
100
|
|
|
|
13657
|
if ($cur_node->is_leaf) { # But there are no subtrees |
201
|
2053
|
50
|
|
|
|
4918
|
return undef unless $args{Insert}; # Search failed |
202
|
|
|
|
|
|
|
# Stuff it in |
203
|
2053
|
|
|
|
|
4924
|
$cur_node->kdp_insert($k => $d); |
204
|
2053
|
100
|
|
|
|
5493
|
if ($self->node_overfull($cur_node)) { # Oops--there was no room. |
205
|
40
|
|
|
|
|
139
|
$self->split_and_promote($cur_node, @path); |
206
|
|
|
|
|
|
|
} |
207
|
2053
|
|
|
|
|
11736
|
return $d; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# There are subtrees, and the key is in one of them. |
211
|
|
|
|
|
|
|
|
212
|
5600
|
|
|
|
|
13941
|
push @path, [$cur_node, $where]; # Record path from root. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# Move down to search the subtree |
215
|
5600
|
|
|
|
|
26244
|
$cur_node = $cur_node->subnode($where); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# and start over. |
218
|
|
|
|
|
|
|
} # for (;;) ... |
219
|
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
0
|
croak ("How did I get here?"); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub split_and_promote_old { |
226
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
227
|
0
|
|
|
|
|
0
|
my ($cur_node, @path) = @_; |
228
|
|
|
|
|
|
|
|
229
|
0
|
|
|
|
|
0
|
for (;;) { |
230
|
0
|
|
|
|
|
0
|
my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2); |
231
|
0
|
|
|
|
|
0
|
my ($up, $where) = @{pop @path}; |
|
0
|
|
|
|
|
0
|
|
232
|
0
|
0
|
|
|
|
0
|
if ($up) { |
233
|
0
|
|
|
|
|
0
|
$up->kdp_insert(@$kdp); |
234
|
0
|
|
|
|
|
0
|
my ($tthere, $twhere) = $up->locate_key($kdp->[0]); |
235
|
0
|
0
|
|
|
|
0
|
croak "Couldn't find key `$kdp->[0]' in node after just inserting it!" |
236
|
|
|
|
|
|
|
unless $tthere; |
237
|
0
|
0
|
|
|
|
0
|
croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!" |
238
|
|
|
|
|
|
|
unless $twhere == $where; |
239
|
0
|
|
|
|
|
0
|
$up->subnode($where, $newleft); |
240
|
0
|
|
|
|
|
0
|
$up->subnode($where+1, $newright); |
241
|
0
|
0
|
|
|
|
0
|
return unless $self->node_overfull($up); |
242
|
0
|
|
|
|
|
0
|
$cur_node = $up; |
243
|
|
|
|
|
|
|
} else { # We're at the top; make a new root. |
244
|
0
|
|
|
|
|
0
|
my $newroot = new jsFind::Node ([$kdp->[0]], |
245
|
|
|
|
|
|
|
[$kdp->[1]], |
246
|
|
|
|
|
|
|
[$newleft, $newright]); |
247
|
0
|
|
|
|
|
0
|
$self->root($newroot); |
248
|
0
|
|
|
|
|
0
|
return; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub split_and_promote { |
255
|
40
|
|
|
40
|
0
|
64
|
my $self = shift; |
256
|
40
|
|
|
|
|
79
|
my ($cur_node, @path) = @_; |
257
|
|
|
|
|
|
|
|
258
|
40
|
|
|
|
|
72
|
for (;;) { |
259
|
47
|
|
|
|
|
114
|
my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2); |
260
|
47
|
100
|
|
|
|
151
|
my ($up, $where) = @{pop @path} if (@path); |
|
42
|
|
|
|
|
74
|
|
261
|
47
|
100
|
|
|
|
114
|
if ($up) { |
262
|
42
|
|
|
|
|
100
|
$up->kdp_insert(@$kdp); |
263
|
42
|
50
|
|
|
|
97
|
if ($DEBUG) { |
264
|
42
|
|
|
|
|
120
|
my ($tthere, $twhere) = $up->locate_key($kdp->[0]); |
265
|
42
|
50
|
|
|
|
981
|
croak "Couldn't find key `$kdp->[0]' in node after just inserting it!" |
266
|
|
|
|
|
|
|
unless $tthere; |
267
|
42
|
50
|
|
|
|
95
|
croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!" |
268
|
|
|
|
|
|
|
unless $twhere == $where; |
269
|
|
|
|
|
|
|
} |
270
|
42
|
|
|
|
|
96
|
$up->subnode($where, $newleft); |
271
|
42
|
|
|
|
|
96
|
$up->subnode($where+1, $newright); |
272
|
42
|
100
|
|
|
|
83
|
return unless $self->node_overfull($up); |
273
|
7
|
|
|
|
|
19
|
$cur_node = $up; |
274
|
|
|
|
|
|
|
} else { # We're at the top; make a new root. |
275
|
5
|
|
|
|
|
32
|
my $newroot = new jsFind::Node([$kdp->[0]], |
276
|
|
|
|
|
|
|
[$kdp->[1]], |
277
|
|
|
|
|
|
|
[$newleft, $newright]); |
278
|
5
|
|
|
|
|
16
|
$self->root($newroot); |
279
|
5
|
|
|
|
|
17
|
return; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=head2 B |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
Return B (maximum number of keys) |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
my $max_size = $t->B; |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
sub B { |
293
|
2143
|
|
|
2143
|
1
|
8918
|
$_[0]{B}; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=head2 root |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
Returns root node |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
my $root = $t->root; |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=cut |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub root { |
305
|
5941
|
|
|
5941
|
1
|
8376
|
my ($self, $newroot) = @_; |
306
|
5941
|
100
|
|
|
|
11148
|
$self->{Root} = $newroot if defined $newroot; |
307
|
5941
|
|
|
|
|
12458
|
$self->{Root}; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 node_overfull |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Returns if node is overfull |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
if ($node->node_overfull) { something } |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=cut |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub node_overfull { |
319
|
2095
|
|
|
2095
|
1
|
2404
|
my $self = shift; |
320
|
2095
|
|
|
|
|
1990
|
my $node = shift; |
321
|
2095
|
|
|
|
|
3951
|
$node->size > $self->B; |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=head2 to_string |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
Returns your tree as formatted string. |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
my $text = $root->to_string; |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Mostly usefull for debugging as output leaves much to be desired. |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=cut |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub to_string { |
335
|
3
|
|
|
3
|
1
|
103377
|
$_[0]->root->to_string; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head2 to_dot |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
Create Graphviz graph of your tree |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
my $dot_graph = $root->to_dot; |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=cut |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub to_dot { |
347
|
1
|
|
|
1
|
1
|
22261
|
my $self = shift; |
348
|
|
|
|
|
|
|
|
349
|
1
|
|
|
|
|
69
|
my $dot = qq/digraph dns {\nrankdir=LR;\n/; |
350
|
1
|
|
|
|
|
8
|
$dot .= $self->root->to_dot; |
351
|
1
|
|
|
|
|
2
|
$dot .= qq/\n}\n/; |
352
|
|
|
|
|
|
|
|
353
|
1
|
|
|
|
|
16
|
return $dot; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=head2 to_jsfind |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Create xml index files for jsFind. This should be called after |
359
|
|
|
|
|
|
|
your B-Tree has been filled with data. |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
$root->to_jsfind( |
362
|
|
|
|
|
|
|
dir => '/full/path/to/index/dir/', |
363
|
|
|
|
|
|
|
data_codepage => 'ISO-8859-2', |
364
|
|
|
|
|
|
|
index_codepage => 'UTF-8', |
365
|
|
|
|
|
|
|
output_filter => sub { |
366
|
|
|
|
|
|
|
my $t = shift || return; |
367
|
|
|
|
|
|
|
$t =~ s/è/e/; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
); |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
All options except C are optional. |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
Returns number of nodes in created tree. |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Options: |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=over 4 |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=item dir |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
Full path to directory for index (which will be created if needed). |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item data_codepage |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
If your imput data isn't in C encoding, you will have to specify |
386
|
|
|
|
|
|
|
this option. |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=item index_codepage |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
If your index encoding is not C use this option. |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
If you are not using supplied JavaScript search code, or your browser is |
393
|
|
|
|
|
|
|
terribly broken and thinks that index shouldn't be in UTF-8 encoding, use |
394
|
|
|
|
|
|
|
this option to specify encoding for created XML index. |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=item output_filter |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
B |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Code ref to sub which can do modifications on resulting XML file for node. |
401
|
|
|
|
|
|
|
Encoding of this data will be in L and you have to take care |
402
|
|
|
|
|
|
|
not to break XML structure. Calling L on your result index |
403
|
|
|
|
|
|
|
(like C does in this distribution) is a good idea after using |
404
|
|
|
|
|
|
|
this option. |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
This option is also right place to plug in unaccenting function using |
407
|
|
|
|
|
|
|
L. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=back |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=cut |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
my $iconv; |
414
|
|
|
|
|
|
|
my $iconv_l1; |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub to_jsfind { |
417
|
4
|
|
|
4
|
1
|
5946
|
my $self = shift; |
418
|
|
|
|
|
|
|
|
419
|
4
|
|
|
|
|
45
|
my %arg = @_; |
420
|
|
|
|
|
|
|
|
421
|
4
|
50
|
|
|
|
62
|
confess "to_jsfind need path to your index directory !" unless ($arg{'dir'}); |
422
|
|
|
|
|
|
|
|
423
|
4
|
|
|
|
|
15
|
my $data_codepage = $arg{'data_codepage'}; |
424
|
4
|
|
100
|
|
|
31
|
my $index_codepage = $arg{'index_codepage'} || 'UTF-8'; |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# create ISO-8859-1 iconv for HTML::Entities decode |
427
|
4
|
|
|
|
|
1030
|
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$index_codepage); |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
# create another iconv for data |
430
|
4
|
100
|
66
|
|
|
53
|
if ($data_codepage && $index_codepage) { |
431
|
2
|
|
|
|
|
206
|
$iconv = Text::Iconv->new($data_codepage,$index_codepage); |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
4
|
|
|
|
|
23
|
return $self->root->to_jsfind($arg{'dir'},"0"); |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
# private, default cmd function |
439
|
|
|
|
|
|
|
sub default_cmp { |
440
|
609161
|
|
|
609161
|
0
|
914753
|
$_[0] cmp $_[1]; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=head2 _recode |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
This is internal function to recode charset. |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
It will also try to decode entities in data using L. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=cut |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
sub _recode { |
452
|
13239
|
|
|
13239
|
|
16937
|
my $self = shift; |
453
|
13239
|
|
50
|
|
|
24668
|
my $text = shift || return; |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
sub _decode_html_entities { |
456
|
36
|
|
50
|
36
|
|
133
|
my $data = shift || return; |
457
|
36
|
|
33
|
|
|
355
|
$data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data"; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
13239
|
100
|
|
|
|
23652
|
if ($iconv) { |
461
|
66
|
|
33
|
|
|
379
|
$text = $iconv->convert($text) || $text && carp "convert problem: $text"; |
462
|
66
|
|
|
|
|
234
|
$text =~ s/(\&\w+;)/_decode_html_entities($1)/ges; |
|
36
|
|
|
|
|
60
|
|
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
13239
|
|
|
|
|
31521
|
return $text; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
##################################################################### |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=head1 jsFind::Node methods |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Each node has C key-data pairs, with C <= C <= C<2B>, and |
473
|
|
|
|
|
|
|
each has C subnodes, which might be null. |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
The node is a blessed reference to a list with three elements: |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
($keylist, $datalist, $subnodelist) |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
each is a reference to a list list. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
The null node is represented by a blessed reference to an empty list. |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=cut |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
package jsFind::Node; |
486
|
|
|
|
|
|
|
|
487
|
7
|
|
|
7
|
|
55
|
use warnings; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
465
|
|
488
|
7
|
|
|
7
|
|
78
|
use strict; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
299
|
|
489
|
|
|
|
|
|
|
|
490
|
7
|
|
|
7
|
|
41
|
use Carp; |
|
7
|
|
|
|
|
21
|
|
|
7
|
|
|
|
|
600
|
|
491
|
7
|
|
|
7
|
|
46
|
use File::Path; |
|
7
|
|
|
|
|
11
|
|
|
7
|
|
|
|
|
477
|
|
492
|
7
|
|
|
7
|
|
8655
|
use Text::Iconv; |
|
7
|
|
|
|
|
28337
|
|
|
7
|
|
|
|
|
457
|
|
493
|
7
|
|
|
7
|
|
8537
|
use POSIX; |
|
7
|
|
|
|
|
99467
|
|
|
7
|
|
|
|
|
99
|
|
494
|
|
|
|
|
|
|
|
495
|
7
|
|
|
7
|
|
31313
|
use base 'jsFind'; |
|
7
|
|
|
|
|
15
|
|
|
7
|
|
|
|
|
21191
|
|
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
my $KEYS = 0; |
498
|
|
|
|
|
|
|
my $DATA = 1; |
499
|
|
|
|
|
|
|
my $SUBNODES = 2; |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=head2 new |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Create New node |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
my $node = new jsFind::Node ($keylist, $datalist, $subnodelist); |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
You can also mit argument list to create empty node. |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
my $empty_node = new jsFind::Node; |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=cut |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
sub new { |
514
|
103
|
|
|
103
|
|
144
|
my $self = shift; |
515
|
103
|
|
66
|
|
|
395
|
my $package = ref $self || $self; |
516
|
103
|
50
|
66
|
|
|
314
|
croak "Internal error: jsFind::Node::new called with wrong number of arguments." |
517
|
|
|
|
|
|
|
unless @_ == 3 || @_ == 0; |
518
|
103
|
|
|
|
|
553
|
bless [@_] => $package; |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=head2 locate_key |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
Locate key in node using linear search. This should probably be replaced |
524
|
|
|
|
|
|
|
by binary search for better performance. |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
my ($found, $index) = $node->locate_key($key, $cmp_coderef); |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Argument C<$cmp_coderef> is optional reference to custom comparison |
529
|
|
|
|
|
|
|
operator. |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
Returns (1, $index) if $key[$index] eq $key. |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
Returns (0, $index) if key could be found in $subnode[$index]. |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
In scalar context, just returns 1 or 0. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=cut |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
sub locate_key { |
540
|
|
|
|
|
|
|
# Use linear search for testing, replace with binary search. |
541
|
13661
|
|
|
13661
|
|
15531
|
my $self = shift; |
542
|
13661
|
|
|
|
|
16483
|
my $key = shift; |
543
|
13661
|
|
50
|
|
|
52791
|
my $cmp = shift || \&jsFind::default_cmp; |
544
|
13661
|
|
|
|
|
14358
|
my $i; |
545
|
|
|
|
|
|
|
my $cmp_result; |
546
|
13661
|
|
|
|
|
38936
|
my $N = $self->size; |
547
|
13661
|
|
|
|
|
31495
|
for ($i = 0; $i < $N; $i++) { |
548
|
609161
|
|
|
|
|
1038504
|
$cmp_result = &$cmp($key, $self->key($i)); |
549
|
609161
|
100
|
|
|
|
1692253
|
last if $cmp_result <= 0; |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# $i is now the index of the first node-key greater than $key |
553
|
|
|
|
|
|
|
# or $N if there is no such. $cmp_result is 0 iff the key was found. |
554
|
13661
|
|
|
|
|
38709
|
(!$cmp_result, $i); |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=head2 emptynode |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
Creates new empty node |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
$node = $root->emptynode; |
563
|
|
|
|
|
|
|
$new_node = $node->emptynode; |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=cut |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
sub emptynode { |
568
|
4
|
|
|
4
|
|
20
|
new($_[0]); # Pass package name, but not anything else. |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=head2 is_empty |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
Test if node is empty |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
if ($node->is_empty) { something } |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
# undef is empty; so is a blessed empty list. |
580
|
|
|
|
|
|
|
sub is_empty { |
581
|
8173
|
|
|
8173
|
|
12584
|
my $self = shift; |
582
|
8173
|
50
|
|
|
|
49320
|
!defined($self) || $#$self < 0; |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=head2 key |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
Return C<$i>th key from node |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
my $key = $node->key($i); |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=cut |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
sub key { |
594
|
|
|
|
|
|
|
# my ($self, $n) = @_; |
595
|
|
|
|
|
|
|
# $self->[$KEYS][$n]; |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
# speedup |
598
|
609161
|
|
|
609161
|
|
1584860
|
$_[0]->[$KEYS][$_[1]]; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
=head2 data |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
Return C<$i>th data from node |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
my $data = $node->data($i); |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=cut |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
sub data { |
610
|
3871
|
|
|
3871
|
|
5065
|
my ($self, $n) = @_; |
611
|
3871
|
|
|
|
|
18055
|
$self->[$DATA][$n]; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=head2 kdp_replace |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
Set key data pair for C<$i>th element in node |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
$node->kdp_replace($i, "key value" => { |
619
|
|
|
|
|
|
|
"data key 1" => "data value 1", |
620
|
|
|
|
|
|
|
"data key 2" => "data value 2", |
621
|
|
|
|
|
|
|
}; |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=cut |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub kdp_replace { |
626
|
1
|
|
|
1
|
|
3
|
my ($self, $n, $k => $d) = @_; |
627
|
1
|
50
|
|
|
|
5
|
if (defined $k) { |
628
|
1
|
|
|
|
|
2
|
$self->[$KEYS][$n] = $k; |
629
|
1
|
|
|
|
|
4
|
$self->[$DATA][$n] = $d; |
630
|
|
|
|
|
|
|
} |
631
|
1
|
|
|
|
|
9
|
[$self->[$KEYS][$n], |
632
|
|
|
|
|
|
|
$self->[$DATA][$n]]; |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=head2 kdp_insert |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
Insert key/data pair in tree |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
$node->kdp_insert("key value" => "data value"); |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
No return value. |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=cut |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
sub kdp_insert { |
646
|
2098
|
|
|
2098
|
|
3249
|
my $self = shift; |
647
|
2098
|
|
|
|
|
2948
|
my ($k => $d) = @_; |
648
|
2098
|
100
|
|
|
|
3944
|
my ($there, $where) = $self->locate_key($k) unless $self->is_empty; |
649
|
|
|
|
|
|
|
|
650
|
2098
|
50
|
|
|
|
5082
|
if ($there) { croak("Tried to insert `$k => $d' into node where `$k' was already present."); } |
|
0
|
|
|
|
|
0
|
|
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
# undef fix |
653
|
2098
|
|
100
|
|
|
4507
|
$where ||= 0; |
654
|
|
|
|
|
|
|
|
655
|
2098
|
|
|
|
|
2409
|
splice(@{$self->[$KEYS]}, $where, 0, $k); |
|
2098
|
|
|
|
|
6853
|
|
656
|
2098
|
|
|
|
|
2723
|
splice(@{$self->[$DATA]}, $where, 0, $d); |
|
2098
|
|
|
|
|
4958
|
|
657
|
2098
|
|
|
|
|
2237
|
splice(@{$self->[$SUBNODES]}, $where, 0, undef); |
|
2098
|
|
|
|
|
8305
|
|
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=head2 kdp_append |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
Adds new data keys and values to C<$i>th element in node |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
$node->kdp_append($i, "key value" => { |
665
|
|
|
|
|
|
|
"added data key" => "added data value", |
666
|
|
|
|
|
|
|
}; |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=cut |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
sub kdp_append { |
671
|
3870
|
|
|
3870
|
|
6500
|
my ($self, $n, $k => $d) = @_; |
672
|
3870
|
50
|
|
|
|
7292
|
if (defined $k) { |
673
|
3870
|
|
|
|
|
6067
|
$self->[$KEYS][$n] = $k; |
674
|
3870
|
|
|
|
|
3739
|
my ($kv,$dv) = %{$d}; |
|
3870
|
|
|
|
|
11270
|
|
675
|
3870
|
|
|
|
|
14680
|
$self->[$DATA][$n]->{$kv} = $dv; |
676
|
|
|
|
|
|
|
} |
677
|
3870
|
|
|
|
|
13396
|
[$self->[$KEYS][$n], |
678
|
|
|
|
|
|
|
$self->[$DATA][$n]]; |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=head2 subnode |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
Set new or return existing subnode |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
# return 4th subnode |
686
|
|
|
|
|
|
|
my $my_node = $node->subnode(4); |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
# create new subnode 5 from $my_node |
689
|
|
|
|
|
|
|
$node->subnode(5, $my_node); |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
=cut |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
sub subnode { |
694
|
5684
|
|
|
5684
|
|
8540
|
my ($self, $n, $newnode) = @_; |
695
|
5684
|
100
|
|
|
|
11403
|
$self->[$SUBNODES][$n] = $newnode if defined $newnode; |
696
|
5684
|
|
|
|
|
13753
|
$self->[$SUBNODES][$n]; |
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=head2 is_leaf |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
Test if node is leaf |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
if ($node->is_leaf) { something } |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=cut |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
sub is_leaf { |
708
|
7686
|
|
|
7686
|
|
9346
|
my $self = shift; |
709
|
7686
|
|
|
|
|
23254
|
! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node. |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=head2 size |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
Return number of keys in the node |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
my $nr = $node->size; |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=cut |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
sub size { |
721
|
15951
|
|
|
15951
|
|
25329
|
my $self = shift; |
722
|
15951
|
|
|
|
|
14915
|
return scalar(@{$self->[$KEYS]}); |
|
15951
|
|
|
|
|
41253
|
|
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head2 halves |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Split node into two halves so that keys C<0 .. $n-1> are in one node |
728
|
|
|
|
|
|
|
and keys C<$n+1 ... $size> are in the other. |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
my ($left_node, $right_node, $kdp) = $node->halves($n); |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=cut |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
sub halves { |
735
|
47
|
|
|
47
|
|
76
|
my $self = shift; |
736
|
47
|
|
|
|
|
63
|
my $n = shift; |
737
|
47
|
|
|
|
|
96
|
my $s = $self->size; |
738
|
47
|
|
|
|
|
64
|
my @right; |
739
|
|
|
|
|
|
|
my @left; |
740
|
|
|
|
|
|
|
|
741
|
47
|
|
|
|
|
310
|
$left[$KEYS] = [@{$self->[$KEYS]}[0 .. $n-1]]; |
|
47
|
|
|
|
|
665
|
|
742
|
47
|
|
|
|
|
182
|
$left[$DATA] = [@{$self->[$DATA]}[0 .. $n-1]]; |
|
47
|
|
|
|
|
613
|
|
743
|
47
|
|
|
|
|
176
|
$left[$SUBNODES] = [@{$self->[$SUBNODES]}[0 .. $n]]; |
|
47
|
|
|
|
|
268
|
|
744
|
|
|
|
|
|
|
|
745
|
47
|
|
|
|
|
201
|
$right[$KEYS] = [@{$self->[$KEYS]}[$n+1 .. $s-1]]; |
|
47
|
|
|
|
|
730
|
|
746
|
47
|
|
|
|
|
182
|
$right[$DATA] = [@{$self->[$DATA]}[$n+1 .. $s-1]]; |
|
47
|
|
|
|
|
551
|
|
747
|
47
|
|
|
|
|
173
|
$right[$SUBNODES] = [@{$self->[$SUBNODES]}[$n+1 .. $s]]; |
|
47
|
|
|
|
|
318
|
|
748
|
|
|
|
|
|
|
|
749
|
47
|
|
|
|
|
209
|
my @middle = ($self->[$KEYS][$n], $self->[$DATA][$n]); |
750
|
|
|
|
|
|
|
|
751
|
47
|
|
|
|
|
159
|
($self->new(@left), $self->new(@right), \@middle); |
752
|
|
|
|
|
|
|
} |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
=head2 to_string |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
Dumps tree as string |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
my $str = $root->to_string; |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=cut |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
sub to_string { |
763
|
55
|
|
|
55
|
|
97
|
my $self = shift; |
764
|
55
|
|
100
|
|
|
245
|
my $indent = shift || 0; |
765
|
55
|
|
|
|
|
275
|
my $I = ' ' x $indent; |
766
|
55
|
50
|
|
|
|
136
|
return '' if $self->is_empty; |
767
|
55
|
|
|
|
|
150
|
my ($k, $d, $s) = @$self; |
768
|
55
|
|
|
|
|
79
|
my $result = ''; |
769
|
55
|
100
|
|
|
|
322
|
$result .= defined($s->[0]) ? $s->[0]->to_string($indent+2) : ''; |
770
|
55
|
|
|
|
|
144
|
my $N = $self->size; |
771
|
55
|
|
|
|
|
89
|
my $i; |
772
|
55
|
|
|
|
|
322
|
for ($i = 0; $i < $N; $i++) { |
773
|
|
|
|
|
|
|
# $result .= $I . "$k->[$i] => $d->[$i]\n"; |
774
|
2056
|
|
|
|
|
3552
|
$result .= $I . "$k->[$i]\n"; |
775
|
2056
|
100
|
|
|
|
5550
|
$result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+2) : ''; |
776
|
|
|
|
|
|
|
} |
777
|
55
|
|
|
|
|
445
|
$result; |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
=begin comment |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
use Data::Dumper; |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
sub to_string { |
785
|
|
|
|
|
|
|
my $self = shift; |
786
|
|
|
|
|
|
|
my $indent = shift || 0; |
787
|
|
|
|
|
|
|
my $path = shift || '0'; |
788
|
|
|
|
|
|
|
return '' if $self->is_empty; |
789
|
|
|
|
|
|
|
my ($k, $d, $s) = @$self; |
790
|
|
|
|
|
|
|
my $result = ''; |
791
|
|
|
|
|
|
|
$result .= defined($s->[0]) ? $s->[0]->to_string($indent+1,"$path/0") : ''; |
792
|
|
|
|
|
|
|
my $N = $self->size; |
793
|
|
|
|
|
|
|
for (my $i = 0; $i < $N; $i++) { |
794
|
|
|
|
|
|
|
my $dump = Dumper($d->[$i]); |
795
|
|
|
|
|
|
|
$dump =~ s/[\n\r\s]+/ /gs; |
796
|
|
|
|
|
|
|
$dump =~ s/\$VAR1\s*=\s*//; |
797
|
|
|
|
|
|
|
$result .= sprintf("%-5s [%2d] %2s: %s => %s\n", $path, $i, $indent, $k->[$i], $dump); |
798
|
|
|
|
|
|
|
$result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+1,"$path/$i") : ''; |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
$result; |
801
|
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
=end comment |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
=head2 to_dot |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
Recursivly walk nodes of tree |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
=cut |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
sub to_dot { |
812
|
33
|
|
|
33
|
|
41
|
my $self = shift; |
813
|
33
|
|
|
|
|
33
|
my $parent = shift; |
814
|
|
|
|
|
|
|
|
815
|
33
|
50
|
|
|
|
54
|
return '' if $self->is_empty; |
816
|
|
|
|
|
|
|
|
817
|
33
|
|
|
|
|
47
|
my $dot = ''; |
818
|
|
|
|
|
|
|
|
819
|
33
|
|
|
|
|
58
|
my ($k, $d, $s) = @$self; |
820
|
33
|
|
|
|
|
60
|
my $N = $self->size; |
821
|
|
|
|
|
|
|
|
822
|
33
|
|
|
|
|
36
|
my @dot_keys; |
823
|
|
|
|
|
|
|
|
824
|
33
|
|
100
|
|
|
72
|
my $node_name = $parent || '_'; |
825
|
33
|
|
|
|
|
145
|
$node_name =~ s/\W+//g; |
826
|
33
|
|
|
|
|
60
|
$node_name .= " [$N]"; |
827
|
|
|
|
|
|
|
|
828
|
33
|
|
|
|
|
76
|
for (my $i = 0; $i <= $N; $i++) { |
829
|
118
|
100
|
|
|
|
218
|
if (my $key = $k->[$i]) { |
830
|
85
|
|
|
|
|
156
|
push @dot_keys, qq{<$i>$key}; |
831
|
|
|
|
|
|
|
} |
832
|
118
|
100
|
|
|
|
1189
|
$dot .= $s->[$i]->to_dot(qq{"$node_name":$i}) if ($s->[$i]); |
833
|
|
|
|
|
|
|
} |
834
|
33
|
100
|
|
|
|
74
|
push @dot_keys, qq{<$N>...} if (! $self->is_leaf); |
835
|
|
|
|
|
|
|
|
836
|
33
|
|
|
|
|
69
|
my $label = join("|",@dot_keys); |
837
|
33
|
|
|
|
|
78
|
$dot .= qq{"$node_name" [ shape=record, label="$label" ];\n}; |
838
|
|
|
|
|
|
|
|
839
|
33
|
100
|
|
|
|
82
|
$dot .= qq{$parent -> "$node_name";\n} if ($parent); |
840
|
|
|
|
|
|
|
|
841
|
33
|
|
|
|
|
162
|
$dot; |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=head2 to_xml |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Escape <, >, & and ", and to produce valid XML |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=cut |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); |
851
|
|
|
|
|
|
|
my $escape_re = join '|' => keys %escape; |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
sub to_xml { |
854
|
13239
|
|
33
|
13239
|
|
30409
|
my $self = shift || confess "you should call to_xml as object!"; |
855
|
|
|
|
|
|
|
|
856
|
13239
|
|
50
|
|
|
35474
|
my $d = shift || return; |
857
|
13239
|
|
|
|
|
41825
|
$d = $self->SUPER::_recode($d); |
858
|
13239
|
50
|
|
|
|
29986
|
confess "escape_re undefined!" unless ($escape_re); |
859
|
13239
|
|
|
|
|
62557
|
$d =~ s/($escape_re)/$escape{$1}/g; |
860
|
13239
|
|
|
|
|
35555
|
return $d; |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
=head2 base_x |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
Convert number to base x (used for jsFind index filenames). |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
my $n = $tree->base_x(50); |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
=cut |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
sub base_x { |
872
|
1356
|
|
|
1356
|
|
5625
|
my $self = shift; |
873
|
|
|
|
|
|
|
|
874
|
1356
|
|
|
|
|
1695
|
my $value = shift; |
875
|
|
|
|
|
|
|
|
876
|
1356
|
50
|
33
|
|
|
7362
|
confess("need non-negative number") if (! defined($value) || $value < 0); |
877
|
|
|
|
|
|
|
|
878
|
1356
|
|
|
|
|
13159
|
my @digits = qw( |
879
|
|
|
|
|
|
|
0 1 2 3 4 5 6 7 8 9 |
880
|
|
|
|
|
|
|
a b c d e f g h i j k l m n o p q r s t u v w x y z |
881
|
|
|
|
|
|
|
); |
882
|
|
|
|
|
|
|
|
883
|
1356
|
|
|
|
|
2846
|
my $base = scalar(@digits); |
884
|
1356
|
|
|
|
|
1759
|
my $out = ""; |
885
|
1356
|
|
|
|
|
3070
|
my $pow = 1; |
886
|
1356
|
|
|
|
|
1542
|
my $pos = 0; |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
|
889
|
1356
|
100
|
|
|
|
3012
|
if($value == 0) { |
890
|
18
|
|
|
|
|
82
|
return "0"; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
|
893
|
1338
|
|
|
|
|
3246
|
while($value > 0) { |
894
|
2598
|
|
|
|
|
3500
|
$pos = $value % $base; |
895
|
2598
|
|
|
|
|
4472
|
$out = $digits[$pos] . $out; |
896
|
2598
|
|
|
|
|
7916
|
$value = floor($value/$base); |
897
|
2598
|
|
|
|
|
7380
|
$pow *= $base; |
898
|
|
|
|
|
|
|
} |
899
|
|
|
|
|
|
|
|
900
|
1338
|
|
|
|
|
15344
|
return $out; |
901
|
|
|
|
|
|
|
} |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
=head2 to_jsfind |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
Create jsFind xml files |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
my $nr=$tree->to_jsfind('/path/to/index','0'); |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
Returns number of elements created |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=cut |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
sub to_jsfind { |
914
|
60
|
|
|
60
|
|
100
|
my $self = shift; |
915
|
60
|
|
|
|
|
686
|
my ($path,$file) = @_; |
916
|
|
|
|
|
|
|
|
917
|
60
|
50
|
|
|
|
193
|
return 0 if $self->is_empty; |
918
|
|
|
|
|
|
|
|
919
|
60
|
50
|
|
|
|
177
|
confess("path is undefined.") unless ($path); |
920
|
60
|
50
|
|
|
|
166
|
confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file)); |
921
|
|
|
|
|
|
|
|
922
|
60
|
|
|
|
|
234
|
$file = $self->base_x($file); |
923
|
|
|
|
|
|
|
|
924
|
60
|
|
|
|
|
94
|
my $nr_keys = 0; |
925
|
|
|
|
|
|
|
|
926
|
60
|
|
|
|
|
138
|
my ($k, $d, $s) = @$self; |
927
|
60
|
|
|
|
|
176
|
my $N = $self->size; |
928
|
|
|
|
|
|
|
|
929
|
60
|
|
|
|
|
118
|
my ($key_xml, $data_xml) = ("",""); |
930
|
|
|
|
|
|
|
|
931
|
60
|
|
|
|
|
564
|
for (my $i = 0; $i <= $N; $i++) { |
932
|
2127
|
|
|
|
|
9231
|
my $key = lc($k->[$i]); |
933
|
|
|
|
|
|
|
|
934
|
2127
|
100
|
|
|
|
4194
|
if ($key) { |
935
|
2067
|
|
|
|
|
4117
|
$key_xml .= ''.$self->to_xml($key).''; |
936
|
2067
|
|
|
|
|
3312
|
$data_xml .= ''; |
937
|
|
|
|
|
|
|
#use Data::Dumper; |
938
|
|
|
|
|
|
|
#print Dumper($d->[$i]); |
939
|
2067
|
|
|
|
|
2382
|
foreach my $path (keys %{$d->[$i]}) { |
|
2067
|
|
|
|
|
10423
|
|
940
|
5586
|
|
100
|
|
|
38516
|
$data_xml .= ''.$self->to_xml($path).''; |
|
|
|
50
|
|
|
|
|
941
|
5586
|
|
|
|
|
10536
|
$nr_keys++; |
942
|
|
|
|
|
|
|
} |
943
|
2067
|
|
|
|
|
4126
|
$data_xml .= ''; |
944
|
|
|
|
|
|
|
} |
945
|
|
|
|
|
|
|
|
946
|
2127
|
100
|
|
|
|
21364
|
$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]); |
947
|
|
|
|
|
|
|
} |
948
|
|
|
|
|
|
|
|
949
|
60
|
|
|
|
|
97
|
$key_xml .= ''; |
950
|
60
|
|
|
|
|
88
|
$data_xml .= ''; |
951
|
|
|
|
|
|
|
|
952
|
60
|
50
|
|
|
|
1905
|
if (! -e $path) { |
953
|
0
|
0
|
|
|
|
0
|
mkpath($path) || croak "can't create dir '$path': $!"; |
954
|
|
|
|
|
|
|
} |
955
|
|
|
|
|
|
|
|
956
|
60
|
50
|
|
|
|
11124
|
open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!"; |
957
|
60
|
50
|
|
|
|
17906
|
open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!"; |
958
|
|
|
|
|
|
|
|
959
|
60
|
|
|
|
|
1004
|
print K $key_xml; |
960
|
60
|
|
|
|
|
1506
|
print D $data_xml; |
961
|
|
|
|
|
|
|
|
962
|
60
|
|
|
|
|
2212
|
close(K); |
963
|
60
|
|
|
|
|
2089
|
close(D); |
964
|
|
|
|
|
|
|
|
965
|
60
|
|
|
|
|
709
|
return $nr_keys; |
966
|
|
|
|
|
|
|
} |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
1; |
969
|
|
|
|
|
|
|
__END__ |