| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Plucene::Index::TermInfosWriter; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Plucene::Index::TermInfosWriter - write to the term infos file |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $writer = Plucene::Index::TermInfosWriter->new( |
|
10
|
|
|
|
|
|
|
$dir_name, $segment, $field_infos); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
$writer->add(Plucene::Index::Term $term, |
|
13
|
|
|
|
|
|
|
Plucene::Index::TermInfo $term_info); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
$writer->write_term(Plucene::Index::Term $term); |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
This will allow for the writing and adding to a term infos file for a |
|
20
|
|
|
|
|
|
|
particular segment. It also writes the term infos index. |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 METHODS |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
|
25
|
|
|
|
|
|
|
|
|
26
|
19
|
|
|
19
|
|
1197
|
use strict; |
|
|
19
|
|
|
|
|
36
|
|
|
|
19
|
|
|
|
|
659
|
|
|
27
|
19
|
|
|
19
|
|
176
|
use warnings; |
|
|
19
|
|
|
|
|
39
|
|
|
|
19
|
|
|
|
|
670
|
|
|
28
|
|
|
|
|
|
|
|
|
29
|
19
|
|
|
19
|
|
99
|
use constant INDEX_INTERVAL => 128; |
|
|
19
|
|
|
|
|
36
|
|
|
|
19
|
|
|
|
|
1378
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
19
|
|
|
19
|
|
98
|
use Carp qw(confess carp); |
|
|
19
|
|
|
|
|
37
|
|
|
|
19
|
|
|
|
|
1112
|
|
|
32
|
|
|
|
|
|
|
|
|
33
|
19
|
|
|
19
|
|
10040
|
use Plucene::Store::OutputStream; |
|
|
19
|
|
|
|
|
50
|
|
|
|
19
|
|
|
|
|
791
|
|
|
34
|
19
|
|
|
19
|
|
15241
|
use Plucene::Index::Term; |
|
|
19
|
|
|
|
|
43
|
|
|
|
19
|
|
|
|
|
148
|
|
|
35
|
19
|
|
|
19
|
|
1297
|
use Plucene::Index::TermInfo; |
|
|
19
|
|
|
|
|
39
|
|
|
|
19
|
|
|
|
|
128
|
|
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head2 new |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $writer = Plucene::Index::TermInfosWriter->new( |
|
40
|
|
|
|
|
|
|
$dir_name, $segment, $field_infos); |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This will create a new Plucene::Index::TermInfosWriter object. |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=cut |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Construct a writer for one segment's term infos file.
#
# Arguments (positional):
#   $class   - class to bless the new object into
#   $d       - directory name; prefix of the output path
#   $segment - segment name; base of the output file name
#   $fis     - field infos object (required)
#   $is_i    - true to write the index file "<segment>.tii",
#              false to write the main file "<segment>.tis"
#
# A main writer (false $is_i) also builds its companion index writer and
# links the two through {other} in both directions. That is a reference
# cycle; break_ref exists to undo it.
#
# Dies (via confess) when no field infos object is supplied.
sub new {
    my ($class, $d, $segment, $fis, $is_i) = @_;

    # Validate first, so that a bad call neither opens the output file nor
    # leaves a half-built object behind to be DESTROYed.
    confess("No field_infos!") unless $fis;

    my $suffix = $is_i ? "i" : "s";
    my $self   = bless {
        field_infos => $fis,
        is_index    => $is_i,
        size        => 0,
        last_term   => Plucene::Index::Term->new({ field => "", text => "" }),
        last_ti     => Plucene::Index::TermInfo->new,

        # This key must be spelled exactly as add() reads and writes it.
        last_index_pointer => 0,
        output             =>
            Plucene::Store::OutputStream->new("$d/$segment.ti" . $suffix),
    }, $class;

    # Placeholder for the entry count; DESTROY seeks back and fills it in.
    $self->{output}->write_int(0);

    if (!$is_i) {
        # Pair the main writer with its index writer, in both directions.
        $self->{other} = $class->new($d, $segment, $fis, 1);
        $self->{other}->{other} = $self;
    }
    return $self;
}
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 break_ref |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
This will break a circular reference. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Damned circular references. |
|
76
|
297
|
|
|
297
|
1
|
155285
|
# Drop this object's link to its companion writer so the pair no longer
# forms a reference cycle. The {other} key stays in place with an undefined
# value.
sub break_ref {
    my $self = shift;
    undef $self->{other};
}
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head2 add |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
$writer->add(Plucene::Index::Term $term, |
|
81
|
|
|
|
|
|
|
Plucene::Index::TermInfo $term_info); |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
This will add the term and term info to the term infos file. |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Append one term and its term info to this writer's output.
#
# Arguments:
#   $term - term object; text, field and lt are called on it
#   $ti   - term info object; doc_freq, freq_pointer, prox_pointer and
#           clone are called on it
#
# Warns (carp) but still writes when the term or either pointer goes
# backwards relative to the previous entry.
#
# On a main writer, every INDEX_INTERVAL-th call first forwards the
# previous term and term info to the companion index writer. On an index
# writer, each entry additionally records how far the companion's output
# position has moved since the last index entry.
#
# Returns the entry count as it was before this call.
sub add {
    my ($self, $term, $ti) = @_;

    # Pointers of a freshly constructed term info may be undefined; they are
    # deliberately treated as zero in the arithmetic below.
    no warnings 'uninitialized';

    my $is_index  = $self->{is_index};
    my $last_term = $self->{last_term};
    my $last_ti   = $self->{last_ti};
    my $out       = $self->{output};

    if (!$is_index && $term->lt($last_term)) {
        carp sprintf "Can't add out-of-order term %s lt %s (%s lt %s)",
            $term->text, $last_term->text, $term->field, $last_term->field;
    }
    carp "Frequency pointer out of order"
        if $ti->freq_pointer < $last_ti->freq_pointer;
    carp "Proximity pointer out of order"
        if $ti->prox_pointer < $last_ti->prox_pointer;

    # Sample the previous entry into the index file at regular intervals.
    if (!$is_index and ($self->{size} % INDEX_INTERVAL) == 0) {
        $self->{other}->add($last_term, $last_ti);
    }

    $self->write_term($term);
    $out->write_vint($ti->doc_freq);

    # Pointers are stored as deltas against the previous entry.
    $out->write_vlong($ti->freq_pointer - $last_ti->freq_pointer);
    $out->write_vlong($ti->prox_pointer - $last_ti->prox_pointer);

    if ($is_index) {
        # Query the companion's position once and reuse it, so the delta
        # written and the value remembered cannot disagree.
        my $position = $self->{other}->{output}->tell;
        $out->write_vlong($position - ($self->{last_index_pointer} || 0));
        $self->{last_index_pointer} = $position;
    }

    $self->{last_ti} = $ti->clone;
    $self->{size}++;
}
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 write_term |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
$writer->write_term(Plucene::Index::Term $term); |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
This will write the term to the term infos file. |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# Write one term to the output, prefix-compressed against the previous term.
#
# Arguments:
#   $term - term object; text and field are called on it
#
# Emits, in order: the number of leading characters shared with the
# previously written term's text (vint), the remaining suffix (string), and
# the field number looked up through field_infos (vint). The term is then
# remembered as the new "previous term".
#
# Returns the term that was passed in.
sub write_term {
    my ($self, $term) = @_;

    # Only an undefined text is replaced by the empty string; a plain
    # truth test would also discard the legitimate term text "0".
    my $text = $term->text;
    $text = "" unless defined $text;
    my $previous = $self->{last_term}->text;
    $previous = "" unless defined $previous;

    # Find longest common prefix: XOR of two strings yields a NUL wherever
    # they agree. Both sides are interpolated so that the string form of the
    # operator is used even if a value has been used as a number.
    my ($agree) = ("$text" ^ "$previous") =~ /^(\0*)/;
    my $start = length $agree;

    # XOR pads the shorter operand with NULs, so text containing NUL
    # characters could appear to share more than the shorter string holds.
    my $limit =
        length($text) < length($previous) ? length($text) : length($previous);
    $start = $limit if $start > $limit;

    my $out = $self->{output};
    $out->write_vint($start);
    $out->write_string(substr($text, $start));
    $out->write_vint($self->{field_infos}->field_number($term->field));

    $self->{last_term} = $term;
}
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Destructor: seek back to the start of the output and overwrite the
# placeholder written by the constructor with the final entry count.
sub DESTROY {
    my $self = shift;

    # During global destruction the stream reference may already have been
    # cleared; there is nothing left to finalise in that case.
    my $out = $self->{output};
    return unless defined $out;

    $out->seek(0, 0);
    $out->write_int($self->{size});
}
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
1; |