line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Plucene::Index::TermInfosWriter; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Plucene::Index::TermInfosWriter - write to the term infos file |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $writer = Plucene::Index::TermInfosWriter->new( |
10
|
|
|
|
|
|
|
$dir_name, $segment, $field_infos); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
$writer->add(Plucene::Index::Term $term, |
13
|
|
|
|
|
|
|
Plucene::Index::TermInfo $term_info); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
$writer->write_term(Plucene::Index::Term $term); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
This will allow for the writing and adding to a term infos file for a |
20
|
|
|
|
|
|
|
particular segment. It also writes the term infos index. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 METHODS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
25
|
|
|
|
|
|
|
|
26
|
19
|
|
|
19
|
|
1197
|
use strict; |
|
19
|
|
|
|
|
36
|
|
|
19
|
|
|
|
|
659
|
|
27
|
19
|
|
|
19
|
|
176
|
use warnings; |
|
19
|
|
|
|
|
39
|
|
|
19
|
|
|
|
|
670
|
|
28
|
|
|
|
|
|
|
|
29
|
19
|
|
|
19
|
|
99
|
use constant INDEX_INTERVAL => 128; |
|
19
|
|
|
|
|
36
|
|
|
19
|
|
|
|
|
1378
|
|
30
|
|
|
|
|
|
|
|
31
|
19
|
|
|
19
|
|
98
|
use Carp qw(confess carp); |
|
19
|
|
|
|
|
37
|
|
|
19
|
|
|
|
|
1112
|
|
32
|
|
|
|
|
|
|
|
33
|
19
|
|
|
19
|
|
10040
|
use Plucene::Store::OutputStream; |
|
19
|
|
|
|
|
50
|
|
|
19
|
|
|
|
|
791
|
|
34
|
19
|
|
|
19
|
|
15241
|
use Plucene::Index::Term; |
|
19
|
|
|
|
|
43
|
|
|
19
|
|
|
|
|
148
|
|
35
|
19
|
|
|
19
|
|
1297
|
use Plucene::Index::TermInfo; |
|
19
|
|
|
|
|
39
|
|
|
19
|
|
|
|
|
128
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head2 new |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $writer = Plucene::Index::TermInfosWriter->new( |
40
|
|
|
|
|
|
|
$dir_name, $segment, $field_infos); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This will create a new Plucene::Index::TermInfosWriter object. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=cut |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Constructor.
#
# Arguments:
#   $class   - class to bless into
#   $d       - directory the segment files live in
#   $segment - segment name; the output file is "$d/$segment.tis", or
#              "$d/$segment.tii" when $is_i is true
#   $fis     - field infos object (required; used by write_term to map a
#              field name to its number)
#   $is_i    - true when this instance writes the term infos *index*
#
# A non-index writer also constructs its index counterpart and stores it
# in {other}; the two objects reference each other (see break_ref).
#
# Dies (via confess) when no field infos object is supplied.
sub new {
    my ($class, $d, $segment, $fis, $is_i) = @_;

    # Validate before touching the output stream so that a bad call does
    # not leave a freshly created, header-only file behind.
    confess("No field_infos!") unless $fis;

    my $self = bless {
        field_infos => $fis,
        is_index    => $is_i,
        size        => 0,
        last_term   => Plucene::Index::Term->new({ field => "", text => "" }),
        last_ti     => Plucene::Index::TermInfo->new,

        # This key must be the one add() reads and updates.
        last_index_pointer => 0,
        output             => Plucene::Store::OutputStream->new(
            "$d/$segment.ti" . ($is_i ? "i" : "s")
        ),
    }, $class;

    # Placeholder for the entry count.
    $self->{output}->write_int(0);    # Will be filled in when DESTROYed

    if (!$is_i) {
        $self->{other} = $class->new($d, $segment, $fis, 1);
        $self->{other}->{other} = $self;    # My enemy's enemy is my friend
    }
    return $self;
}
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 break_ref |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
This will break a circular reference. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Damned circular references. |
76
|
297
|
|
|
297
|
1
|
155285
|
# Drop this object's reference to its partner writer so that the pair
# created by new() no longer forms a reference cycle.
sub break_ref {
    my ($self) = @_;
    undef $self->{other};
}
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head2 add |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
$writer->add(Plucene::Index::Term $term, |
81
|
|
|
|
|
|
|
Plucene::Index::TermInfo $term_info); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
This will add the term and term info to the term infos file. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Append one (term, term info) entry to this writer's output.
#
#   $term - the term to write (its text/field are read via accessors)
#   $ti   - the term info; doc_freq is written as-is, the frequency and
#           proximity pointers are written as deltas against the
#           previously added term info
#
# Ordering problems are reported with carp but do not stop the write.
# A non-index writer forwards the previous entry to its index partner on
# every INDEX_INTERVAL-th call (starting with the first one). An index
# writer additionally records how far the partner's output has advanced
# since the last index entry.
sub add {
    my ($self, $term, $ti) = @_;
    no warnings 'uninitialized';

    my $is_index = $self->{is_index};
    my $previous = $self->{last_ti};
    my $out      = $self->{output};

    # Sanity checks: complain, but carry on regardless.
    if (!$is_index && $term->lt($self->{last_term})) {
        carp sprintf "Can't add out-of-order term %s lt %s (%s lt %s)",
            $term->text, $self->{last_term}->text,
            $term->field, $self->{last_term}->{field};
    }
    if ($ti->freq_pointer < $previous->freq_pointer) {
        carp "Frequency pointer out of order";
    }
    if ($ti->prox_pointer < $previous->prox_pointer) {
        carp "Proximity pointer out of order";
    }

    # Feed the index writer before this entry is written.
    if (!$is_index && (($self->{size} % INDEX_INTERVAL) == 0)) {
        $self->{other}->add($self->{last_term}, $previous);
    }

    $self->write_term($term);
    $out->write_vint($ti->doc_freq);
    $out->write_vlong($ti->freq_pointer - $previous->freq_pointer);
    $out->write_vlong($ti->prox_pointer - $previous->prox_pointer);

    # I bet Tony will think about subclassing at this point
    if ($is_index) {
        my $partner_out = $self->{other}->{output};
        $out->write_vlong($partner_out->tell - $self->{last_index_pointer});
        $self->{last_index_pointer} = $partner_out->tell;
    }

    $self->{last_ti} = $ti->clone;
    $self->{size}++;
}
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 write_term |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
$writer->write_term(Plucene::Index::Term $term); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
This will write the term to the term infos file. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# Write one term to the output, prefix-compressed against the previously
# written term.
#
#   $term - the term to write; its text and field are read via accessors
#
# Emits, in order: the number of leading characters shared with the
# previous term's text, the remaining suffix of this term's text, and the
# field number looked up in the field infos. The term then becomes the
# new "previous term". Returns $term.
sub write_term {
    my ($self, $term) = @_;
    no warnings 'uninitialized';

    # Only an undefined text falls back to the empty string: "0" is a
    # perfectly valid term text and must be written as such.
    my $text = $term->text;
    $text = "" unless defined $text;
    my $previous = $self->{last_term}->text;
    $previous = "" unless defined $previous;

    # Find longest common prefix. Comparing character by character, and
    # never looking past the shorter string, keeps the count correct when
    # the text contains NUL characters and works for any code point.
    my $limit =
        length($text) < length($previous) ? length($text) : length($previous);
    my $start = 0;
    $start++
        while $start < $limit
        && substr($text, $start, 1) eq substr($previous, $start, 1);

    $self->{output}->write_vint($start);
    $self->{output}->write_string(substr($text, $start));
    $self->{output}
        ->write_vint($self->{field_infos}->field_number($term->field));
    $self->{last_term} = $term;
}
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Destructor: go back to the start of the output and overwrite the
# placeholder written by new() with the number of entries added.
sub DESTROY {
    my $self = shift;

    # The stream may already be gone (for instance during global
    # destruction, when contained objects can be freed first); there is
    # nothing left to patch in that case.
    my $output = $self->{output} or return;

    $output->seek(0, 0);
    $output->write_int($self->{size});
}
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
1; |