line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package KinoSearch1::Index::PostingsWriter; |
2
|
34
|
|
|
34
|
|
192
|
use strict; |
|
34
|
|
|
|
|
83
|
|
|
34
|
|
|
|
|
1235
|
|
3
|
34
|
|
|
34
|
|
244
|
use warnings; |
|
34
|
|
|
|
|
66
|
|
|
34
|
|
|
|
|
892
|
|
4
|
34
|
|
|
34
|
|
187
|
use KinoSearch1::Util::ToolSet; |
|
34
|
|
|
|
|
90
|
|
|
34
|
|
|
|
|
5088
|
|
5
|
34
|
|
|
34
|
|
203
|
use base qw( KinoSearch1::Util::Class ); |
|
34
|
|
|
|
|
71
|
|
|
34
|
|
|
|
|
3359
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
BEGIN {
    # Instance variables for this class, declared at compile time.  Every
    # entry defaults to undef.
    my @instance_vars = (

        # constructor params / members
        invindex => undef,
        seg_name => undef,

        # members (sort_pool is assigned in init_instance)
        sort_pool => undef,
    );
    __PACKAGE__->init_instance_vars(@instance_vars);
}
17
|
|
|
|
|
|
|
|
18
|
34
|
|
|
34
|
|
264
|
use KinoSearch1::Index::TermInfo; |
|
34
|
|
|
|
|
72
|
|
|
34
|
|
|
|
|
913
|
|
19
|
34
|
|
|
34
|
|
21079
|
use KinoSearch1::Index::TermInfosWriter; |
|
34
|
|
|
|
|
102
|
|
|
34
|
|
|
|
|
851
|
|
20
|
34
|
|
|
34
|
|
23744
|
use KinoSearch1::Util::SortExternal; |
|
34
|
|
|
|
|
118
|
|
|
34
|
|
|
|
|
14502
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub init_instance {
    my ($self) = @_;

    # The posting list cache lives in a SortExternal object, which autosorts
    # it.  The pool is constructed with this object's own invindex and
    # seg_name.
    my @pool_args = (
        invindex => $self->{invindex},
        seg_name => $self->{seg_name},
    );
    $self->{sort_pool} = KinoSearch1::Util::SortExternal->new(@pool_args);
}
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Add all the postings in an inverted document to the sort pool. |
33
|
|
|
|
|
|
|
sub add_postings {
    my $self           = shift;
    my $postings_array = shift;

    # $postings_array is an array reference; its elements are passed to the
    # sort pool's feed() as a single flat list.
    my $sort_pool = $self->{sort_pool};
    $sort_pool->feed( @{$postings_array} );
}
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Bulk add all the postings in a segment to the sort pool. |
39
|
|
|
|
|
|
|
sub add_segment {
    my $self       = shift;
    my $seg_reader = shift;
    my $doc_map    = shift;

    # Fetch the term enumerator and the term-docs object from the segment
    # reader.
    my $term_enum = $seg_reader->terms;
    my $term_docs = $seg_reader->term_docs;

    # set_read_positions(1) is applied before the term docs are handed over.
    $term_docs->set_read_positions(1);

    # _add_segment is not defined in the visible part of this file; it
    # receives the sort pool, both readers and the doc map.
    my $sort_pool = $self->{sort_pool};
    _add_segment( $sort_pool, $term_enum, $term_docs, $doc_map );
}
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=for comment |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Process all the postings in the sort pool. Generate the freqs and positions |
50
|
|
|
|
|
|
|
files. Hand off data to TermInfosWriter for generating the term |
51
|
|
|
|
|
|
|
dictionaries. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=cut |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub write_postings {
    my ($self) = @_;
    my $invindex  = $self->{invindex};
    my $seg_name  = $self->{seg_name};
    my $sort_pool = $self->{sort_pool};

    # Sort everything that has been fed to the pool so far.
    $sort_pool->sort_all;

    # The term infos writer is built for the same invindex and segment.
    my $tinfos_writer = KinoSearch1::Index::TermInfosWriter->new(
        invindex => $invindex,
        seg_name => $seg_name,
    );

    # Delete any existing .frq / .prx file for this segment before opening
    # new outstreams under the same names.
    my $frq_file = $seg_name . '.frq';
    my $prx_file = $seg_name . '.prx';
    for my $file ( $frq_file, $prx_file ) {
        next unless $invindex->file_exists($file);
        $invindex->delete_file($file);
    }
    my $frq_out = $invindex->open_outstream($frq_file);
    my $prx_out = $invindex->open_outstream($prx_file);

    # _write_postings is not defined in the visible part of this file; it
    # is given the sorted pool, the term infos writer and both outstreams.
    _write_postings( $sort_pool, $tinfos_writer, $frq_out, $prx_out );

    # Close both outstreams, then finish the term infos writer.
    $frq_out->close;
    $prx_out->close;
    $tinfos_writer->finish;
}
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Close the sort pool held by this writer.
sub finish {
    my ($self) = @_;
    my $sort_pool = $self->{sort_pool};
    $sort_pool->close;
}
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
1; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
__END__ |