line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package KinoSearch1::Index::TermInfosReader; |
2
|
34
|
|
|
34
|
|
31965
|
use strict; |
|
34
|
|
|
|
|
73
|
|
|
34
|
|
|
|
|
1134
|
|
3
|
34
|
|
|
34
|
|
181
|
use warnings; |
|
34
|
|
|
|
|
75
|
|
|
34
|
|
|
|
|
808
|
|
4
|
34
|
|
|
34
|
|
781
|
use KinoSearch1::Util::ToolSet; |
|
34
|
|
|
|
|
68
|
|
|
34
|
|
|
|
|
5007
|
|
5
|
34
|
|
|
34
|
|
203
|
use base qw( KinoSearch1::Util::Class ); |
|
34
|
|
|
|
|
69
|
|
|
34
|
|
|
|
|
3949
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
BEGIN {
    # Instance variables registered for this class, each defaulting to
    # undef.  The list is passed as flat key/value pairs, in this order.
    my @instance_var_defaults = (

        # supplied to the constructor and kept as members
        invindex => undef,
        seg_name => undef,
        finfos   => undef,

        # filled in by init_instance()
        orig_enum  => undef,
        index_enum => undef,
    );

    __PACKAGE__->init_instance_vars(@instance_var_defaults);
}
19
|
|
|
|
|
|
|
|
20
|
34
|
|
|
34
|
|
21525
|
use KinoSearch1::Index::SegTermEnum; |
|
34
|
|
|
|
|
166
|
|
|
34
|
|
|
|
|
17685
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Build the two SegTermEnum objects this reader relies on:
#   orig_enum  - reads "<seg_name>.tis"; the main Enum over all terms.
#   index_enum - reads "<seg_name>.tii" with is_index set; its cache is
#                filled immediately so it can point to places in the main
#                Enum.
# The value of the final fill_cache call is what the sub evaluates to.
sub init_instance {
    my ($self) = @_;
    my ( $invindex, $seg_name, $finfos )
        = @{$self}{qw( invindex seg_name finfos )};
    my $enum_class = 'KinoSearch1::Index::SegTermEnum';

    # main Enum: open_instream is deliberately called inside the list so
    # it is evaluated in list context, exactly as a constructor argument
    my @main_args = (
        finfos   => $finfos,
        instream => $invindex->open_instream("$seg_name.tis"),
    );
    $self->{orig_enum} = $enum_class->new(@main_args);

    # index Enum: same finfos, different stream, flagged as an index
    my @index_args = (
        finfos   => $finfos,
        instream => $invindex->open_instream("$seg_name.tii"),
        is_index => 1,
    );
    $self->{index_enum} = $enum_class->new(@index_args);

    $self->{index_enum}->fill_cache;
}
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Return a clone of the main SegTermEnum.  With no Term supplied the main
# Enum is reset first; with a Term it is positioned via fetch_term_info()
# before being cloned.
#
# NOTE(review): fetch_term_info() returns early without seeking when the
# Term's termstring is undefined, in which case the clone reflects
# wherever the main Enum already was -- confirm that is intended.
sub terms {
    my $self = shift;
    my $term = shift;

    if ( !defined $term ) {
        $self->{orig_enum}->reset;
    }
    else {
        $self->fetch_term_info($term);
    }

    return $self->{orig_enum}->clone_enum;
}
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Look up a Term in this segment.  Returns whatever _scan_enum() yields
# for the Term's termstring (a TermInfo when the term is present), or
# nothing at all when the termstring is undefined or the term is absent.
sub fetch_term_info {
    my $self = shift;
    my $term = shift;

    my $termstring = $term->get_termstring( $self->{finfos} );

    # an undefined termstring means the field doesn't exist, so only
    # touch the Enums when there is something to look for
    if ( defined $termstring ) {
        $self->_seek_enum($termstring);
        return $self->_scan_enum($termstring);
    }

    return;
}
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Position the main Enum as near as possible to where the termstring
# could live, using the in-memory index Enum to find the neighbourhood.
sub _seek_enum {
    my $self   = shift;
    my $wanted = shift;
    my ( $tii_enum, $tis_enum ) = @{$self}{qw( index_enum orig_enum )};

    # ask the index Enum for the approximate slot, then read the term
    # and TermInfo it is now sitting on
    my $slot       = $tii_enum->scan_cache($wanted);
    my $near_text  = $tii_enum->get_termstring;
    my $near_tinfo = $tii_enum->get_term_info;

    # seek the main Enum just before the term: file pointer from the
    # ballpark TermInfo, position = slot * index interval - 1
    $tis_enum->seek(
        $near_tinfo->get_index_fileptr,
        ( $slot * $tis_enum->get_index_interval() ) - 1,
        $near_text,
        $near_tinfo,
    );
}
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Advance the main Enum with scan_to() and report whether it stopped
# exactly on the target.  Returns the Enum's TermInfo on an exact match,
# otherwise nothing.
sub _scan_enum {
    my $self   = shift;
    my $wanted = shift;
    my $enum   = $self->{orig_enum};

    # walk forward until the Enum's current term is ge the target
    $enum->scan_to($wanted);

    # only an exact match at the stopping point counts as found
    my $landed_on = $enum->get_termstring;
    return unless defined $landed_on;
    return unless $landed_on eq $wanted;

    return $enum->get_term_info;
}
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Delegate to the main Enum: hand back whatever its get_skip_interval
# method returns.
sub get_skip_interval {
    my ($self) = @_;
    return $self->{orig_enum}->get_skip_interval;
}
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Close both underlying Enums, main first and index second.  The result
# of closing the index Enum is what the caller gets back.
sub close {
    my ($self) = @_;
    my ( $main_enum, $index_enum ) = @{$self}{qw( orig_enum index_enum )};

    $main_enum->close;
    return $index_enum->close;
}
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
1; |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
__END__ |