line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package KinoSearch1::Search::PhraseScorer; |
2
|
18
|
|
|
18
|
|
105
|
use strict; |
|
18
|
|
|
|
|
45
|
|
|
18
|
|
|
|
|
663
|
|
3
|
18
|
|
|
18
|
|
129
|
use warnings; |
|
18
|
|
|
|
|
40
|
|
|
18
|
|
|
|
|
597
|
|
4
|
18
|
|
|
18
|
|
109
|
use KinoSearch1::Util::ToolSet; |
|
18
|
|
|
|
|
39
|
|
|
18
|
|
|
|
|
2434
|
|
5
|
18
|
|
|
18
|
|
105
|
use base qw( KinoSearch1::Search::Scorer ); |
|
18
|
|
|
|
|
45
|
|
|
18
|
|
|
|
|
2241
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# Register the constructor parameters this class accepts, together with
# their default values, at compile time.  The list is handed to
# init_instance_vars(), which is not defined in this file.
# NOTE(review): presumably init_instance_vars() populates the package's
# %instance_vars declared below -- confirm against the ToolSet/base class.
BEGIN {
    my @constructor_defaults = (
        # constructor params
        weight         => undef,
        term_docs      => undef,
        phrase_offsets => undef,
        norms_reader   => undef,
        slop           => 0,
    );
    __PACKAGE__->init_instance_vars(@constructor_defaults);
}
our %instance_vars;
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# Constructor.
#
# Accepts labeled parameters (checked against %instance_vars via
# verify_args); the ones read here are:
#   weight         - object providing get_value()
#   term_docs      - array ref of objects providing get_doc_freq()
#   phrase_offsets - array ref, parallel to term_docs
#   norms_reader   - object providing get_bytes()
#   similarity     - passed straight to set_similarity()
#   slop           - must be numerically 0; anything else is fatal
#
# Dies (via confess) on invalid arguments, on non-zero slop, or when
# term_docs and phrase_offsets differ in length.  Returns the new object.
sub new {
    my $invocant = shift;

    # Reject unknown/malformed labeled parameters before doing any work.
    verify_args( \%instance_vars, @_ )
        or confess( kerror() );

    # Caller-supplied values override the class defaults.
    my %params = ( %instance_vars, @_ );

    my $self = $invocant->SUPER::new;
    $self->_init_child;

    # set/derive some member vars
    my $norm_bytes = $params{norms_reader}->get_bytes;
    $self->_set_norms($norm_bytes);
    $self->set_similarity( $params{similarity} );
    my $weight_value = $params{weight}->get_value;
    $self->_set_weight_value($weight_value);

    # TODO -- enable slop.
    if ( $params{slop} != 0 ) {
        confess("Sloppy phrase matching not yet implemented");
    }
    $self->_set_slop( $params{slop} );

    # Each term_docs entry must have a matching phrase offset.
    my $last_term_index   = $#{ $params{term_docs} };
    my $last_offset_index = $#{ $params{phrase_offsets} };
    if ( $last_term_index != $last_offset_index ) {
        confess("positions count doesn't match term count");
    }

    # sort terms by ascending frequency: compute the index permutation once,
    # then apply it to both parallel arrays so they stay aligned.
    my @by_freq = sort {
        $params{term_docs}[$a]->get_doc_freq
            <=> $params{term_docs}[$b]->get_doc_freq
    } 0 .. $last_term_index;
    my @term_docs      = map { $params{term_docs}[$_] } @by_freq;
    my @phrase_offsets = map { $params{phrase_offsets}[$_] } @by_freq;

    $self->_init_elements( \@term_docs, \@phrase_offsets );

    return $self;
}
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
1; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
__END__ |