line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Search::Query::Dialect::KSx::WildcardScorer; |
2
|
3
|
|
|
3
|
|
17
|
use strict; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
97
|
|
3
|
3
|
|
|
3
|
|
19
|
use warnings; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
91
|
|
4
|
3
|
|
|
3
|
|
27
|
use base qw( KinoSearch::Search::Matcher ); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
2661
|
|
5
|
3
|
|
|
3
|
|
289
|
use Carp; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
1503
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.201'; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Inside-out member vars. |
10
|
|
|
|
|
|
|
# Per-object state is kept in these file-scoped hashes rather than inside the
# object itself. Every entry is keyed by $$self and removed again in DESTROY.
#   %doc_ids    - array ref of matching doc ids, sorted numerically ascending
#   %pos        - index of the current doc id within that array (starts at -1)
#   %boosts     - value returned by the compiler's get_boost
#   %sim        - value returned by the compiler's get_similarity
#   %term_freqs - hash ref mapping doc id to its summed term frequency
my ( %doc_ids, %pos, %boosts, %sim, %term_freqs ); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Destructor: drops this object's entry from every inside-out hash and then
# hands over to the parent class destructor.
sub DESTROY {
    my ($self) = @_;

    # Same order as the hashes are declared; each holds one slot per object.
    for my $registry ( \%doc_ids, \%pos, \%boosts, \%sim, \%term_freqs ) {
        delete $registry->{$$self};
    }

    $self->SUPER::DESTROY;
}
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 NAME |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Search::Query::Dialect::KSx::WildcardScorer - KinoSearch query extension |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 SYNOPSIS |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# see KinoSearch::Search::Matcher |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 METHODS |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
This class is a KinoSearch::Search::Matcher subclass. |
33
|
|
|
|
|
|
|
Only new or overridden methods are documented. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=cut |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head2 new( I<args> )

Returns a new Scorer object.

I<args> is a list of key/value pairs. Two keys are consumed here; everything
else is handed to the parent class constructor unchanged.

=over

=item compiler

Object that is asked for C<get_similarity> and C<get_boost>.

=item posting_lists

Array ref of posting list objects. Each one is walked with C<next>, and the
C<get_freq> of every posting is added to the running total for its doc id.

=back

=cut

sub new {
    my $class  = shift;
    my %params = @_;

    # Pull out the arguments the parent constructor must not see.
    my ( $compiler, $posting_lists )
        = delete @params{qw( compiler posting_lists )};
    my $self = $class->SUPER::new(%params);

    # doc id => term frequency, summed over every posting list.
    my %freq_for;
    foreach my $list ( @{$posting_lists} ) {
        while ( my $doc_id = $list->next ) {
            $freq_for{$doc_id} += $list->get_posting->get_freq;
        }
    }

    $sim{$$self}        = $compiler->get_similarity;
    $term_freqs{$$self} = \%freq_for;
    $doc_ids{$$self}    = [ sort { $a <=> $b } keys %freq_for ];
    $pos{$$self}        = -1;    # no document selected until next() is called
    $boosts{$$self}     = $compiler->get_boost;

    return $self;
}
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 next

Advances to the following entry in the sorted doc id list and returns that
doc_id. Returns 0 when there are no more entries, including when the list is
empty; further calls keep returning 0.

=cut

sub next {
    my $self    = shift;
    my $doc_ids = $doc_ids{$$self};

    if ( $pos{$$self} >= $#$doc_ids ) {

        # Park the cursor one past the last index. Leaving it on the last
        # entry would make an exhausted scorer look as if it were still
        # positioned on the final document.
        $pos{$$self} = $#$doc_ids + 1;
        return 0;
    }

    return $doc_ids->[ ++$pos{$$self} ];
}
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head2 get_doc_id

Returns the doc_id for the current position. Returns 0 when the scorer is not
positioned on a document: before the first call to C<next>, or when the
position lies past the end of the doc id list.

=cut

sub get_doc_id {
    my $self = shift;
    my $pos  = $pos{$$self};
    my $dids = $doc_ids{$$self};

    # The position starts at -1. A negative subscript would count from the
    # end of the array and hand back the last doc id, so reject it explicitly.
    return 0 if $pos < 0 || $pos >= scalar @$dids;

    return $dids->[$pos];
}
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head2 score

Returns the score of the hit at the current position: the stored boost
multiplied by the similarity object's C<tf> of the document's summed term
frequency, divided by 10.

=cut

sub score {
    my ($self) = @_;

    my $cursor    = $pos{$$self};
    my $doc_list  = $doc_ids{$$self};
    my $weight    = $boosts{$$self};
    my $current   = $doc_list->[$cursor];
    my $frequency = $term_freqs{$$self}->{$current};

    #carp "doc_id=$current term_freq=$frequency boost=$weight";

    # NOTE(review): the divisor 10 is carried over as-is; its rationale is
    # not documented in this file.
    my $weighted_tf = $weight * $sim{$$self}->tf($frequency);
    return $weighted_tf / 10;
}
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
1; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
__END__ |