line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package KinoSearch1::Document::Field; |
2
|
34
|
|
|
34
|
|
338
|
use strict; |
|
34
|
|
|
|
|
67
|
|
|
34
|
|
|
|
|
1194
|
|
3
|
34
|
|
|
34
|
|
291
|
use warnings; |
|
34
|
|
|
|
|
65
|
|
|
34
|
|
|
|
|
974
|
|
4
|
34
|
|
|
34
|
|
186
|
use KinoSearch1::Util::ToolSet; |
|
34
|
|
|
|
|
65
|
|
|
34
|
|
|
|
|
4980
|
|
5
|
34
|
|
|
34
|
|
194
|
use base qw( KinoSearch1::Util::Class ); |
|
34
|
|
|
|
|
91
|
|
|
34
|
|
|
|
|
5369
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
BEGIN { |
8
|
34
|
|
|
34
|
|
572
|
__PACKAGE__->init_instance_vars( |
9
|
|
|
|
|
|
|
# constructor args / members |
10
|
|
|
|
|
|
|
name => undef, |
11
|
|
|
|
|
|
|
analyzer => undef, |
12
|
|
|
|
|
|
|
boost => 1, |
13
|
|
|
|
|
|
|
stored => 1, |
14
|
|
|
|
|
|
|
indexed => 1, |
15
|
|
|
|
|
|
|
analyzed => 1, |
16
|
|
|
|
|
|
|
vectorized => 1, |
17
|
|
|
|
|
|
|
binary => 0, |
18
|
|
|
|
|
|
|
compressed => 0, |
19
|
|
|
|
|
|
|
omit_norms => 0, |
20
|
|
|
|
|
|
|
field_num => undef, |
21
|
|
|
|
|
|
|
value => '', |
22
|
|
|
|
|
|
|
fnm_bits => undef, |
23
|
|
|
|
|
|
|
fdt_bits => undef, |
24
|
|
|
|
|
|
|
tv_string => '', |
25
|
|
|
|
|
|
|
tv_cache => undef, |
26
|
|
|
|
|
|
|
); |
27
|
34
|
|
|
|
|
358
|
__PACKAGE__->ready_get_set( |
28
|
|
|
|
|
|
|
qw( |
29
|
|
|
|
|
|
|
value |
30
|
|
|
|
|
|
|
tv_string |
31
|
|
|
|
|
|
|
boost |
32
|
|
|
|
|
|
|
indexed |
33
|
|
|
|
|
|
|
stored |
34
|
|
|
|
|
|
|
analyzed |
35
|
|
|
|
|
|
|
vectorized |
36
|
|
|
|
|
|
|
binary |
37
|
|
|
|
|
|
|
compressed |
38
|
|
|
|
|
|
|
analyzer |
39
|
|
|
|
|
|
|
field_num |
40
|
|
|
|
|
|
|
name |
41
|
|
|
|
|
|
|
omit_norms |
42
|
|
|
|
|
|
|
) |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
34
|
|
|
34
|
|
13375
|
use KinoSearch1::Index::FieldsReader; |
|
34
|
|
|
|
|
188
|
|
|
34
|
|
|
|
|
1953
|
|
47
|
34
|
|
|
34
|
|
32379
|
use KinoSearch1::Index::FieldInfos; |
|
34
|
|
|
|
|
296
|
|
|
34
|
|
|
|
|
1815
|
|
48
|
34
|
|
|
34
|
|
22029
|
use KinoSearch1::Index::TermVector; |
|
34
|
|
|
|
|
103
|
|
|
34
|
|
|
|
|
1026
|
|
49
|
|
|
|
|
|
|
|
50
|
34
|
|
|
34
|
|
17216
|
use Storable qw( dclone ); |
|
34
|
|
|
|
|
62467
|
|
|
34
|
|
|
|
|
21103
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub init_instance { |
53
|
531
|
|
|
531
|
1
|
766
|
my $self = shift; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# field name is required |
56
|
531
|
50
|
|
|
|
1775
|
croak("Missing required parameter 'name'") |
57
|
|
|
|
|
|
|
unless length $self->{name}; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# don't index binary fields |
60
|
531
|
100
|
|
|
|
1856
|
if ( $self->{binary} ) { |
61
|
4
|
|
|
|
|
7
|
$self->{indexed} = 0; |
62
|
4
|
|
|
|
|
11
|
$self->{analyzed} = 0; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub clone { |
67
|
230
|
|
|
230
|
0
|
347
|
my $self = shift; |
68
|
230
|
|
|
|
|
14487
|
return dclone($self); |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Given two Field objects, return a child which has all the positive |
72
|
|
|
|
|
|
|
# attributes of both parents (meaning: values are OR'd). |
73
|
|
|
|
|
|
|
sub breed_with { |
74
|
28
|
|
|
28
|
0
|
45
|
my ( $self, $other ) = @_; |
75
|
28
|
|
|
|
|
58
|
my $kid = $self->clone; |
76
|
28
|
|
|
|
|
74
|
for (qw( indexed vectorized )) { |
77
|
56
|
|
33
|
|
|
176
|
$kid->{$_} ||= $other->{$_}; |
78
|
|
|
|
|
|
|
} |
79
|
28
|
|
|
|
|
176
|
return $kid; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
128
|
|
|
128
|
0
|
439
|
sub set_fnm_bits { $_[0]->{fnm_bits} = $_[1] } |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub get_fnm_bits { |
85
|
162
|
|
|
162
|
0
|
238
|
my $self = shift; |
86
|
162
|
100
|
|
|
|
515
|
$self->{fnm_bits} = KinoSearch1::Index::FieldInfos->encode_fnm_bits($self) |
87
|
|
|
|
|
|
|
unless defined $self->{fnm_bits}; |
88
|
162
|
|
|
|
|
909
|
return $self->{fnm_bits}; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
128
|
|
|
128
|
0
|
427
|
sub set_fdt_bits { $_[0]->{fdt_bits} = $_[1] } |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub get_fdt_bits { |
94
|
15390
|
|
|
15390
|
0
|
19322
|
my $self = shift; |
95
|
15390
|
50
|
|
|
|
37427
|
$self->{fdt_bits} |
96
|
|
|
|
|
|
|
= KinoSearch1::Index::FieldsReader->encode_fdt_bits($self) |
97
|
|
|
|
|
|
|
unless defined $self->{fdt_bits}; |
98
|
15390
|
|
|
|
|
58341
|
return $self->{fdt_bits}; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
30683
|
|
|
30683
|
0
|
74839
|
sub get_value_len { bytes::length( $_[0]->{value} ) } |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Return a TermVector object for a given Term, if it's in this field. |
104
|
|
|
|
|
|
|
sub term_vector { |
105
|
14
|
|
|
14
|
0
|
22
|
my ( $self, $term_text ) = @_; |
106
|
14
|
50
|
|
|
|
30
|
return unless bytes::length( $self->{tv_string} ); |
107
|
14
|
100
|
|
|
|
64
|
if ( !defined $self->{tv_cache} ) { |
108
|
4
|
|
|
|
|
67
|
$self->{tv_cache} = _extract_tv_cache( $self->{tv_string} ); |
109
|
|
|
|
|
|
|
} |
110
|
14
|
100
|
|
|
|
32
|
if ( exists $self->{tv_cache}{$term_text} ) { |
111
|
11
|
|
|
|
|
42
|
my ( $positions, $starts, $ends ) |
112
|
|
|
|
|
|
|
= _unpack_posdata( $self->{tv_cache}{$term_text} ); |
113
|
11
|
|
|
|
|
61
|
my $term_vector = KinoSearch1::Index::TermVector->new( |
114
|
|
|
|
|
|
|
text => $term_text, |
115
|
|
|
|
|
|
|
field => $self->{name}, |
116
|
|
|
|
|
|
|
positions => $positions, |
117
|
|
|
|
|
|
|
start_offsets => $starts, |
118
|
|
|
|
|
|
|
end_offsets => $ends, |
119
|
|
|
|
|
|
|
); |
120
|
11
|
|
|
|
|
33
|
return $term_vector; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
3
|
|
|
|
|
9
|
return; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
1; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
__END__ |