line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::LSH::Bucket; |
2
|
4
|
|
|
4
|
|
618
|
use strict; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
125
|
|
3
|
4
|
|
|
4
|
|
22
|
use warnings; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
122
|
|
4
|
4
|
|
|
4
|
|
3561
|
use List::MoreUtils qw(uniq first_index); |
|
4
|
|
|
|
|
5017
|
|
|
4
|
|
|
|
|
380
|
|
5
|
4
|
|
|
4
|
|
25
|
use base qw( Algorithm::LSH::Base ); |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
2518
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
sub insert { |
8
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
9
|
0
|
|
|
|
|
|
my ( $label, $vector, $hashed_arrayref ) = @_; |
10
|
0
|
|
|
|
|
|
my $L = $self->L; |
11
|
0
|
|
|
|
|
|
for my $i ( 0 .. $L - 1 ) { |
12
|
0
|
|
|
|
|
|
$self->_put( $i, $vector, $hashed_arrayref->[$i] ); |
13
|
|
|
|
|
|
|
} |
14
|
0
|
|
|
|
|
|
$self->{label}->{ join( ':', @$vector ) } = $label; |
15
|
|
|
|
|
|
|
} |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub select { |
18
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
19
|
0
|
|
|
|
|
|
my ( $query_vector, $hashed_arrayref, $without_self ) = @_; |
20
|
0
|
|
|
|
|
|
my $query_vector_str = join( ":", @$query_vector ); |
21
|
0
|
|
|
|
|
|
my $L = $self->L; |
22
|
0
|
|
|
|
|
|
my @result; |
23
|
|
|
|
|
|
|
my %seen; |
24
|
0
|
|
|
|
|
|
for my $i ( 0 .. $L - 1 ) { |
25
|
0
|
|
|
|
|
|
my $hashed = join( "", @{ $hashed_arrayref->[$i] } ); |
|
0
|
|
|
|
|
|
|
26
|
0
|
|
|
|
|
|
my $vectors = $self->{data}->[$i]->{$hashed}; |
27
|
0
|
0
|
|
|
|
|
if ( ref $vectors ne 'ARRAY' ) { |
28
|
0
|
|
|
|
|
|
$vectors = $self->_left_light( $hashed, $i ); |
29
|
|
|
|
|
|
|
} |
30
|
0
|
|
|
|
|
|
for my $vector (@$vectors) { |
31
|
0
|
|
|
|
|
|
my $str = join( ":", @$vector ); |
32
|
0
|
0
|
|
|
|
|
next if defined $seen{$str}; |
33
|
0
|
0
|
0
|
|
|
|
next if $str eq $query_vector_str && $without_self; |
34
|
0
|
|
|
|
|
|
$seen{$str} = 1; |
35
|
0
|
|
|
|
|
|
push( @result, { $self->{label}->{$str} => $vector } ); |
36
|
|
|
|
|
|
|
} |
37
|
0
|
0
|
|
|
|
|
last if @result >= $L * 2; |
38
|
|
|
|
|
|
|
} |
39
|
0
|
|
|
|
|
|
return \@result; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub _left_right { |
43
|
0
|
|
|
0
|
|
|
my $self = shift; |
44
|
0
|
|
|
|
|
|
my $hashed = shift; |
45
|
0
|
|
|
|
|
|
my $i = shift; |
46
|
0
|
|
|
0
|
|
|
my $n = first_index { $hashed < $_ } @{ $self->index->[$i] }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
47
|
0
|
0
|
0
|
|
|
|
if ( $n == 0 || $n == -1 ) { |
48
|
0
|
|
|
|
|
|
return $self->data->[$i]->{ $self->index->[$i]->[$n] }; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
else { |
51
|
0
|
|
|
|
|
|
my @result; |
52
|
0
|
|
|
|
|
|
for ( $n - 1, $n ) { |
53
|
0
|
|
|
|
|
|
my $index = $self->index->[$i]->[$_]; |
54
|
0
|
|
|
|
|
|
push @result, @{ $self->data->[$i]->{$index} }; |
|
0
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
} |
56
|
0
|
|
|
|
|
|
return \@result; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub _put { |
61
|
0
|
|
|
0
|
|
|
my $self = shift; |
62
|
0
|
|
|
|
|
|
my $i = shift; |
63
|
0
|
|
|
|
|
|
my $vector = shift; |
64
|
0
|
|
|
|
|
|
my $hashed = shift; |
65
|
0
|
|
|
|
|
|
$hashed = join( "", @$hashed ); |
66
|
0
|
|
0
|
|
|
|
my $index = $self->{index}->[$i] || []; |
67
|
0
|
|
|
|
|
|
push @$index, $hashed; |
68
|
0
|
|
|
|
|
|
$self->{index}->[$i] = [ uniq sort @$index ]; |
69
|
0
|
|
|
|
|
|
push @{ $self->{data}->[$i]->{$hashed} }, $vector; |
|
0
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
1; |
73
|
|
|
|
|
|
|
__END__ |