|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Search::Fulltext::Tokenizer::Ngram;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: Character n-gram tokenizer for Search::Fulltext  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
688
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
    | 
| 
6
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
25
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
    | 
| 
7
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use Carp ();  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
    | 
| 
8
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use Scalar::Util qw/looks_like_number/;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
428
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = 0.01;  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
13
 | 
32
 | 
 
 | 
 
 | 
  
32
  
 | 
  
0
  
 | 
66
 | 
   my ($class, $token_length) = @_;  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
32
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
285
 | 
   unless (looks_like_number $token_length and $token_length > 0) {  | 
| 
16
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     Carp::croak('Token length must be 1+.');  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
223
 | 
   bless +{ token_length => $token_length } => $class;  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub create_token_iterator {  | 
| 
23
 | 
32
 | 
 
 | 
 
 | 
  
32
  
 | 
  
0
  
 | 
66
 | 
   my ($self, $text) = @_;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
   my $token_index = -1;  | 
| 
26
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
   my $n = $self->token_length;  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return sub {  | 
| 
28
 | 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
267
 | 
   GET_NEXT_TOKEN:  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
30
 | 
218
 | 
 
 | 
 
 | 
  
218
  
 | 
 
 | 
291
 | 
       ++$token_index;  | 
| 
31
 | 
242
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1877
 | 
       return if $token_index + $n > length($text);  | 
| 
32
 | 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
409
 | 
       my $token = substr $text, $token_index, $n;  | 
| 
33
 | 
221
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
632
 | 
       redo GET_NEXT_TOKEN if $token =~ /\s/;  | 
| 
34
 | 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1248
 | 
       return ($token, $n, $token_index, $token_index + $n, $token_index);  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
36
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
614
 | 
   };  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
32
 | 
 
 | 
 
 | 
  
32
  
 | 
  
0
  
 | 
102
 | 
 sub token_length { $_[0]->{token_length} }  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |