|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Lingua::TokenParse;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Lingua::TokenParse::VERSION = '0.1602';  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $AUTHORITY = 'cpan:GENE';  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: DEPRECATED in favor of Lingua::Word::Parser  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $VERSION = '0.1602';  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
775
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
10
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
8
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
    | 
| 
11
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
8
 | 
 use Carp qw(croak);  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
    | 
| 
12
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
613
 | 
 use Storable qw(retrieve store);  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3155
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
    | 
| 
13
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
428
 | 
 use Math::BaseCalc ();  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1505
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2256
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
16
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
546
 | 
     my $proto = shift;  | 
| 
17
 | 
2
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
11
 | 
     my $class = ref $proto || $proto;  | 
| 
18
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     my $self  = {  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         verbose      => 0,  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # The word to parse!  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         word         => undef,  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # We need to use this.  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         word_length  => 0,  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Known tokens.  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         lexicon      => {},  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Local lexicon cache file name.  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         lexicon_file => '',  # ?: 'lexicon-' . time(),  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # All word parts.  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         parts        => [],  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # All possible parts combinations.  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         combinations => [],  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Scored list of the known parts combinations.  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         knowns       => {},  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Definitions of the known and unknown fragments in knowns.  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         definitions  => {},  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Fragment definition separator.  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         separator    => ' + ',  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Known-but-not-defined definition output string.  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         not_defined  => '.',  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Unknown definition output string.  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unknown      => '?',  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Known trimming regexp rules.  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         constraints  => [],  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @_,  # slurp anything else and override defaults.  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
46
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     bless $self, $class;  | 
| 
47
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $self->_init();  | 
| 
48
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return $self;  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _init {  | 
| 
52
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
3
 | 
     my $self = shift;  | 
| 
53
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     warn "Entering _init()\n" if $self->{verbose};  | 
| 
54
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     $self->word( $self->{word} ) if $self->{word};  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Retrieve our lexicon cache if a filename was set.  | 
| 
56
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $self->lexicon_cache;  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub DESTROY {  | 
| 
60
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
739
 | 
     my $self = shift;  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Cache our lexicon if a filename has been given.  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->lexicon_cache( $self->{lexicon_file} )  | 
| 
63
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
         if $self->{lexicon_file};  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub verbose {  | 
| 
67
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $self = shift;  | 
| 
68
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->{verbose} = shift if @_;  | 
| 
69
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $self->{verbose};  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub word {  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # WORD: This method is the only place where word_length is set.  | 
| 
74
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1
 | 
     my $self = shift;  | 
| 
75
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     warn "Entering word()\n" if $self->{verbose};  | 
| 
76
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     if( @_ ) {  | 
| 
77
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
         $self->{word} = shift;  | 
| 
78
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $self->{word_length} = length $self->{word};  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         printf "\tword = %s\n\tlength = %d\n",  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->{word}, $self->{word_length}  | 
| 
81
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             if $self->{verbose};  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
83
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
     return $self->{word};  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub lexicon {  | 
| 
87
 | 
964
 | 
 
 | 
 
 | 
  
964
  
 | 
  
1
  
 | 
2075
 | 
     my $self = shift;  | 
| 
88
 | 
964
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1585
 | 
     if( @_ ) {  | 
| 
89
 | 
2
 | 
  
  0
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
17
 | 
         $self->{lexicon} = @_ == 1 && ref $_[0] eq 'HASH'  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ? shift  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : @_ % 2 == 0  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ? { @_ }  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : {};  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
95
 | 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1641
 | 
     return $self->{lexicon};  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parts {  | 
| 
99
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
360
 | 
     my $self = shift;  | 
| 
100
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     $self->{parts} = shift if @_;  | 
| 
101
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     return $self->{parts};  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub combinations {  | 
| 
105
 | 
195
 | 
 
 | 
 
 | 
  
195
  
 | 
  
1
  
 | 
288
 | 
     my $self = shift;  | 
| 
106
 | 
195
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
341
 | 
     $self->{combinations} = shift if @_;  | 
| 
107
 | 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
493
 | 
     return $self->{combinations};  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub knowns {  | 
| 
111
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
622
 | 
     my $self = shift;  | 
| 
112
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $self->{knowns} = shift if @_;  | 
| 
113
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     return $self->{knowns};  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub definitions {  | 
| 
117
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
4
 | 
     my $self = shift;  | 
| 
118
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     $self->{definitions} = shift if @_;  | 
| 
119
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return $self->{definitions};  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub separator {  | 
| 
123
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $self = shift;  | 
| 
124
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->{separator} = shift if @_;  | 
| 
125
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $self->{separator};  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub not_defined {  | 
| 
129
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $self = shift;  | 
| 
130
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->{not_defined} = shift if @_;  | 
| 
131
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $self->{not_defined};  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub unknown {  | 
| 
135
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $self = shift;  | 
| 
136
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $self->{unknown} = shift if @_;  | 
| 
137
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $self->{unknown};  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub constraints {  | 
| 
141
 | 
46
 | 
 
 | 
 
 | 
  
46
  
 | 
  
1
  
 | 
418
 | 
     my $self = shift;  | 
| 
142
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
81
 | 
     $self->{constraints} = shift if @_;  | 
| 
143
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
82
 | 
     return $self->{constraints};  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse {  | 
| 
147
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
     my $self = shift;  | 
| 
148
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     warn "Entering parse()\n" if $self->{verbose};  | 
| 
149
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     $self->word( shift ) if @_;  | 
| 
150
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     croak 'No word provided.' unless defined $self->{word};  | 
| 
151
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1
 | 
     croak 'No lexicon defined.' unless keys %{ $self->{lexicon} };  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Reset our data structures.  | 
| 
153
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     $self->parts([]);  | 
| 
154
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     $self->definitions({});  | 
| 
155
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     $self->combinations([]);  | 
| 
156
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     $self->knowns({});  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Build new ones based on the word.  | 
| 
158
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     $self->build_parts;  | 
| 
159
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     $self->build_definitions;  | 
| 
160
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     $self->build_combinations;  | 
| 
161
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     $self->build_knowns;  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub build_parts {  | 
| 
165
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1
 | 
     my $self = shift;  | 
| 
166
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     warn "Entering build_parts()\n" if $self->{verbose};  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     for my $i (0 .. $self->{word_length} - 1) {  | 
| 
169
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
         for my $j (1 .. $self->{word_length} - $i) {  | 
| 
170
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
             my $part = substr $self->{word}, $i, $j;  | 
| 
171
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
105
 | 
             push @{ $self->{parts} }, $part  | 
| 
172
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
146
 | 
                 unless grep { $part =~ /$_/ }  | 
| 
173
 | 
45
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
59
 | 
                     @{ $self->constraints };  | 
| 
 
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     if($self->{verbose}) {  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # XXX This is ugly.  | 
| 
179
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my $last = 0;  | 
| 
180
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         for my $part (@{ $self->{parts} }) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
181
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             print '',  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ($last ? $last > length( $part ) ? "\n\t" : ', ' : "\t"),  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $part;  | 
| 
184
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $last = length $part;  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
186
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         print "\n" if @{ $self->{parts} };  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
188
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     return $self->{parts};  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Save a known combination entry => definition table.  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub build_definitions {  | 
| 
193
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
     my $self = shift;  | 
| 
194
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     warn "Entering build_definitions()\n" if $self->{verbose};  | 
| 
195
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
     for my $part (@{ $self->{parts} }) {  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{definitions}{$part} = $self->{lexicon}{$part}  | 
| 
197
 | 
45
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
83
 | 
             if $self->{lexicon}{$part};  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
199
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     warn "\t", join( "\n\t", sort keys %{ $self->definitions } ), "\n"  | 
| 
200
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
         if $self->{verbose};  | 
| 
201
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     return $self->{definitions};  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub build_combinations {  | 
| 
205
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
2
 | 
     my $self = shift;  | 
| 
206
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     warn "Entering build_combinations()\n" if $self->{verbose};  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # field size for binary iteration (digits of precision)  | 
| 
209
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $y  = $self->{word_length} - 1;  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # total number of zero-based combinations  | 
| 
211
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $z  = 2 ** $y - 1;  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # field size for the count  | 
| 
213
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $lz = length $z;  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # field size for a combination  | 
| 
215
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $m  = $self->{word_length} + $y;  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     warn sprintf  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "\tTotal combinations: %d\n\tConstrained combinations:\n",  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $z + 1  | 
| 
219
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         if $self->{verbose};  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Truth is a single partition character: the lowly dot.  | 
| 
222
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     my $c = Math::BaseCalc->new( digits => [ 0, '.' ] );  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Build a word part combination for each iteration.  | 
| 
225
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
     for my $n ( 0 .. $z ) {  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Iterate in base two.  | 
| 
227
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
587
 | 
         my $i = $c->to_base( $n );  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Get the binary digits as an array.  | 
| 
230
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7306
 | 
         my @i = split //, sprintf( '%0'.$y.'s', $i );  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Join the character and digit arrays into a partitioned word.  | 
| 
233
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
408
 | 
         my $t = '';  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # ..by stepping over the characters and peeling off a digit.  | 
| 
235
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
777
 | 
         for( split //, $self->{word} ) {  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Zero values become ''. Haha!  Truth prevails.  | 
| 
237
 | 
2304
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
5575
 | 
             $t .= $_ . (shift( @i ) || '');  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
256
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
435
 | 
         unless( grep { $t =~ /$_/ } @{ $self->{constraints} } ) {  | 
| 
 
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1165
 | 
    | 
| 
 
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
467
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Preach it.  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             printf "\t%".$lz.'d) %0'.$y.'s => %'.$m."s\n", $n, $i, $t  | 
| 
243
 | 
192
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
376
 | 
                 if $self->{verbose};  | 
| 
244
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
235
 | 
             push @{ $self->combinations }, $t;  | 
| 
 
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
293
 | 
    | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
248
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     return $self->{combinations};  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub build_knowns {  | 
| 
252
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
3
 | 
     my $self = shift;  | 
| 
253
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     return unless scalar keys %{ $self->{lexicon} };  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
254
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     warn "Entering build_knowns()\n" if $self->{verbose};  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Save the familiarity value for each "raw" combination.  | 
| 
257
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     for my $combo (@{ $self->{combinations} }) {  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Skip combinations that have already been seen.  | 
| 
259
 | 
192
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
371
 | 
         next if exists $self->{knowns}{$combo};  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
261
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
296
 | 
         my ($sum, $frag_sum, $char_sum) = (0, 0, 0);  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Get the bits of the combination.  | 
| 
264
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
425
 | 
         my @chunks = split /\./, $combo;  | 
| 
265
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
301
 | 
         for (@chunks) {  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # XXX Uh.. Magically handle hyphens in lexicon entries.  | 
| 
267
 | 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1449
 | 
             ($_, my $combo_seen) = _hyphenate($_, $self->lexicon, 0);  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Sum the combination familiarity values.  | 
| 
270
 | 
960
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1871
 | 
             if ($combo_seen) {  | 
| 
271
 | 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128
 | 
                 $frag_sum++;  | 
| 
272
 | 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
171
 | 
                 $char_sum += length;  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # XXX Huh? Why?  Can $_ change or something?  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Stick our combination back together.  | 
| 
277
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
354
 | 
         $combo = join '.', @chunks;  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Save this combination and its familiarity ratios.  | 
| 
280
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
309
 | 
         my $x = $frag_sum / @chunks;  | 
| 
281
 | 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
281
 | 
         my $y = $char_sum / $self->{word_length};  | 
| 
282
 | 
192
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
328
 | 
         warn "\t$combo: [$x, $y]\n" if $self->{verbose};  | 
| 
283
 | 
192
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
471
 | 
         if( $x || $y ) {  | 
| 
284
 | 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
297
 | 
             $self->{knowns}{$combo} = [ $x, $y ];  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
287
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
230
 | 
             delete $self->{knowns}{$combo};  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     return $self->{knowns};  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Reduce the number of known combinations by concatinating adjacent  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # unknowns (and then removing any duplicates produced).  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #sub learn {  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    my ($self, %args) = @_;  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Get the list of (partially) unknown stem combinations.  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Loop through each looking in %args or prompting for a definition.  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #}  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Update the given string with its actual lexicon value and increment  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # the seen flag.  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _hyphenate {  | 
| 
306
 | 
960
 | 
 
 | 
 
 | 
  
960
  
 | 
 
 | 
1514
 | 
     my ($string, $lexicon, $combo_seen) = @_;  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
960
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2442
 | 
     if (exists $lexicon->{$string}) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
309
 | 
108
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
195
 | 
         $combo_seen++ if defined $combo_seen;  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (exists $lexicon->{"-$string"}) {  | 
| 
312
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $combo_seen++ if defined $combo_seen;  | 
| 
313
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $string = "-$string";  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (exists $lexicon->{"$string-"}) {  | 
| 
316
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $combo_seen++ if defined $combo_seen;  | 
| 
317
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $string = "$string-";  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
320
 | 
960
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2097
 | 
     return wantarray ? ($string, $combo_seen) : $string;  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub output_knowns {  | 
| 
324
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $self = shift;  | 
| 
325
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @out = ();  | 
| 
326
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $header = < | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Combination [frag familiarity, char familiarity]  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Fragment definitions  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HEADER  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
332
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for my $known (  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         reverse sort {  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $self->{knowns}{$a}[0] <=> $self->{knowns}{$b}[0] ||  | 
| 
335
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $self->{knowns}{$a}[1] <=> $self->{knowns}{$b}[1]  | 
| 
336
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         } keys %{ $self->{knowns} }  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
338
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my @definition;  | 
| 
339
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         for my $chunk (split /\./, $known) {  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             push @definition,  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 defined $self->{definitions}{$chunk}  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? $self->{definitions}{$chunk}  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ? $self->{definitions}{$chunk}  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         : $self->{not_defined}  | 
| 
345
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     : $self->{unknown};  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         push @out, sprintf qq/%s [%s]\n%s/,  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $known,  | 
| 
350
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             join (', ', map { sprintf '%0.2f', $_ }  | 
| 
351
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 @{ $self->{knowns}{$known} }),  | 
| 
352
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             join ($self->{separator}, @definition);  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
355
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return wantarray ? @out : $header . join "\n\n", @out;  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Naive, no locking read/write.  If you run a production environment,  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # you know what to do.  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub lexicon_cache {  | 
| 
361
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
444
 | 
     my( $self, $file, $value ) = @_;  | 
| 
362
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     warn "Entering lexicon_cache()\n" if $self->{verbose};  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Set the file and the lexicon_file attribute if we are told to.  | 
| 
365
 | 
5
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
25
 | 
     if( $file && $file eq 'lexicon_file' && $value ) {  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $self->{lexicon_file} = $value;  | 
| 
367
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $file = $value;  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # If there is no file try to use the lexicon_file.  | 
| 
371
 | 
5
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
16
 | 
     $file ||= $self->{lexicon_file};  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Otherwise, bail out!  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     warn( "No lexicon cache file set\n" ) and return  | 
| 
374
 | 
5
 | 
  
 50
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
12
 | 
         if $self->{verbose} && !$file;  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
376
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     if( $file ) {  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Store 'em if you got 'em.  | 
| 
378
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         if( keys %{ $self->{lexicon} } ) {  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
379
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
             warn "store( $self->{lexicon}, $file )\n" if $self->{verbose};  | 
| 
380
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             store( $self->{lexicon}, $file );  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # ..Retrieve 'em if not.  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
384
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
5
 | 
             warn "retrieve( $file )\n" if $self->{verbose} && -e $file;  | 
| 
385
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
             $self->lexicon( retrieve( $file ) ) if -e $file;  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |