line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
5
|
|
|
5
|
|
879
|
use strict; |
|
5
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
193
|
|
2
|
5
|
|
|
5
|
|
28
|
use warnings; |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
200
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package AI::MaxEntropy; |
5
|
|
|
|
|
|
|
|
6
|
5
|
|
|
5
|
|
5211
|
use Algorithm::LBFGS; |
|
5
|
|
|
|
|
6980
|
|
|
5
|
|
|
|
|
158
|
|
7
|
5
|
|
|
5
|
|
3021
|
use AI::MaxEntropy::Model; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
152
|
|
8
|
5
|
|
|
5
|
|
42
|
use XSLoader; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
6312
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.20'; |
11
|
|
|
|
|
|
|
XSLoader::load('AI::MaxEntropy', $VERSION); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub new { |
14
|
5
|
|
|
5
|
1
|
82
|
my $class = shift; |
15
|
5
|
|
|
|
|
77
|
my $self = { |
16
|
|
|
|
|
|
|
smoother => {}, |
17
|
|
|
|
|
|
|
algorithm => {}, |
18
|
|
|
|
|
|
|
@_, |
19
|
|
|
|
|
|
|
samples => [], |
20
|
|
|
|
|
|
|
x_bucket => {}, |
21
|
|
|
|
|
|
|
y_bucket => {}, |
22
|
|
|
|
|
|
|
x_list => [], |
23
|
|
|
|
|
|
|
y_list => [], |
24
|
|
|
|
|
|
|
x_num => 0, |
25
|
|
|
|
|
|
|
y_num => 0, |
26
|
|
|
|
|
|
|
f_num => 0, |
27
|
|
|
|
|
|
|
af_num => 0, |
28
|
|
|
|
|
|
|
f_freq => [], |
29
|
|
|
|
|
|
|
f_map => [], |
30
|
|
|
|
|
|
|
last_cut => -1, |
31
|
|
|
|
|
|
|
_c => {} |
32
|
|
|
|
|
|
|
}; |
33
|
5
|
|
|
|
|
27
|
return bless $self, $class; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub see { |
37
|
21
|
|
|
21
|
1
|
128
|
my ($self, $x, $y, $w) = @_; |
38
|
21
|
100
|
|
|
|
94
|
$w = 1 if not defined($w); |
39
|
21
|
|
|
|
|
42
|
my ($x1, $y1) = ([], undef); |
40
|
|
|
|
|
|
|
# preprocess if $x is hashref |
41
|
4
|
|
|
|
|
7
|
$x = [ |
42
|
|
|
|
|
|
|
map { |
43
|
21
|
100
|
|
|
|
65
|
my $attr = $_; |
44
|
2
|
|
|
|
|
17
|
ref($x->{$attr}) eq 'ARRAY' ? |
45
|
4
|
100
|
|
|
|
30
|
map { "$attr:$_" } @{$x->{$attr}} : "$_:$x->{$_}" |
|
1
|
|
|
|
|
4
|
|
46
|
|
|
|
|
|
|
} keys %$x |
47
|
|
|
|
|
|
|
] if ref($x) eq 'HASH'; |
48
|
|
|
|
|
|
|
# update af_num |
49
|
21
|
100
|
|
|
|
86
|
$self->{af_num} = scalar(@$x) if $self->{af_num} == 0; |
50
|
21
|
100
|
|
|
|
58
|
$self->{af_num} = -1 if $self->{af_num} != scalar(@$x); |
51
|
|
|
|
|
|
|
# convert y from string to ID |
52
|
21
|
|
|
|
|
43
|
my $y_id = $self->{y_bucket}->{$y}; |
53
|
|
|
|
|
|
|
# new y |
54
|
21
|
100
|
|
|
|
56
|
if (!defined($y_id)) { |
55
|
|
|
|
|
|
|
# update y_list, y_num, y_bucket, f_freq |
56
|
19
|
|
|
|
|
28
|
push @{$self->{y_list}}, $y; |
|
19
|
|
|
|
|
41
|
|
57
|
19
|
|
|
|
|
30
|
$self->{y_num} = scalar(@{$self->{y_list}}); |
|
19
|
|
|
|
|
135
|
|
58
|
19
|
|
|
|
|
36
|
$y_id = $self->{y_num} - 1; |
59
|
19
|
|
|
|
|
49
|
$self->{y_bucket}->{$y} = $y_id; |
60
|
19
|
|
|
|
|
32
|
push @{$self->{f_freq}}, [map { 0 } (1 .. $self->{x_num})]; |
|
19
|
|
|
|
|
83
|
|
|
36
|
|
|
|
|
71
|
|
61
|
|
|
|
|
|
|
# save ID |
62
|
19
|
|
|
|
|
36
|
$y1 = $y_id; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
# old y |
65
|
2
|
|
|
|
|
4
|
else { $y1 = $y_id } |
66
|
|
|
|
|
|
|
# convert x from strings to IDs |
67
|
21
|
|
|
|
|
47
|
for (@$x) { |
68
|
52
|
|
|
|
|
92
|
my $x_id = $self->{x_bucket}->{$_}; |
69
|
|
|
|
|
|
|
# new x |
70
|
52
|
100
|
|
|
|
101
|
if (!defined($x_id)) { |
71
|
|
|
|
|
|
|
# update x_list, x_num, x_bucket, f_freq |
72
|
41
|
|
|
|
|
45
|
push @{$self->{x_list}}, $_; |
|
41
|
|
|
|
|
96
|
|
73
|
41
|
|
|
|
|
51
|
$self->{x_num} = scalar(@{$self->{x_list}}); |
|
41
|
|
|
|
|
75
|
|
74
|
41
|
|
|
|
|
59
|
$x_id = $self->{x_num} - 1; |
75
|
41
|
|
|
|
|
99
|
$self->{x_bucket}->{$_} = $x_id; |
76
|
41
|
|
|
|
|
94
|
push @{$self->{f_freq}->[$_]}, 0 for (0 .. $self->{y_num} - 1); |
|
67
|
|
|
|
|
173
|
|
77
|
|
|
|
|
|
|
# save ID |
78
|
41
|
|
|
|
|
96
|
push @$x1, $x_id; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
# old x |
81
|
11
|
|
|
|
|
19
|
else { push @$x1, $x_id } |
82
|
|
|
|
|
|
|
# update f_freq |
83
|
52
|
|
|
|
|
124
|
$self->{f_freq}->[$y_id]->[$x_id] += $w; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
# add the sample |
86
|
21
|
|
|
|
|
34
|
push @{$self->{samples}}, [$x1, $y1, $w]; |
|
21
|
|
|
|
|
77
|
|
87
|
21
|
|
|
|
|
81
|
$self->{last_cut} = -1; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub cut { |
91
|
8
|
|
|
8
|
1
|
64
|
my ($self, $t) = @_; |
92
|
8
|
|
|
|
|
40
|
$self->{f_num} = 0; |
93
|
8
|
|
|
|
|
29
|
for my $y (0 .. $self->{y_num} - 1) { |
94
|
18
|
|
|
|
|
135
|
for my $x (0 .. $self->{x_num} - 1) { |
95
|
97
|
100
|
|
|
|
188
|
if ($self->{f_freq}->[$y]->[$x] >= $t) { |
96
|
84
|
|
|
|
|
173
|
$self->{f_map}->[$y]->[$x] = $self->{f_num}; |
97
|
84
|
|
|
|
|
143
|
$self->{f_num}++; |
98
|
|
|
|
|
|
|
} |
99
|
13
|
|
|
|
|
30
|
else { $self->{f_map}->[$y]->[$x] = -1 } |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
} |
102
|
8
|
|
|
|
|
24
|
$self->{last_cut} = $t; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub forget_all { |
106
|
3
|
|
|
3
|
1
|
46
|
my $self = shift; |
107
|
3
|
|
|
|
|
8
|
$self->{samples} = []; |
108
|
3
|
|
|
|
|
17
|
$self->{x_bucket} = {}; |
109
|
3
|
|
|
|
|
14
|
$self->{y_bucket} = {}; |
110
|
3
|
|
|
|
|
11
|
$self->{x_num} = 0; |
111
|
3
|
|
|
|
|
5
|
$self->{y_num} = 0; |
112
|
3
|
|
|
|
|
7
|
$self->{f_num} = 0; |
113
|
3
|
|
|
|
|
7
|
$self->{x_list} = []; |
114
|
3
|
|
|
|
|
11
|
$self->{y_list} = []; |
115
|
3
|
|
|
|
|
8
|
$self->{af_num} = 0; |
116
|
3
|
|
|
|
|
7
|
$self->{f_freq} = []; |
117
|
3
|
|
|
|
|
12
|
$self->{f_map} = []; |
118
|
3
|
|
|
|
|
9
|
$self->{last_cut} = -1; |
119
|
3
|
|
|
|
|
8
|
$self->{_c} = {}; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub _cache { |
123
|
8
|
|
|
8
|
|
17
|
my $self = shift; |
124
|
8
|
|
|
|
|
90
|
$self->_cache_samples; |
125
|
8
|
|
|
|
|
46
|
$self->_cache_f_map; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub _free_cache { |
129
|
8
|
|
|
8
|
|
20
|
my $self = shift; |
130
|
8
|
|
|
|
|
38
|
$self->_free_cache_samples; |
131
|
8
|
|
|
|
|
33
|
$self->_free_cache_f_map; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub learn { |
135
|
7
|
|
|
7
|
1
|
67
|
my $self = shift; |
136
|
|
|
|
|
|
|
# cut 0 for default |
137
|
7
|
100
|
|
|
|
41
|
$self->cut(0) if $self->{last_cut} == -1; |
138
|
|
|
|
|
|
|
# initialize |
139
|
7
|
|
|
|
|
26
|
$self->{lambda} = [map { 0 } (1 .. $self->{f_num})]; |
|
92
|
|
|
|
|
163
|
|
140
|
7
|
|
|
|
|
37
|
$self->_cache; |
141
|
|
|
|
|
|
|
# optimize |
142
|
7
|
|
100
|
|
|
51
|
my $type = $self->{algorithm}->{type} || 'lbfgs'; |
143
|
7
|
100
|
|
|
|
30
|
if ($type eq 'lbfgs') { |
|
|
50
|
|
|
|
|
|
144
|
4
|
|
|
|
|
10
|
my $o = Algorithm::LBFGS->new(%{$self->{algorithm}}); |
|
4
|
|
|
|
|
56
|
|
145
|
4
|
|
|
|
|
138
|
$o->fmin(\&_neg_log_likelihood, $self->{lambda}, |
146
|
|
|
|
|
|
|
$self->{algorithm}->{progress_cb}, $self); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
elsif ($type eq 'gis') { |
149
|
3
|
50
|
33
|
|
|
28
|
die 'GIS is not applicable' |
150
|
|
|
|
|
|
|
if $self->{af_num} == -1 or $self->{last_cut} != 0; |
151
|
3
|
|
|
|
|
9
|
my $progress_cb = $self->{algorithm}->{progress_cb}; |
152
|
|
|
|
|
|
|
$progress_cb = sub { |
153
|
0
|
|
|
0
|
|
0
|
print "$_[0]: |lambda| = $_[3], |d_lambda| = $_[4]\n"; 0; |
|
0
|
|
|
|
|
0
|
|
154
|
3
|
50
|
66
|
|
|
21
|
} if defined($progress_cb) and $progress_cb eq 'verbose'; |
155
|
3
|
|
50
|
|
|
29
|
my $epsilon = $self->{algorithm}->{epsilon} || 1e-3; |
156
|
3
|
|
|
|
|
139
|
$self->{lambda} = $self->_apply_gis($progress_cb, $epsilon); |
157
|
|
|
|
|
|
|
} |
158
|
0
|
|
|
|
|
0
|
else { die "$type is not a valid algorithm type" } |
159
|
|
|
|
|
|
|
# finish |
160
|
7
|
|
|
|
|
988
|
$self->_free_cache; |
161
|
7
|
|
|
|
|
23
|
return $self->_create_model; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub _create_model { |
165
|
7
|
|
|
7
|
|
13
|
my $self = shift; |
166
|
7
|
|
|
|
|
87
|
my $model = AI::MaxEntropy::Model->new; |
167
|
21
|
|
|
|
|
198
|
$model->{$_} = ref($self->{$_}) eq 'ARRAY' ? [@{$self->{$_}}] : |
|
14
|
|
|
|
|
114
|
|
168
|
|
|
|
|
|
|
ref($self->{$_}) eq 'HASH' ? {%{$self->{$_}}} : |
169
|
|
|
|
|
|
|
$self->{$_} |
170
|
7
|
100
|
|
|
|
42
|
for qw/x_list y_list lambda x_num y_num f_num x_bucket y_bucket/; |
|
|
100
|
|
|
|
|
|
171
|
16
|
|
|
|
|
77
|
$model->{f_map}->[$_] = [@{$self->{f_map}->[$_]}] |
172
|
7
|
|
|
|
|
29
|
for (0 .. $self->{y_num} - 1); |
173
|
7
|
|
|
|
|
49
|
return $model; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
1; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
__END__ |