File Coverage

blib/lib/AI/MaxEntropy.pm

Criterion	Covered	Total	%
statement	112	115	97.3
branch	27	30	90.0
condition	6	10	60.0
subroutine	13	14	92.8
pod	5	5	100.0
total	163	174	93.6

line	stmt	bran	cond	sub	pod	time	code
1	5			5		879	use strict;
	5					10
	5					193
2	5			5		28	use warnings;
	5					7
	5					200
3
4							package AI::MaxEntropy;
5
6	5			5		5211	use Algorithm::LBFGS;
	5					6980
	5					158
7	5			5		3021	use AI::MaxEntropy::Model;
	5					14
	5					152
8	5			5		42	use XSLoader;
	5					11
	5					6312
9
10							our $VERSION = '0.20';
11							XSLoader::load('AI::MaxEntropy', $VERSION);
12
13							sub new {
14	5			5	1	82	my $class = shift;
15	5					77	my $self = {
16							smoother => {},
17							algorithm => {},
18							@_,
19							samples => [],
20							x_bucket => {},
21							y_bucket => {},
22							x_list => [],
23							y_list => [],
24							x_num => 0,
25							y_num => 0,
26							f_num => 0,
27							af_num => 0,
28							f_freq => [],
29							f_map => [],
30							last_cut => -1,
31							_c => {}
32							};
33	5					27	return bless $self, $class;
34							}
35
36							sub see {
37	21			21	1	128	my ($self, $x, $y, $w) = @_;
38	21	100				94	$w = 1 if not defined($w);
39	21					42	my ($x1, $y1) = ([], undef);
40							# preprocess if $x is hashref
41	4					7	$x = [
42							map {
43	21	100				65	my $attr = $_;
44	2					17	ref($x->{$attr}) eq 'ARRAY' ?
45	4	100				30	map { "$attr:$_" } @{$x->{$attr}} : "$_:$x->{$_}"
	1					4
46							} keys %$x
47							] if ref($x) eq 'HASH';
48							# update af_num
49	21	100				86	$self->{af_num} = scalar(@$x) if $self->{af_num} == 0;
50	21	100				58	$self->{af_num} = -1 if $self->{af_num} != scalar(@$x);
51							# convert y from string to ID
52	21					43	my $y_id = $self->{y_bucket}->{$y};
53							# new y
54	21	100				56	if (!defined($y_id)) {
55							# update y_list, y_num, y_bucket, f_freq
56	19					28	push @{$self->{y_list}}, $y;
	19					41
57	19					30	$self->{y_num} = scalar(@{$self->{y_list}});
	19					135
58	19					36	$y_id = $self->{y_num} - 1;
59	19					49	$self->{y_bucket}->{$y} = $y_id;
60	19					32	push @{$self->{f_freq}}, [map { 0 } (1 .. $self->{x_num})];
	19					83
	36					71
61							# save ID
62	19					36	$y1 = $y_id;
63							}
64							# old y
65	2					4	else { $y1 = $y_id }
66							# convert x from strings to IDs
67	21					47	for (@$x) {
68	52					92	my $x_id = $self->{x_bucket}->{$_};
69							# new x
70	52	100				101	if (!defined($x_id)) {
71							# update x_list, x_num, x_bucket, f_freq
72	41					45	push @{$self->{x_list}}, $_;
	41					96
73	41					51	$self->{x_num} = scalar(@{$self->{x_list}});
	41					75
74	41					59	$x_id = $self->{x_num} - 1;
75	41					99	$self->{x_bucket}->{$_} = $x_id;
76	41					94	push @{$self->{f_freq}->[$_]}, 0 for (0 .. $self->{y_num} - 1);
	67					173
77							# save ID
78	41					96	push @$x1, $x_id;
79							}
80							# old x
81	11					19	else { push @$x1, $x_id }
82							# update f_freq
83	52					124	$self->{f_freq}->[$y_id]->[$x_id] += $w;
84							}
85							# add the sample
86	21					34	push @{$self->{samples}}, [$x1, $y1, $w];
	21					77
87	21					81	$self->{last_cut} = -1;
88							}
89
90							sub cut {
91	8			8	1	64	my ($self, $t) = @_;
92	8					40	$self->{f_num} = 0;
93	8					29	for my $y (0 .. $self->{y_num} - 1) {
94	18					135	for my $x (0 .. $self->{x_num} - 1) {
95	97	100				188	if ($self->{f_freq}->[$y]->[$x] >= $t) {
96	84					173	$self->{f_map}->[$y]->[$x] = $self->{f_num};
97	84					143	$self->{f_num}++;
98							}
99	13					30	else { $self->{f_map}->[$y]->[$x] = -1 }
100							}
101							}
102	8					24	$self->{last_cut} = $t;
103							}
104
105							sub forget_all {
106	3			3	1	46	my $self = shift;
107	3					8	$self->{samples} = [];
108	3					17	$self->{x_bucket} = {};
109	3					14	$self->{y_bucket} = {};
110	3					11	$self->{x_num} = 0;
111	3					5	$self->{y_num} = 0;
112	3					7	$self->{f_num} = 0;
113	3					7	$self->{x_list} = [];
114	3					11	$self->{y_list} = [];
115	3					8	$self->{af_num} = 0;
116	3					7	$self->{f_freq} = [];
117	3					12	$self->{f_map} = [];
118	3					9	$self->{last_cut} = -1;
119	3					8	$self->{_c} = {};
120							}
121
122							sub _cache {
123	8			8		17	my $self = shift;
124	8					90	$self->_cache_samples;
125	8					46	$self->_cache_f_map;
126							}
127
128							sub _free_cache {
129	8			8		20	my $self = shift;
130	8					38	$self->_free_cache_samples;
131	8					33	$self->_free_cache_f_map;
132							}
133
134							sub learn {
135	7			7	1	67	my $self = shift;
136							# cut 0 for default
137	7	100				41	$self->cut(0) if $self->{last_cut} == -1;
138							# initialize
139	7					26	$self->{lambda} = [map { 0 } (1 .. $self->{f_num})];
	92					163
140	7					37	$self->_cache;
141							# optimize
142	7		100			51	my $type = $self->{algorithm}->{type} \|\| 'lbfgs';
143	7	100				30	if ($type eq 'lbfgs') {
		50
144	4					10	my $o = Algorithm::LBFGS->new(%{$self->{algorithm}});
	4					56
145	4					138	$o->fmin(\&_neg_log_likelihood, $self->{lambda},
146							$self->{algorithm}->{progress_cb}, $self);
147							}
148							elsif ($type eq 'gis') {
149	3	50	33			28	die 'GIS is not applicable'
150							if $self->{af_num} == -1 or $self->{last_cut} != 0;
151	3					9	my $progress_cb = $self->{algorithm}->{progress_cb};
152							$progress_cb = sub {
153	0			0		0	print "$_[0]: \|lambda\| = $_[3], \|d_lambda\| = $_[4]\n"; 0;
	0					0
154	3	50	66			21	} if defined($progress_cb) and $progress_cb eq 'verbose';
155	3		50			29	my $epsilon = $self->{algorithm}->{epsilon} \|\| 1e-3;
156	3					139	$self->{lambda} = $self->_apply_gis($progress_cb, $epsilon);
157							}
158	0					0	else { die "$type is not a valid algorithm type" }
159							# finish
160	7					988	$self->_free_cache;
161	7					23	return $self->_create_model;
162							}
163
164							sub _create_model {
165	7			7		13	my $self = shift;
166	7					87	my $model = AI::MaxEntropy::Model->new;
167	21					198	$model->{$_} = ref($self->{$_}) eq 'ARRAY' ? [@{$self->{$_}}] :
	14					114
168							ref($self->{$_}) eq 'HASH' ? {%{$self->{$_}}} :
169							$self->{$_}
170	7	100				42	for qw/x_list y_list lambda x_num y_num f_num x_bucket y_bucket/;
		100
171	16					77	$model->{f_map}->[$_] = [@{$self->{f_map}->[$_]}]
172	7					29	for (0 .. $self->{y_num} - 1);
173	7					49	return $model;
174							}
175
176							1;
177
178							__END__