File Coverage

blib/lib/Algorithm/AdaBoost.pm
Criterion   Covered  Total      %
statement        52     56   92.8
branch            3      6   50.0
condition         4     12   33.3
subroutine       14     15   93.3
pod               7     10   70.0
total            80     99   80.8


line stmt bran cond sub pod time code
1             package Algorithm::AdaBoost;
2              
3 2     2   850 use 5.014;
  2         6  
  2         62  
4 2     2   874 use Algorithm::AdaBoost::Classifier;
  2         7  
  2         64  
5 2     2   24 use Carp qw//;
  2         4  
  2         36  
6 2     2   13 use List::Util;
  2         5  
  2         114  
7 2     2   11 use Smart::Args;
  2         2  
  2         1329  
8              
9             our $VERSION = '0.01';
10              
11             sub new {
12 1     1 1 12     args
13                     my $class => 'ClassName',
14                     my $training_set => +{ isa => 'ArrayRef', optional => 1 },
15                     my $weak_classifier_generator => +{ isa => 'CodeRef', optional => 1 };
16              
17 1         179     bless +{
18                     training_set => $training_set,
19                     weak_classifier_generator => $weak_classifier_generator,
20                 } => $class;
21             }
22              
23             sub calculate_classifier_weight {
24 1000     1000 0 5025     args
25                     my $self,
26                     my $classifier => 'CodeRef',
27                     my $distribution => 'ArrayRef[Num]';
28              
29 1000         108661     my $error_ratio = $self->evaluate_error_ratio(
30                         classifier => $classifier,
31                         distribution => $distribution,
32                     );
33 1000         8072     return log((1 - $error_ratio) / $error_ratio) / 2;
34             }
35              
36             sub classify {
37 0     0 1 0     args_pos
38                     my $self,
39                     my $feature => 'Any';
40 0 0       0     Carp::croak 'Training phase is not done yet.' unless $self->trained;
41 0         0     $self->final_classifier->classify($feature);
42             }
43              
44             sub construct_hardest_distribution {
45 1000     1000 0 4741     args
46                     my $self,
47                     my $classifier => 'CodeRef',
48                     my $previous_distribution => 'ArrayRef[Num]',
49                     my $training_set => 'ArrayRef[HashRef]',
50                     my $weight => 'Num';
51              
52 100000         5134882     my @distribution = map {
53 1000         192521         my $training_data = $training_set->[$_];
54 100000         327841         $previous_distribution->[$_]
55                                 * exp(-$weight * $training_data->{label}
56                                     * $classifier->($training_data->{feature}));
57                     } 0 .. $#$previous_distribution;
58 1000         5972393     my $partition_function = List::Util::sum(@distribution);
59 1000         2467     [ map { $_ / $partition_function } @distribution ];
  100000         8197384  
60             }
61              
62             sub evaluate_error_ratio {
63 1000     1000 0 3726     args
64                     my $self,
65                     my $classifier => 'CodeRef',
66                     my $distribution => 'ArrayRef[Num]';
67              
68 1000         97187     my $accuracy = 0;
69 1000         5482     for my $i (0 .. $#$distribution) {
70 100000         2720949         my $training_data = $self->training_set->[$i];
71 100000 100       264243         if ($classifier->($training_data->{feature}) == $training_data->{label}) {
72 51581         2717393             $accuracy += $distribution->[$i];
73                         }
74                     }
75 1000         25703     return 1 - $accuracy;
76             }
77              
78             sub final_classifier {
79 1     1 1 4     args my $self;
80 1 50       22     Carp::croak 'The classifier is not trained' unless $self->trained;
81 1         4     return $self->{final_classifier};
82             }
83              
84             sub train {
85 1     1 1 11     args
86                     my $self,
87                     my $num_iterations => 'Int',
88                     my $training_set => +{ isa => 'ArrayRef', optional => 1 },
89                     my $weak_classifier_generator => +{ isa => 'CodeRef', optional => 1 };
90              
91 1   33     154     $training_set //= $self->training_set
      33        
92                         // Carp::croak('Given no training set.');
93 1   33     8     $weak_classifier_generator //= $self->weak_classifier_generator
      33        
94                         // Carp::croak('Given no weak classifier generator.');
95 1         3     my $num_training_set = @$training_set;
96              
97                 # Initial distribution is uniform.
98 1         31     my $distribution = [ (1 / $num_training_set) x $num_training_set ];
99              
100 1         3     my ($weak_classifier, $weight);
101 0         0     my @weak_classifiers;
102 1         6     while ($num_iterations--) {
103                     # Construct a weak classifier which classifies data on the distribution.
104 1000         4003         $weak_classifier = $weak_classifier_generator->(
105                             distribution => $distribution,
106                             training_set => $training_set,
107                         );
108 1000         29255799         $weight = $self->calculate_classifier_weight(
109                             classifier => $weak_classifier,
110                             distribution => $distribution,
111                         );
112 1000         4571555         push @weak_classifiers, +{
113                             classifier => $weak_classifier,
114                             weight => $weight,
115                         };
116                 } continue {
117 1000         4151         $distribution = $self->construct_hardest_distribution(
118                             classifier => $weak_classifier,
119                             previous_distribution => $distribution,
120                             training_set => $training_set,
121                             weight => $weight,
122                         );
123                 }
124              
125 1         16     return $self->{final_classifier} = Algorithm::AdaBoost::Classifier->new(
126                     weak_classifiers => \@weak_classifiers,
127                 );
128             }
129              
130 3     3 1 28 sub trained { exists shift->{final_classifier} }
131              
132 100001     100001 1 234838 sub training_set { shift->{training_set} }
133              
134 1     1 1 6 sub weak_classifier_generator { shift->{weak_classifier_generator} }
135              
136             1;
137             __END__
138              
139             =head1 NAME
140              
141             Algorithm::AdaBoost - AdaBoost learning algorithm
142              
143             =head1 SYNOPSIS
144              
145             use Algorithm::AdaBoost;
146              
147             # Training phase.
148             my $learner = Algorithm::AdaBoost->new(
149                 training_set => [
150                     +{ feature => [...], label => 1, },
151                     +{ feature => [...], label => -1, },
152                     +{ feature => [...], label => -1, },
153                     ...
154                 ],
155                 weak_classifier_generator => \&my_poor_learning_algorithm,
156             );
157             $learner->train(num_iterations => 1_000);
158              
159             # Now you have a boosted classifier (Algorithm::AdaBoost::Classifier).
160             my $classifier = $learner->final_classifier;
161             given ($classifier->classify([...])) {
162                 when ($_ > 0) { say 'The data belongs to class 1.' }
163                 when ($_ < 0) { say 'The data belongs to class 2.' }
164                 default { warn 'The data cannot be classified.' }
165             }
166              
167             =head1 DESCRIPTION
168              
169             AdaBoost is a machine learning algorithm proposed by Freund and Schapire.
170             Given an arbitrary binary classification algorithm, AdaBoost can construct a more accurate classifier from it (i.e., it is a meta-algorithm).
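 
                On each iteration t, C<train> generates a weak classifier h_t on the current distribution D_t and weights it by its weighted error ratio e_t (see C<calculate_classifier_weight>):
 
                    a_t = log((1 - e_t) / e_t) / 2
 
                It then reweights the distribution so that misclassified examples gain probability mass (see C<construct_hardest_distribution>):
 
                    D_{t+1}(i) = D_t(i) * exp(-a_t * y_i * h_t(x_i)) / Z_t
 
                where y_i is the label of the i-th example and Z_t is a normalizing constant. The final classifier returns the weighted vote, the sum over t of a_t * h_t(x), whose sign indicates the predicted class.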
171              
172             =head1 METHODS
173              
174             =head2 new
175              
176             Constructor. You can specify 2 optional attributes:
177              
178             =over 2
179              
180             =item training_set
181              
182             An ArrayRef used as the training data set.
183              
184             Each item is a HashRef with 2 keys: C<feature> and C<label>. C<feature> is an arbitrary input that the classifier accepts, and C<label> is the expected output label (C<+1> or C<-1>).
185              
186             =item weak_classifier_generator
187              
188             A CodeRef which is expected to generate a binary classifier function.
189              
190             The generator is called with 2 named parameters, like this:
191              
192             my $classifier = $generator->(
193                 distribution => [...],
194                 training_set => [...],
195             );
196              
197             C<distribution> is an ArrayRef in which each item is the probability of the corresponding item in C<training_set>, i.e., C<distribution> is P(X = t_i) where t_i is the i-th item in C<training_set>.
198              
199             The generated classifier is expected to be a CodeRef which takes 1 argument (the value of C<feature>) and returns C<+1> or C<-1> as an output label. A sketch of such a generator is shown below.
200              
201             =back
202              
203             Either of these can be overridden temporarily with parameters to C<train>.
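 
                For instance, a weak classifier generator might fit a decision stump. The sketch below assumes each C<feature> is a single number; that representation is an illustration only, since the module passes C<feature> through to your classifiers untouched.
 
                    # A hypothetical decision-stump generator: try every training value
                    # as a threshold, in both polarities, and keep the stump with the
                    # lowest weighted error under the given distribution.
                    sub my_poor_learning_algorithm {
                        my %args = @_;
                        my ($distribution, $training_set) =
                            @args{qw/distribution training_set/};
                        my ($best_error, $best_stump) = (2, undef);  # error never exceeds 1
                        for my $datum (@$training_set) {
                            my $threshold = $datum->{feature};
                            for my $polarity (+1, -1) {
                                my $stump = sub {
                                    my $feature = shift;
                                    $feature >= $threshold ? $polarity : -$polarity;
                                };
                                # Accumulate the distribution-weighted error of this stump.
                                my $error = 0;
                                for my $i (0 .. $#$training_set) {
                                    my $t = $training_set->[$i];
                                    $error += $distribution->[$i]
                                        if $stump->($t->{feature}) != $t->{label};
                                }
                                ($best_error, $best_stump) = ($error, $stump)
                                    if $error < $best_error;
                            }
                        }
                        return $best_stump;
                    }
 
                Note that C<calculate_classifier_weight> takes log((1 - e) / e), so the weighted error of a generated classifier must stay strictly between 0 and 1.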
204              
205             =head2 classify
206              
207             Shorthand for C<< $learner->final_classifier->classify >>. Croaks if C<train> has not been called yet.
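 
                For example (C<$feature> is a hypothetical input of whatever form your weak classifiers accept):
 
                    $learner->classify($feature);                    # same as:
                    $learner->final_classifier->classify($feature);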
208              
209             =head2 final_classifier
210              
211             Returns the last constructed classifier. Croaks if C<train> has not been called yet.
212              
213             =head2 train
214              
215             Constructs a stronger classifier from the given training set and weak learning algorithm.
216              
217             This method takes 1 mandatory parameter:
218              
219             =over 2
220              
221             =item num_iterations
222              
223             Specifies how many training iterations to execute (i.e., how many weak classifiers to generate).
224              
225             =back
226              
227             and 2 optional parameters:
228              
229             =over 2
230              
231             =item training_set
232              
233             =item weak_classifier_generator
234              
235             =back
236              
237             If the optional parameters are omitted, the parameters given to C<new> are used as defaults. If the constructor parameters were omitted too, an exception is raised.
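 
                For example, a learner configured in C<new> can be retrained on another data set (C<$other_training_set> is a hypothetical placeholder):
 
                    $learner->train(
                        num_iterations => 100,
                        training_set   => $other_training_set,
                    );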
238              
239             =head2 trained
240              
241             True if the C<train> method has been called, false otherwise.
242              
243             =head1 AUTHOR
244              
245             Koichi SATOH E<lt>sekia@cpan.orgE<gt>
246              
247             =head1 SEE ALSO
248              
249             L<A Short Introduction to Boosting|http://www.site.uottawa.ca/~stan/csi5387/boost-tut-ppr.pdf>
250              
251             =head1 LICENSE
252              
253             The MIT License
254              
255             Copyright (C) 2012 by Koichi SATOH
256              
257             Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
258              
259             The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
260              
261             THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
262              
263             =cut