File Coverage

blib/lib/Algorithm/AdaBoost.pm
Criterion   Covered  Total      %
statement        52     56   92.8
branch            3      6   50.0
condition         4     12   33.3
subroutine       14     15   93.3
pod               7     10   70.0
total            80     99   80.8


line stmt bran cond sub pod time code
1             package Algorithm::AdaBoost;
2              
3 2     2   850 use 5.014;
  2         6  
  2         62  
4 2     2   874 use Algorithm::AdaBoost::Classifier;
  2         7  
  2         64  
5 2     2   24 use Carp qw//;
  2         4  
  2         36  
6 2     2   13 use List::Util;
  2         5  
  2         114  
7 2     2   11 use Smart::Args;
  2         2  
  2         1329  
8              
9             our $VERSION = '0.01';
10              
11             sub new {
12 1     1 1 12     args
13                     my $class => 'ClassName',
14                     my $training_set => +{ isa => 'ArrayRef', optional => 1 },
15                     my $weak_classifier_generator => +{ isa => 'CodeRef', optional => 1 };
16              
17 1         179     bless +{
18                     training_set => $training_set,
19                     weak_classifier_generator => $weak_classifier_generator,
20                 } => $class;
21             }
22              
23             sub calculate_classifier_weight {
24 1000     1000 0 5025     args
25                     my $self,
26                     my $classifier => 'CodeRef',
27                     my $distribution => 'ArrayRef[Num]';
28              
29 1000         108661     my $error_ratio = $self->evaluate_error_ratio(
30                         classifier => $classifier,
31                         distribution => $distribution,
32                     );
33 1000         8072     return log((1 - $error_ratio) / $error_ratio) / 2;
34             }
35              
36             sub classify {
37 0     0 1 0     args_pos
38                     my $self,
39                     my $feature => 'Any';
40 0 0       0     Carp::croak 'Training phase is not done yet.' unless $self->trained;
41 0         0     $self->final_classifier->classify($feature);
42             }
43              
44             sub construct_hardest_distribution {
45 1000     1000 0 4741     args
46                     my $self,
47                     my $classifier => 'CodeRef',
48                     my $previous_distribution => 'ArrayRef[Num]',
49                     my $training_set => 'ArrayRef[HashRef]',
50                     my $weight => 'Num';
51              
52 100000         5134882     my @distribution = map {
53 1000         192521         my $training_data = $training_set->[$_];
54 100000         327841         $previous_distribution->[$_]
55                                 * exp(-$weight * $training_data->{label}
56                                     * $classifier->($training_data->{feature}));
57                     } 0 .. $#$previous_distribution;
58 1000         5972393     my $partition_function = List::Util::sum(@distribution);
59 1000         2467     [ map { $_ / $partition_function } @distribution ];
  100000         8197384  
60             }
61              
62             sub evaluate_error_ratio {
63 1000     1000 0 3726     args
64                     my $self,
65                     my $classifier => 'CodeRef',
66                     my $distribution => 'ArrayRef[Num]';
67              
68 1000         97187     my $accuracy = 0;
69 1000         5482     for my $i (0 .. $#$distribution) {
70 100000         2720949         my $training_data = $self->training_set->[$i];
71 100000 100       264243         if ($classifier->($training_data->{feature}) == $training_data->{label}) {
72 51581         2717393             $accuracy += $distribution->[$i];
73                         }
74                     }
75 1000         25703     return 1 - $accuracy;
76             }
77              
78             sub final_classifier {
79 1     1 1 4     args my $self;
80 1 50       22     Carp::croak 'The classifier is not trained' unless $self->trained;
81 1         4     return $self->{final_classifier};
82             }
83              
84             sub train {
85 1     1 1 11     args
86                     my $self,
87                     my $num_iterations => 'Int',
88                     my $training_set => +{ isa => 'ArrayRef', optional => 1 },
89                     my $weak_classifier_generator => +{ isa => 'CodeRef', optional => 1 };
90              
91 1   33     154     $training_set //= $self->training_set
      33        
92                         // Carp::croak('Given no training set.');
93 1   33     8     $weak_classifier_generator //= $self->weak_classifier_generator
      33        
94                         // Carp::croak('Given no weak classifier generator.');
95 1         3     my $num_training_set = @$training_set;
96              
97                 # Initial distribution is uniform.
98 1         31     my $distribution = [ (1 / $num_training_set) x $num_training_set ];
99              
100 1         3     my ($weak_classifier, $weight);
101 0         0     my @weak_classifiers;
102 1         6     while ($num_iterations--) {
103                     # Construct a weak classifier which classifies data on the distribution.
104 1000         4003         $weak_classifier = $weak_classifier_generator->(
105                             distribution => $distribution,
106                             training_set => $training_set,
107                         );
108 1000         29255799         $weight = $self->calculate_classifier_weight(
109                             classifier => $weak_classifier,
110                             distribution => $distribution,
111                         );
112 1000         4571555         push @weak_classifiers, +{
113                             classifier => $weak_classifier,
114                             weight => $weight,
115                         };
116                 } continue {
117 1000         4151         $distribution = $self->construct_hardest_distribution(
118                             classifier => $weak_classifier,
119                             previous_distribution => $distribution,
120                             training_set => $training_set,
121                             weight => $weight,
122                         );
123                 }
124              
125 1         16     return $self->{final_classifier} = Algorithm::AdaBoost::Classifier->new(
126                     weak_classifiers => \@weak_classifiers,
127                 );
128             }
129              
130 3     3 1 28 sub trained { exists shift->{final_classifier} }
131              
132 100001     100001 1 234838 sub training_set { shift->{training_set} }
133              
134 1     1 1 6 sub weak_classifier_generator { shift->{weak_classifier_generator} }
135              
136             1;
137             __END__
138              
139             =head1 NAME
140              
141             Algorithm::AdaBoost - AdaBoost learning algorithm
142              
143             =head1 SYNOPSIS
144              
145             use Algorithm::AdaBoost;
146              
147             # Training phase.
148             my $learner = Algorithm::AdaBoost->new(
149                 training_set => [
150                     +{ feature => [...], label => 1, },
151                     +{ feature => [...], label => -1, },
152                     +{ feature => [...], label => -1, },
153                     ...
154                 ],
155                 weak_classifier_generator => \&my_poor_learning_algorithm,
156             );
157             $learner->train(num_iterations => 1_000);
158              
159             # Now you have a boosted classifier (Algorithm::AdaBoost::Classifier).
160             my $classifier = $learner->final_classifier;
161             given ($classifier->classify([...])) {
162                 when ($_ > 0) { say 'The data belongs to class 1.' }
163                 when ($_ < 0) { say 'The data belongs to class 2.' }
164                 default { warn 'The data cannot be classified.' }
165             }
166              
167             =head1 DESCRIPTION
168              
169             AdaBoost is a machine learning algorithm proposed by Freund and Schapire.
170             Given an arbitrary binary classification algorithm, AdaBoost can construct a more accurate classifier from it (i.e., it is a meta-algorithm).
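 
                On each iteration t, C<train> generates a weak classifier h_t on the current distribution D_t and weights it by its weighted error ratio e_t (see C<calculate_classifier_weight>):
 
                    a_t = log((1 - e_t) / e_t) / 2
 
                It then reweights the distribution so that misclassified examples gain probability mass (see C<construct_hardest_distribution>):
 
                    D_{t+1}(i) = D_t(i) * exp(-a_t * y_i * h_t(x_i)) / Z_t
 
                where y_i is the label of the i-th example and Z_t is a normalizing constant. The final classifier returns the weighted vote, the sum over t of a_t * h_t(x), whose sign indicates the predicted class.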
171              
172             =head1 METHODS
173              
174             =head2 new
175              
176             Constructor. You can specify 2 optional attributes:
177              
178             =over 2
179              
180             =item training_set
181              
182             An ArrayRef used as the training data set.
183              
184             Each item is a HashRef with 2 keys: C<feature> and C<label>. C<feature> is an arbitrary input that the classifier accepts, and C<label> is the expected output label (C<+1> or C<-1>).
185              
186             =item weak_classifier_generator
187              
188             A CodeRef which is expected to generate a binary classifier function.
189              
190             The generator is called with 2 named parameters, like this:
191              
192             my $classifier = $generator->(
193                 distribution => [...],
194                 training_set => [...],
195             );
196              
197             C<distribution> is an ArrayRef in which each item is the probability of the corresponding item in C<training_set>, i.e., C<distribution> is P(X = t_i) where t_i is the i-th item in C<training_set>.
198              
199             The generated classifier is expected to be a CodeRef which takes 1 argument (the value of C<feature>) and returns C<+1> or C<-1> as an output label. A sketch of such a generator is shown below.
200              
201             =back
202              
203             Either of these can be overridden temporarily with parameters to C<train>.
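 
                For instance, a weak classifier generator might fit a decision stump. The sketch below assumes each C<feature> is a single number; that representation is an illustration only, since the module passes C<feature> through to your classifiers untouched.
 
                    # A hypothetical decision-stump generator: try every training value
                    # as a threshold, in both polarities, and keep the stump with the
                    # lowest weighted error under the given distribution.
                    sub my_poor_learning_algorithm {
                        my %args = @_;
                        my ($distribution, $training_set) =
                            @args{qw/distribution training_set/};
                        my ($best_error, $best_stump) = (2, undef);  # error never exceeds 1
                        for my $datum (@$training_set) {
                            my $threshold = $datum->{feature};
                            for my $polarity (+1, -1) {
                                my $stump = sub {
                                    my $feature = shift;
                                    $feature >= $threshold ? $polarity : -$polarity;
                                };
                                # Accumulate the distribution-weighted error of this stump.
                                my $error = 0;
                                for my $i (0 .. $#$training_set) {
                                    my $t = $training_set->[$i];
                                    $error += $distribution->[$i]
                                        if $stump->($t->{feature}) != $t->{label};
                                }
                                ($best_error, $best_stump) = ($error, $stump)
                                    if $error < $best_error;
                            }
                        }
                        return $best_stump;
                    }
 
                Note that C<calculate_classifier_weight> takes log((1 - e) / e), so the weighted error of a generated classifier must stay strictly between 0 and 1.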
204              
205             =head2 classify
206              
207             Shorthand for C<< $learner->final_classifier->classify >>. Croaks if C<train> has not been called yet.
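 
                For example (C<$feature> is a hypothetical input of whatever form your weak classifiers accept):
 
                    $learner->classify($feature);                    # same as:
                    $learner->final_classifier->classify($feature);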
208              
209             =head2 final_classifier
210              
211             Returns the last constructed classifier. Croaks if C<train> has not been called yet.
212              
213             =head2 train
214              
215             Constructs a stronger classifier from the given training set and weak learning algorithm.
216              
217             This method takes 1 mandatory parameter:
218              
219             =over 2
220              
221             =item num_iterations
222              
223             Specifies how many training iterations to execute (i.e., how many weak classifiers to generate).
224              
225             =back
226              
227             and 2 optional parameters:
228              
229             =over 2
230              
231             =item training_set
232              
233             =item weak_classifier_generator
234              
235             =back
236              
237             If the optional parameters are omitted, the parameters given to C<new> are used as defaults. If the constructor parameters were omitted too, an exception is raised.
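 
                For example, a learner configured in C<new> can be retrained on another data set (C<$other_training_set> is a hypothetical placeholder):
 
                    $learner->train(
                        num_iterations => 100,
                        training_set   => $other_training_set,
                    );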
238              
239             =head2 trained
240              
241             True if the C<train> method has been called, false otherwise.
242              
243             =head1 AUTHOR
244              
245             Koichi SATOH E<lt>sekia@cpan.orgE<gt>
246              
247             =head1 SEE ALSO
248              
249             L<A Short Introduction to Boosting|http://www.site.uottawa.ca/~stan/csi5387/boost-tut-ppr.pdf>
250              
251             =head1 LICENSE
252              
253             The MIT License
254              
255             Copyright (C) 2012 by Koichi SATOH
256              
257             Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
258              
259             The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
260              
261             THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
262              
263             =cut