File Coverage

blib/lib/Algorithm/LibLinear.pm

Criterion	Covered	Total	%
statement	4	6	66.6
branch			n/a
condition			n/a
subroutine	2	2	100.0
pod			n/a
total	6	8	75.0

line	stmt	sub	time	code
1				package Algorithm::LibLinear;
2
3	3	3	85	use 5.014;
	3		8
	3		107
4	3	3	1146	use Algorithm::LibLinear::DataSet;
	0
	0
5				use Algorithm::LibLinear::Model;
6				use Algorithm::LibLinear::Types;
7				use Smart::Args;
8				use XSLoader;
9
10				our $VERSION = '0.11';
11
12				XSLoader::load(__PACKAGE__, $VERSION);
13
14				my %default_eps = (  # Default epsilon (termination criterion) per solver name; new() falls back to these when no epsilon is given.
15				L2R_LR => 0.01,
16				L2R_L2LOSS_SVC_DUAL => 0.1,
17				L2R_L2LOSS_SVC => 0.01,
18				L2R_L1LOSS_SVC_DUAL => 0.1,
19				MCSVM_CS => 0.1,
20				L1R_L2LOSS_SVC => 0.01,
21				L1R_LR => 0.01,
22				L2R_LR_DUAL => 0.1,
23
24				# Solvers for regression problem
25				L2R_L2LOSS_SVR => 0.001,
26				L2R_L2LOSS_SVR_DUAL => 0.1,
27				L2R_L1LOSS_SVR_DUAL => 0.1,
28				);
29
30				my %solvers = (  # Maps each solver name to the numeric id that new() passes to Algorithm::LibLinear::TrainingParameter->new.
31				# Solvers for classification problem
32				L2R_LR => 0,
33				L2R_L2LOSS_SVC_DUAL => 1,
34				L2R_L2LOSS_SVC => 2,
35				L2R_L1LOSS_SVC_DUAL => 3,
36				MCSVM_CS => 4,
37				L1R_L2LOSS_SVC => 5,
38				L1R_LR => 6,
39				L2R_LR_DUAL => 7,
40
41				# Solvers for regression problem
42				L2R_L2LOSS_SVR => 11,
43				L2R_L2LOSS_SVR_DUAL => 12,
44				L2R_L1LOSS_SVR_DUAL => 13,
45				);
46
47				sub new {  # Constructor: takes named arguments, builds the training parameter object and returns a blessed hash holding it and the bias.
48				args
49				my $class => 'ClassName',
50				my $bias => +{ isa => 'Num', default => -1.0, },
51				my $cost => +{ isa => 'Num', default => 1, },
52				my $epsilon => +{ isa => 'Num', optional => 1, },  # optional: resolved from %default_eps below when omitted
53				my $loss_sensitivity => +{ isa => 'Num', default => 0.1, },
54				my $solver => +{
55				isa => 'Algorithm::LibLinear::SolverDescriptor',
56				default => 'L2R_L2LOSS_SVC_DUAL',
57				},
58				my $weights => +{
59				isa => 'ArrayRef[Algorithm::LibLinear::TrainingParameter::ClassWeight]',
60				default => [],
61				};
62
63				$epsilon //= $default_eps{$solver};  # fall back to the solver-specific default epsilon
64				my (@weight_labels, @weights);  # parallel arrays filled from the list of { label, weight } hashes
65				for my $weight (@$weights) {
66				push @weight_labels, $weight->{label};
67				push @weights, $weight->{weight};
68				}
69				my $training_parameter = Algorithm::LibLinear::TrainingParameter->new(  # positional: solver id, epsilon, cost, weight labels, weights, loss sensitivity
70				$solvers{$solver},
71				$epsilon,
72				$cost,
73				\@weight_labels,
74				\@weights,
75				$loss_sensitivity,
76				);
77				bless +{
78				bias => $bias,
79				training_parameter => $training_parameter,
80				} => $class;
81				}
82
83				sub bias { $_[0]->{bias} }  # Returns the bias value stored on the instance by new().
84
85				sub cost { $_[0]->training_parameter->cost }  # Delegates to the training parameter object's cost method.
86
87				sub cross_validation {  # N-fold cross validation: returns the mean squared error for regression solvers, otherwise the fraction of instances whose target equals its label.
88				args
89				my $self,
90				my $data_set => 'Algorithm::LibLinear::DataSet',
91				my $num_folds => 'Int';
92
93				my $targets = $self->training_parameter->cross_validation(  # presumably one predicted target per instance, in data set order (binding side not shown here; confirm)
94				$data_set->as_problem(bias => $self->bias),
95				$num_folds,
96				);
97				my @labels = map { $_->{label} } @{ $data_set->as_arrayref };  # true labels, compared index-by-index with $targets
98				if ($self->is_regression_solver) {
99				my $total_square_error = 0;
100				for my $i (0 .. $data_set->size - 1) {
101				$total_square_error += ($targets->[$i] - $labels[$i]) ** 2;
102				}
103				# Returns mean squared error.
104				# TODO: Squared correlation coefficient (see train.c in LIBLINEAR.)
105				return $total_square_error / $data_set->size;
106				} else {
107				my $num_corrects = 0;  # start at 0: when no prediction matches, the division below must see 0, not undef
108				for my $i (0 .. $data_set->size - 1) {
109				++$num_corrects if $targets->[$i] == $labels[$i];
110				}
111				return $num_corrects / $data_set->size;
112				}
113				}
114
115				sub epsilon { $_[0]->training_parameter->epsilon }  # Delegates to the training parameter object's epsilon method.
116
117				sub is_regression_solver { $_[0]->training_parameter->is_regression_solver }  # Delegates to the training parameter object; cross_validation() uses it to choose the metric.
118
119				sub loss_sensitivity { $_[0]->training_parameter->loss_sensitivity }  # Delegates to the training parameter object's loss_sensitivity method.
120
121				sub training_parameter { $_[0]->{training_parameter} }  # Returns the training parameter object stored on the instance by new().
122
123				sub train {  # Trains on the given data set and returns an Algorithm::LibLinear::Model built from the raw model.
124				args
125				my $self,
126				my $data_set => 'Algorithm::LibLinear::DataSet';
127
128				my $raw_model = Algorithm::LibLinear::Model::Raw->train(
129				$data_set->as_problem(bias => $self->bias),  # this learner's bias is passed to as_problem
130				$self->training_parameter,
131				);
132				Algorithm::LibLinear::Model->new(raw_model => $raw_model);  # last expression, so this is the return value
133				}
134
135				sub weights {  # Returns an array reference of { label, weight } hashes rebuilt from the training parameter's two parallel lists.
136				args
137				my $self;
138
139				my $labels = $self->training_parameter->weight_labels;
140				my $weights = $self->training_parameter->weights;
141				[ map {
142				+{ label => $labels->[$_], weight => $weights->[$_], }
143				} 0 .. $#$labels ];  # pairs entries by index; iterates over every index of $labels
144				}
145
146				1;
147				__END__
148
149				=head1 NAME
150
151				Algorithm::LibLinear - A Perl binding for LIBLINEAR, a library for classification/regression using linear SVM and logistic regression.
152
153				=head1 SYNOPSIS
154
155				use Algorithm::LibLinear;
156				# Constructs a model for L2-regularized L2 loss support vector classification.
157				my $learner = Algorithm::LibLinear->new(
158				cost => 1,
159				epsilon => 0.01,
160				solver => 'L2R_L2LOSS_SVC_DUAL',
161				weights => [
162				+{ label => 1, weight => 1, },
163				+{ label => -1, weight => 1, },
164				],
165				);
166				# Loads a training data set from DATA filehandle.
167				my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA);
168				# Executes cross validation.
169				my $accuracy = $learner->cross_validation(data_set => $data_set, num_folds => 5);
170				# Executes training.
171				my $classifier = $learner->train(data_set => $data_set);
172				# Determines which class (+1 or -1) the given feature belongs to.
173				my $class_label = $classifier->predict(feature => +{ 1 => 0.38, 2 => -0.5, ... });
174
175				__DATA__
176				+1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1
177				-1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1
178				+1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1
179				-1 1:0.458333 2:1 3:1 4:-0.358491 5:-0.374429 6:-1 7:-1 8:-0.480916 9:1 10:-0.935484 12:-0.333333 13:1
180				-1 1:0.875 2:-1 3:-0.333333 4:-0.509434 5:-0.347032 6:-1 7:1 8:-0.236641 9:1 10:-0.935484 11:-1 12:-0.333333 13:-1
181				...
182
183				=head1 DESCRIPTION
184
185				Algorithm::LibLinear is an XS module that provides features of LIBLINEAR, a fast C library for classification and regression.
186
187				Current version is based on LIBLINEAR 1.95, released on Oct 27, 2014.
188
189				=head1 METHODS
190
191				=head2 new([bias => -1.0] [, cost => 1] [, epsilon => 0.1] [, loss_sensitivity => 0.1] [, solver => 'L2R_L2LOSS_SVC_DUAL'] [, weights => []])
192
193				Constructor. You can set several named parameters:
194
195				=over 4
196
197				=item bias
198
199				Bias term to be added to the prediction result (i.e., the C<-B> option of LIBLINEAR's C<train> command).
200
201				This parameter makes sense only when its value is positive.
202
203				=item cost
204
205				Penalty cost for misclassification (C<-c>.)
206
207				=item epsilon
208
209				Termination criterion (C<-e>.)
210
211				Default value of this parameter depends on the value of C<solver>.
212
213				=item loss_sensitivity
214
215				Epsilon in loss function of SVR (C<-p>.)
216
217				=item solver
218
219				Kind of solver (C<-s>.)
220
221				For classification:
222
223				=over 4
224
225				=item 'L2R_LR' - L2-regularized logistic regression
226
227				=item 'L2R_L2LOSS_SVC_DUAL' - L2-regularized L2-loss SVC (dual problem)
228
229				=item 'L2R_L2LOSS_SVC' - L2-regularized L2-loss SVC (primal problem)
230
231				=item 'L2R_L1LOSS_SVC_DUAL' - L2-regularized L1-loss SVC (dual problem)
232
233				=item 'MCSVM_CS' - Crammer-Singer multiclass SVM
234
235				=item 'L1R_L2LOSS_SVC' - L1-regularized L2-loss SVC
236
237				=item 'L1R_LR' - L1-regularized logistic regression (primal problem)
238
239				=item 'L2R_LR_DUAL' - L2-regularized logistic regression (dual problem)
240
241				=back
242
243				For regression:
244
245				=over 4
246
247				=item 'L2R_L2LOSS_SVR' - L2-regularized L2-loss SVR (primal problem)
248
249				=item 'L2R_L2LOSS_SVR_DUAL' - L2-regularized L2-loss SVR (dual problem)
250
251				=item 'L2R_L1LOSS_SVR_DUAL' - L2-regularized L1-loss SVR (dual problem)
252
253				=back
254
255				=item weights
256
257				Weights adjust the cost parameter of different classes (C<-wi>.)
258
259				For example,
260
261				my $learner = Algorithm::LibLinear->new(
262				weights => [
263				+{ label => 1, weight => 0.5 },
264				+{ label => 2, weight => 1 },
265				+{ label => 3, weight => 0.5 },
266				],
267				);
268
269				gives class 2 double the weight of the other classes. This means that samples belonging to class 2 have a stronger effect on learning than samples belonging to class 1 or 3.
270
271				This option is useful when the number of training samples of each class is not balanced.
272
273				=back
274
275				=head2 cross_validation(data_set => $data_set, num_folds => $num_folds)
276
277				Evaluates the training parameter using the N-fold cross validation method.
278				The given data set will be split into N parts. N-1 of them will be used as a training set and the remaining part will be used as a test set.
279				The evaluation iterates N times, each time using a different part as the test set. For classification solvers the average accuracy is returned as the result; for regression solvers the mean squared error is returned instead.
280
281				=head2 train(data_set => $data_set)
282
283				Executes training and returns a trained L<Algorithm::LibLinear::Model> instance.
284				C<data_set> is the same as C<cross_validation>'s.
285
286				=head1 AUTHOR
287
288				Koichi SATOH E<lt>sato@seesaa.co.jpE<gt>
289
290				=head1 SEE ALSO
291
292				L<Algorithm::LibLinear::DataSet>
293
294				L<Algorithm::LibLinear::FeatureScaling>
295
296				L<Algorithm::LibLinear::Model>
297
298				L<LIBLINEAR Homepage\|http://www.csie.ntu.edu.tw/~cjlin/liblinear/>
299
300				L<Algorithm::SVM> - A Perl binding to LIBSVM.
301
302				=head1 LICENSE
303
304				=head2 Algorithm::LibLinear
305
306				Copyright (c) 2013-2014 Koichi SATOH. All rights reserved.
307
308				The MIT License (MIT)
309
310				Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
311
312				The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
313
314				THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
315
316				=head2 LIBLINEAR
317
318				Copyright (c) 2007-2014 The LIBLINEAR Project.
319				All rights reserved.
320
321				Redistribution and use in source and binary forms, with or without
322				modification, are permitted provided that the following conditions
323				are met:
324
325				1. Redistributions of source code must retain the above copyright
326				notice, this list of conditions and the following disclaimer.
327
328				2. Redistributions in binary form must reproduce the above copyright
329				notice, this list of conditions and the following disclaimer in the
330				documentation and/or other materials provided with the distribution.
331
332				3. Neither name of copyright holders nor the names of its contributors
333				may be used to endorse or promote products derived from this software
334				without specific prior written permission.
335
336				THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
337				``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
338				LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
339				A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
340				CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
341				EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
342				PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
343				PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
344				LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
345				NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
346				SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
347
348				=cut