File Coverage

blib/lib/Algorithm/LibLinear.pm
Criterion     Covered   Total       %
statement          49      53    92.4
branch              3       4    75.0
condition           2       3    66.6
subroutine         15      15   100.0
pod                 7       9    77.7
total              76      84    90.4


line stmt bran cond sub pod time code
1             package Algorithm::LibLinear;
2              
3 3     3   121 use 5.014;
  3         7  
  3         88  
4 3     3   1640 use Algorithm::LibLinear::DataSet;
  3         10  
  3         88  
5 3     3   1858 use Algorithm::LibLinear::Model;
  3         6  
  3         76  
6 3     3   16 use Algorithm::LibLinear::Types;
  3         5  
  3         56  
7 3     3   15 use Smart::Args;
  3         5  
  3         128  
8 3     3   15 use XSLoader;
  3         6  
  3         1923  
9              
10             our $VERSION = '0.10';
11              
12             XSLoader::load(__PACKAGE__, $VERSION);
13              
14             my %default_eps = (
15             L2R_LR => 0.01,
16             L2R_L2LOSS_SVC_DUAL => 0.1,
17             L2R_L2LOSS_SVC => 0.01,
18             L2R_L1LOSS_SVC_DUAL => 0.1,
19             MCSVM_CS => 0.1,
20             L1R_L2LOSS_SVC => 0.01,
21             L1R_LR => 0.01,
22             L2R_LR_DUAL => 0.1,
23              
24             # Solvers for regression problem
25             L2R_L2LOSS_SVR => 0.001,
26             L2R_L2LOSS_SVR_DUAL => 0.1,
27             L2R_L1LOSS_SVR_DUAL => 0.1,
28             );
29              
30             my %solvers = (
31             # Solvers for classification problem
32             L2R_LR => 0,
33             L2R_L2LOSS_SVC_DUAL => 1,
34             L2R_L2LOSS_SVC => 2,
35             L2R_L1LOSS_SVC_DUAL => 3,
36             MCSVM_CS => 4,
37             L1R_L2LOSS_SVC => 5,
38             L1R_LR => 6,
39             L2R_LR_DUAL => 7,
40              
41             # Solvers for regression problem
42             L2R_L2LOSS_SVR => 11,
43             L2R_L2LOSS_SVR_DUAL => 12,
44             L2R_L1LOSS_SVR_DUAL => 13,
45             );
46              
47             sub new {
48 10     10 1 4810 args
49             my $class => 'ClassName',
50             my $cost => +{ isa => 'Num', default => 1, },
51             my $epsilon => +{ isa => 'Num', optional => 1, },
52             my $loss_sensitivity => +{ isa => 'Num', default => 0.1, },
53             my $solver => +{
54             isa => 'Algorithm::LibLinear::SolverDescriptor',
55             default => 'L2R_L2LOSS_SVC_DUAL',
56             },
57             my $weights => +{
58             isa => 'ArrayRef[Algorithm::LibLinear::TrainingParameter::ClassWeight]',
59             default => [],
60             };
61              
62 10   66     46303 $epsilon //= $default_eps{$solver};
63 10         2562 my (@weight_labels, @weights);
64 10         2582 for my $weight (@$weights) {
65 12         4839 push @weight_labels, $weight->{label};
66 12         7075 push @weights, $weight->{weight};
67             }
68 10         2987 my $training_parameter = Algorithm::LibLinear::TrainingParameter->new(
69             $solvers{$solver},
70             $epsilon,
71             $cost,
72             \@weight_labels,
73             \@weights,
74             $loss_sensitivity,
75             );
76 7         5233 bless +{ training_parameter => $training_parameter } => $class;
77             }
78              
79 2     2 1 65 sub cost { $_[0]->training_parameter->cost }
80              
81             sub cross_validation {
82 1     1 1 4 args
83             my $self,
84             my $data_set => 'Algorithm::LibLinear::DataSet',
85             my $num_folds => 'Int';
86              
87 1         85 my $targets = $self->training_parameter->cross_validation(
88             $data_set->as_problem,
89             $num_folds,
90             );
91 1         66 my @labels = map { $_->{label} } @{ $data_set->as_arrayref };
  270         423  
  1         29  
92 1 50       16 if ($self->is_regression_solver) {
93 0         0 my $total_square_error = 0;
94 0         0 for my $i (0 .. $data_set->size - 1) {
95 0         0 $total_square_error += ($targets->[$i] - $labels[$i]) ** 2;
96             }
97             # Returns mean squared error.
98             # TODO: Squared correlation coefficient (see train.c in LIBLINEAR.)
99 0         0 return $total_square_error / $data_set->size;
100             } else {
101 1         3 my $num_corrects;
102 1         6 for my $i (0 .. $data_set->size - 1) {
103 270 100       490 ++$num_corrects if $targets->[$i] == $labels[$i];
104             }
105 1         7 return $num_corrects / $data_set->size;
106             }
107             }
108              
109 2     2 1 29 sub epsilon { $_[0]->training_parameter->epsilon }
110              
111 3     3 0 44 sub is_regression_solver { $_[0]->training_parameter->is_regression_solver }
112              
113 1     1 1 15 sub loss_sensitivity { $_[0]->training_parameter->loss_sensitivity }
114              
115 18     18 0 40867 sub training_parameter { $_[0]->{training_parameter} }
116              
117             sub train {
118 5     5 1 6790 args
119             my $self,
120             my $data_set => 'Algorithm::LibLinear::DataSet';
121              
122 5         175869 my $raw_model = Algorithm::LibLinear::Model::Raw->train(
123             $data_set->as_problem,
124             $self->training_parameter,
125             );
126 5         4400 Algorithm::LibLinear::Model->new(raw_model => $raw_model);
127             }
128              
129             sub weights {
130 2     2 1 31 args
131             my $self;
132              
133 2         62 my $labels = $self->training_parameter->weight_labels;
134 2         6 my $weights = $self->training_parameter->weights;
135 2         11 [ map {
136 2         8 +{ label => $labels->[$_], weight => $weights->[$_], }
137             } 0 .. $#$labels ];
138             }
139              
140             1;
141             __END__
142              
143             =head1 NAME
144              
145             Algorithm::LibLinear - A Perl binding for LIBLINEAR, a library for classification/regression using linear SVM and logistic regression.
146              
147             =head1 SYNOPSIS
148              
149             use Algorithm::LibLinear;
150             # Constructs a model for L2-regularized L2 loss support vector classification.
151             my $learner = Algorithm::LibLinear->new(
152             cost => 1,
153             epsilon => 0.01,
154             solver => 'L2R_L2LOSS_SVC_DUAL',
155             weights => [
156             +{ label => 1, weight => 1, },
157             +{ label => -1, weight => 1, },
158             ],
159             );
160             # Loads a training data set from DATA filehandle.
161             my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA);
162             # Executes cross validation.
163             my $accuracy = $learner->cross_validation(data_set => $data_set, num_folds => 5);
164             # Executes training.
165             my $classifier = $learner->train(data_set => $data_set);
166             # Determines which class (+1 or -1) the given feature vector belongs to.
167             my $class_label = $classifier->predict(feature => +{ 1 => 0.38, 2 => -0.5, ... });
168            
169             __DATA__
170             +1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1
171             -1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1
172             +1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1
173             -1 1:0.458333 2:1 3:1 4:-0.358491 5:-0.374429 6:-1 7:-1 8:-0.480916 9:1 10:-0.935484 12:-0.333333 13:1
174             -1 1:0.875 2:-1 3:-0.333333 4:-0.509434 5:-0.347032 6:-1 7:1 8:-0.236641 9:1 10:-0.935484 11:-1 12:-0.333333 13:-1
175             ...
176              
177             =head1 DESCRIPTION
178              
179             Algorithm::LibLinear is an XS module that provides features of LIBLINEAR, a fast C library for classification and regression.
180              
181             The current version is based on LIBLINEAR 1.94, released on November 12, 2013.
182              
183             =head1 METHODS
184              
185             =head2 new([cost => 1] [, epsilon => 0.1] [, loss_sensitivity => 0.1] [, solver => 'L2R_L2LOSS_SVC_DUAL'] [, weights => []])
186              
187             Constructor. You can set several named parameters:
188              
189             =over 4
190              
191             =item cost
192              
193             Penalty cost for misclassification (i.e., C<-c> option for LIBLINEAR's C<train> command.)
194              
195             =item epsilon
196              
197             Termination criterion (C<-e>.)
198              
199             Default value of this parameter depends on the value of C<solver>.
200              
201             =item loss_sensitivity
202              
203             Epsilon in loss function of SVR (C<-p>.)
204              
205             =item solver
206              
207             Kind of solver (C<-s>.) A constructor example using one of the regression solvers is shown after this parameter list.
208              
209             For classification:
210              
211             =over 4
212              
213             =item 'L2R_LR' - L2-regularized logistic regression
214              
215             =item 'L2R_L2LOSS_SVC_DUAL' - L2-regularized L2-loss SVC (dual problem)
216              
217             =item 'L2R_L2LOSS_SVC' - L2-regularized L2-loss SVC (primal problem)
218              
219             =item 'L2R_L1LOSS_SVC_DUAL' - L2-regularized L1-loss SVC (dual problem)
220              
221             =item 'MCSVM_CS' - Crammer-Singer multiclass SVM
222              
223             =item 'L1R_L2LOSS_SVC' - L1-regularized L2-loss SVC
224              
225             =item 'L1R_LR' - L1-regularized logistic regression (primal problem)
226              
227             =item 'L2R_LR_DUAL' - L2-regularized logistic regression (dual problem)
228              
229             =back
230              
231             For regression:
232              
233             =over 4
234              
235             =item 'L2R_L2LOSS_SVR' - L2-regularized L2-loss SVR (primal problem)
236              
237             =item 'L2R_L2LOSS_SVR_DUAL' - L2-regularized L2-loss SVR (dual problem)
238              
239             =item 'L2R_L1LOSS_SVR_DUAL' - L2-regularized L1-loss SVR (dual problem)
240              
241             =back
242              
243             =item weights
244              
245             Weights that adjust the cost parameter for individual classes (C<-wi>.)
246              
247             For example,
248              
249             my $learner = Algorithm::LibLinear->new(
250             weights => [
251             +{ label => 1, weight => 0.5 },
252             +{ label => 2, weight => 1 },
253             +{ label => 3, weight => 0.5 },
254             ],
255             );
256              
257             gives class 2 twice the weight of classes 1 and 3, so samples belonging to class 2 have a stronger effect on learning than samples belonging to class 1 or 3.
258              
259             This option is useful when the numbers of training samples are unbalanced across classes.
260              
261             =back
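
For example, here is a sketch (reusing nothing beyond the module itself) of constructing a learner with one of the regression solvers instead of the default classification solver:

  use Algorithm::LibLinear;

  # Regression solvers use loss_sensitivity (-p) in addition to cost (-c).
  my $regressor = Algorithm::LibLinear->new(
      solver           => 'L2R_L2LOSS_SVR',
      cost             => 1,
      loss_sensitivity => 0.1,
      # epsilon is omitted, so the per-solver default is used
      # (0.001 for L2R_L2LOSS_SVR).
  );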
262              
263             =head2 cross_validation(data_set => $data_set, num_folds => $num_folds)
264              
265             Evaluates the training parameters using N-fold cross-validation.
266             The given data set is split into N parts; N-1 of them are used as a training set and the remaining part is used as a test set.
267             The evaluation is repeated N times, using a different part as the test set each time. For classification solvers the average accuracy over the folds is returned; for regression solvers the mean squared error is returned.
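
As a sketch, assuming C<$learner> and C<$data_set> have been set up as in the SYNOPSIS:

  # Returns mean accuracy for classification solvers,
  # mean squared error for regression solvers.
  my $score = $learner->cross_validation(
      data_set  => $data_set,
      num_folds => 10,
  );
  printf "cross-validation score: %.4f\n", $score;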
268              
269             =head2 train(data_set => $data_set)
270              
271             Executes training and returns a trained L<Algorithm::LibLinear::Model> instance.
272             C<data_set> is the same as in C<cross_validation>.
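
A minimal sketch, again assuming C<$learner> and C<$data_set> from the SYNOPSIS:

  my $classifier = $learner->train(data_set => $data_set);
  # The returned Algorithm::LibLinear::Model can classify new
  # feature vectors, as shown in the SYNOPSIS.
  my $label = $classifier->predict(feature => +{ 1 => 0.38, 2 => -0.5 });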
273              
274             =head1 AUTHOR
275              
276             Koichi SATOH E<lt>sato@seesaa.co.jpE<gt>
277              
278             =head1 SEE ALSO
279              
280             L<Algorithm::LibLinear::DataSet>
281              
282             L<Algorithm::LibLinear::FeatureScaling>
283              
284             L<Algorithm::LibLinear::Model>
285              
286             L<LIBLINEAR Homepage|http://www.csie.ntu.edu.tw/~cjlin/liblinear/>
287              
288             L<Algorithm::SVM> - A Perl binding to LIBSVM.
289              
290             =head1 LICENSE
291              
292             =head2 Algorithm::LibLinear
293              
294             Copyright (c) 2013 Koichi SATOH. All rights reserved.
295              
296             The MIT License (MIT)
297              
298             Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
299              
300             The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
301              
302             THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
303              
304             =head2 LIBLINEAR
305              
306             Copyright (c) 2007-2013 The LIBLINEAR Project.
307             All rights reserved.
308              
309             Redistribution and use in source and binary forms, with or without
310             modification, are permitted provided that the following conditions
311             are met:
312              
313             1. Redistributions of source code must retain the above copyright
314             notice, this list of conditions and the following disclaimer.
315              
316             2. Redistributions in binary form must reproduce the above copyright
317             notice, this list of conditions and the following disclaimer in the
318             documentation and/or other materials provided with the distribution.
319              
320             3. Neither name of copyright holders nor the names of its contributors
321             may be used to endorse or promote products derived from this software
322             without specific prior written permission.
323              
324             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
325             ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
326             LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
327             A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
328             CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
329             EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
330             PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
331             PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
332             LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
333             NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
334             SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
335              
336             =cut