File Coverage

blib/lib/Algorithm/LibLinear.pm
Criterion     Covered   Total       %
statement          49      53    92.4
branch              3       4    75.0
condition           2       3    66.6
subroutine         15      15   100.0
pod                 7       9    77.7
total              76      84    90.4


line stmt bran cond sub pod time code
1             package Algorithm::LibLinear;
2              
3 3     3   121 use 5.014;
  3         7  
  3         88  
4 3     3   1640 use Algorithm::LibLinear::DataSet;
  3         10  
  3         88  
5 3     3   1858 use Algorithm::LibLinear::Model;
  3         6  
  3         76  
6 3     3   16 use Algorithm::LibLinear::Types;
  3         5  
  3         56  
7 3     3   15 use Smart::Args;
  3         5  
  3         128  
8 3     3   15 use XSLoader;
  3         6  
  3         1923  
9              
10             our $VERSION = '0.10';
11              
12             XSLoader::load(__PACKAGE__, $VERSION);
13              
14             my %default_eps = (
15             L2R_LR => 0.01,
16             L2R_L2LOSS_SVC_DUAL => 0.1,
17             L2R_L2LOSS_SVC => 0.01,
18             L2R_L1LOSS_SVC_DUAL => 0.1,
19             MCSVM_CS => 0.1,
20             L1R_L2LOSS_SVC => 0.01,
21             L1R_LR => 0.01,
22             L2R_LR_DUAL => 0.1,
23              
24             # Solvers for regression problem
25             L2R_L2LOSS_SVR => 0.001,
26             L2R_L2LOSS_SVR_DUAL => 0.1,
27             L2R_L1LOSS_SVR_DUAL => 0.1,
28             );
29              
30             my %solvers = (
31             # Solvers for classification problem
32             L2R_LR => 0,
33             L2R_L2LOSS_SVC_DUAL => 1,
34             L2R_L2LOSS_SVC => 2,
35             L2R_L1LOSS_SVC_DUAL => 3,
36             MCSVM_CS => 4,
37             L1R_L2LOSS_SVC => 5,
38             L1R_LR => 6,
39             L2R_LR_DUAL => 7,
40              
41             # Solvers for regression problem
42             L2R_L2LOSS_SVR => 11,
43             L2R_L2LOSS_SVR_DUAL => 12,
44             L2R_L1LOSS_SVR_DUAL => 13,
45             );
46              
47             sub new {
48 10     10 1 4810 args
49             my $class => 'ClassName',
50             my $cost => +{ isa => 'Num', default => 1, },
51             my $epsilon => +{ isa => 'Num', optional => 1, },
52             my $loss_sensitivity => +{ isa => 'Num', default => 0.1, },
53             my $solver => +{
54             isa => 'Algorithm::LibLinear::SolverDescriptor',
55             default => 'L2R_L2LOSS_SVC_DUAL',
56             },
57             my $weights => +{
58             isa => 'ArrayRef[Algorithm::LibLinear::TrainingParameter::ClassWeight]',
59             default => [],
60             };
61              
62 10   66     46303 $epsilon //= $default_eps{$solver};
63 10         2562 my (@weight_labels, @weights);
64 10         2582 for my $weight (@$weights) {
65 12         4839 push @weight_labels, $weight->{label};
66 12         7075 push @weights, $weight->{weight};
67             }
68 10         2987 my $training_parameter = Algorithm::LibLinear::TrainingParameter->new(
69             $solvers{$solver},
70             $epsilon,
71             $cost,
72             \@weight_labels,
73             \@weights,
74             $loss_sensitivity,
75             );
76 7         5233 bless +{ training_parameter => $training_parameter } => $class;
77             }
78              
79 2     2 1 65 sub cost { $_[0]->training_parameter->cost }
80              
81             sub cross_validation {
82 1     1 1 4 args
83             my $self,
84             my $data_set => 'Algorithm::LibLinear::DataSet',
85             my $num_folds => 'Int';
86              
87 1         85 my $targets = $self->training_parameter->cross_validation(
88             $data_set->as_problem,
89             $num_folds,
90             );
91 1         66 my @labels = map { $_->{label} } @{ $data_set->as_arrayref };
  270         423  
  1         29  
92 1 50       16 if ($self->is_regression_solver) {
93 0         0 my $total_square_error = 0;
94 0         0 for my $i (0 .. $data_set->size - 1) {
95 0         0 $total_square_error += ($targets->[$i] - $labels[$i]) ** 2;
96             }
97             # Returns mean squared error.
98             # TODO: Squared correlation coefficient (see train.c in LIBLINEAR.)
99 0         0 return $total_square_error / $data_set->size;
100             } else {
101 1         3 my $num_corrects;
102 1         6 for my $i (0 .. $data_set->size - 1) {
103 270 100       490 ++$num_corrects if $targets->[$i] == $labels[$i];
104             }
105 1         7 return $num_corrects / $data_set->size;
106             }
107             }
108              
109 2     2 1 29 sub epsilon { $_[0]->training_parameter->epsilon }
110              
111 3     3 0 44 sub is_regression_solver { $_[0]->training_parameter->is_regression_solver }
112              
113 1     1 1 15 sub loss_sensitivity { $_[0]->training_parameter->loss_sensitivity }
114              
115 18     18 0 40867 sub training_parameter { $_[0]->{training_parameter} }
116              
117             sub train {
118 5     5 1 6790 args
119             my $self,
120             my $data_set => 'Algorithm::LibLinear::DataSet';
121              
122 5         175869 my $raw_model = Algorithm::LibLinear::Model::Raw->train(
123             $data_set->as_problem,
124             $self->training_parameter,
125             );
126 5         4400 Algorithm::LibLinear::Model->new(raw_model => $raw_model);
127             }
128              
129             sub weights {
130 2     2 1 31 args
131             my $self;
132              
133 2         62 my $labels = $self->training_parameter->weight_labels;
134 2         6 my $weights = $self->training_parameter->weights;
135 2         11 [ map {
136 2         8 +{ label => $labels->[$_], weight => $weights->[$_], }
137             } 0 .. $#$labels ];
138             }
139              
140             1;
141             __END__
142              
143             =head1 NAME
144              
145             Algorithm::LibLinear - A Perl binding for LIBLINEAR, a library for classification/regression using linear SVM and logistic regression.
146              
147             =head1 SYNOPSIS
148              
149             use Algorithm::LibLinear;
150             # Constructs a model for L2-regularized L2 loss support vector classification.
151             my $learner = Algorithm::LibLinear->new(
152             cost => 1,
153             epsilon => 0.01,
154             solver => 'L2R_L2LOSS_SVC_DUAL',
155             weights => [
156             +{ label => 1, weight => 1, },
157             +{ label => -1, weight => 1, },
158             ],
159             );
160             # Loads a training data set from DATA filehandle.
161             my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA);
162             # Executes cross validation.
163             my $accuracy = $learner->cross_validation(data_set => $data_set, num_folds => 5);
164             # Executes training.
165             my $classifier = $learner->train(data_set => $data_set);
166             # Determines which class (+1 or -1) the given feature vector belongs to.
167             my $class_label = $classifier->predict(feature => +{ 1 => 0.38, 2 => -0.5, ... });
168            
169             __DATA__
170             +1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1
171             -1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1
172             +1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1
173             -1 1:0.458333 2:1 3:1 4:-0.358491 5:-0.374429 6:-1 7:-1 8:-0.480916 9:1 10:-0.935484 12:-0.333333 13:1
174             -1 1:0.875 2:-1 3:-0.333333 4:-0.509434 5:-0.347032 6:-1 7:1 8:-0.236641 9:1 10:-0.935484 11:-1 12:-0.333333 13:-1
175             ...
176              
177             =head1 DESCRIPTION
178              
179             Algorithm::LibLinear is an XS module that provides features of LIBLINEAR, a fast C library for classification and regression.
180              
181             The current version is based on LIBLINEAR 1.94, released on November 12, 2013.
182              
183             =head1 METHODS
184              
185             =head2 new([cost => 1] [, epsilon => 0.1] [, loss_sensitivity => 0.1] [, solver => 'L2R_L2LOSS_SVC_DUAL'] [, weights => []])
186              
187             Constructor. You can set several named parameters:
188              
189             =over 4
190              
191             =item cost
192              
193             Penalty cost for misclassification (i.e., C<-c> option for LIBLINEAR's C<train> command.)
194              
195             =item epsilon
196              
197             Termination criterion (C<-e>.)
198              
199             Default value of this parameter depends on the value of C<solver>.
200              
201             =item loss_sensitivity
202              
203             Epsilon in loss function of SVR (C<-p>.)
204              
205             =item solver
206              
207             Kind of solver (C<-s>.) A constructor example using one of the regression solvers is shown after this parameter list.
208              
209             For classification:
210              
211             =over 4
212              
213             =item 'L2R_LR' - L2-regularized logistic regression
214              
215             =item 'L2R_L2LOSS_SVC_DUAL' - L2-regularized L2-loss SVC (dual problem)
216              
217             =item 'L2R_L2LOSS_SVC' - L2-regularized L2-loss SVC (primal problem)
218              
219             =item 'L2R_L1LOSS_SVC_DUAL' - L2-regularized L1-loss SVC (dual problem)
220              
221             =item 'MCSVM_CS' - Crammer-Singer multiclass SVM
222              
223             =item 'L1R_L2LOSS_SVC' - L1-regularized L2-loss SVC
224              
225             =item 'L1R_LR' - L1-regularized logistic regression (primal problem)
226              
227             =item 'L2R_LR_DUAL' - L2-regularized logistic regression (dual problem)
228              
229             =back
230              
231             For regression:
232              
233             =over 4
234              
235             =item 'L2R_L2LOSS_SVR' - L2-regularized L2-loss SVR (primal problem)
236              
237             =item 'L2R_L2LOSS_SVR_DUAL' - L2-regularized L2-loss SVR (dual problem)
238              
239             =item 'L2R_L1LOSS_SVR_DUAL' - L2-regularized L1-loss SVR (dual problem)
240              
241             =back
242              
243             =item weights
244              
245             Weights that adjust the cost parameter for individual classes (C<-wi>.)
246              
247             For example,
248              
249             my $learner = Algorithm::LibLinear->new(
250             weights => [
251             +{ label => 1, weight => 0.5 },
252             +{ label => 2, weight => 1 },
253             +{ label => 3, weight => 0.5 },
254             ],
255             );
256              
257             gives class 2 twice the weight of classes 1 and 3, so samples belonging to class 2 have a stronger effect on learning than samples belonging to class 1 or 3.
258              
259             This option is useful when the numbers of training samples are unbalanced across classes.
260              
261             =back
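
For example, here is a sketch (reusing nothing beyond the module itself) of constructing a learner with one of the regression solvers instead of the default classification solver:

  use Algorithm::LibLinear;

  # Regression solvers use loss_sensitivity (-p) in addition to cost (-c).
  my $regressor = Algorithm::LibLinear->new(
      solver           => 'L2R_L2LOSS_SVR',
      cost             => 1,
      loss_sensitivity => 0.1,
      # epsilon is omitted, so the per-solver default is used
      # (0.001 for L2R_L2LOSS_SVR).
  );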
262              
263             =head2 cross_validation(data_set => $data_set, num_folds => $num_folds)
264              
265             Evaluates the training parameters using N-fold cross-validation.
266             The given data set is split into N parts; N-1 of them are used as a training set and the remaining part is used as a test set.
267             The evaluation is repeated N times, using a different part as the test set each time. For classification solvers the average accuracy over the folds is returned; for regression solvers the mean squared error is returned.
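
As a sketch, assuming C<$learner> and C<$data_set> have been set up as in the SYNOPSIS:

  # Returns mean accuracy for classification solvers,
  # mean squared error for regression solvers.
  my $score = $learner->cross_validation(
      data_set  => $data_set,
      num_folds => 10,
  );
  printf "cross-validation score: %.4f\n", $score;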
268              
269             =head2 train(data_set => $data_set)
270              
271             Executes training and returns a trained L<Algorithm::LibLinear::Model> instance.
272             C<data_set> is the same as in C<cross_validation>.
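
A minimal sketch, again assuming C<$learner> and C<$data_set> from the SYNOPSIS:

  my $classifier = $learner->train(data_set => $data_set);
  # The returned Algorithm::LibLinear::Model can classify new
  # feature vectors, as shown in the SYNOPSIS.
  my $label = $classifier->predict(feature => +{ 1 => 0.38, 2 => -0.5 });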
273              
274             =head1 AUTHOR
275              
276             Koichi SATOH E<lt>sato@seesaa.co.jpE<gt>
277              
278             =head1 SEE ALSO
279              
280             L<Algorithm::LibLinear::DataSet>
281              
282             L<Algorithm::LibLinear::FeatureScaling>
283              
284             L<Algorithm::LibLinear::Model>
285              
286             L<LIBLINEAR Homepage|http://www.csie.ntu.edu.tw/~cjlin/liblinear/>
287              
288             L<Algorithm::SVM> - A Perl binding to LIBSVM.
289              
290             =head1 LICENSE
291              
292             =head2 Algorithm::LibLinear
293              
294             Copyright (c) 2013 Koichi SATOH. All rights reserved.
295              
296             The MIT License (MIT)
297              
298             Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
299              
300             The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
301              
302             THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
303              
304             =head2 LIBLINEAR
305              
306             Copyright (c) 2007-2013 The LIBLINEAR Project.
307             All rights reserved.
308              
309             Redistribution and use in source and binary forms, with or without
310             modification, are permitted provided that the following conditions
311             are met:
312              
313             1. Redistributions of source code must retain the above copyright
314             notice, this list of conditions and the following disclaimer.
315              
316             2. Redistributions in binary form must reproduce the above copyright
317             notice, this list of conditions and the following disclaimer in the
318             documentation and/or other materials provided with the distribution.
319              
320             3. Neither name of copyright holders nor the names of its contributors
321             may be used to endorse or promote products derived from this software
322             without specific prior written permission.
323              
324             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
325             ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
326             LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
327             A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
328             CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
329             EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
330             PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
331             PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
332             LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
333             NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
334             SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
335              
336             =cut