line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::LibLinear::DataSet; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
720
|
use 5.014; |
|
5
|
|
|
|
|
17
|
|
|
5
|
|
|
|
|
190
|
|
4
|
5
|
|
|
5
|
|
2508
|
use Algorithm::LibLinear::Types; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
158
|
|
5
|
5
|
|
|
5
|
|
31
|
use Carp qw//; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
94
|
|
6
|
5
|
|
|
5
|
|
5197
|
use List::MoreUtils qw/none/; |
|
5
|
|
|
|
|
6282
|
|
|
5
|
|
|
|
|
510
|
|
7
|
5
|
|
|
5
|
|
6166
|
use Smart::Args; |
|
5
|
|
|
|
|
15128
|
|
|
5
|
|
|
|
|
3565
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Constructor.
#
# Named arguments (checked by Smart::Args' `args`):
#   data_set - array reference of labeled data entries; the element type
#              'Algorithm::LibLinear::LabeledData' is presumably declared in
#              Algorithm::LibLinear::Types (not visible here).
#
# Returns a hash-based object that keeps the given array reference as-is
# (no copy is made) under the `data_set` key.
sub new {
    args
        my $class => 'ClassName',
        my $data_set => 'ArrayRef[Algorithm::LibLinear::LabeledData]';

    my $self = +{ data_set => $data_set };
    return bless $self, $class;
}
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Class method: builds a data set from LIBSVM/LIBLINEAR formatted text.
#
# Named arguments (all optional, but at least one must be defined):
#   fh       - an already opened file handle to read from.
#   filename - path of a file to open for reading.
#   string   - the formatted data itself.
# When several are given, `fh` takes precedence over `filename`, which takes
# precedence over `string`.
#
# Croaks with 'No source specified.' when no source is defined, and with a
# message naming the source when it cannot be opened.
#
# Returns a new instance of $class holding the parsed entries.
sub load {
    args
        my $class => 'ClassName',
        my $fh => +{ isa => 'FileHandle', optional => 1, },
        my $filename => +{ isa => 'Str', optional => 1, },
        my $string => +{ isa => 'Str', optional => 1, };

    if (none { defined } ($fh, $filename, $string)) {
        Carp::croak('No source specified.');
    }

    my $source = $fh;
    unless (defined $source) {
        # open() reads from an in-memory buffer when handed a scalar
        # reference, so file names and strings share one code path.
        my ($target, $description) = defined $filename
            ? ($filename, "file '$filename'")
            : (\$string, 'in-memory string');
        open $source, '<', $target
            or Carp::croak("Cannot open $description: $!");
    }

    return $class->new(data_set => $class->parse_input_file($source));
}
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Appends one labeled entry to this data set.
#
# Named arguments (checked by Smart::Args' `args`):
#   data - the entry to append ('Algorithm::LibLinear::LabeledData').
#
# The entry is pushed onto the array returned by as_arrayref(), i.e. the
# object's own storage is modified in place. This package defines no
# `data_set` accessor, so the storage must be reached through as_arrayref().
#
# Returns the number of entries after the append (the result of push).
sub add_data {
    args
        my $self,
        my $data => 'Algorithm::LibLinear::LabeledData';

    push @{ $self->as_arrayref }, $data;
}
42
|
|
|
|
|
|
|
|
43
|
32
|
|
|
32
|
0
|
12272
|
# Returns the array reference stored under the object's `data_set` key.
# This is the internal storage itself, not a copy.
sub as_arrayref {
    my ($self) = @_;
    return $self->{data_set};
}
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Converts this data set into an Algorithm::LibLinear::Problem.
#
# Named arguments (checked by Smart::Args' `args`):
#   bias - optional number; forwarded to the Problem constructor only when
#          it is defined.
#
# The labels and feature hashes of all entries are collected, in entry
# order, into two parallel arrays which are passed by reference as the first
# and second constructor arguments.
sub as_problem {
    args
        my $self,
        my $bias => +{ isa => 'Num', optional => 1, };

    my $entries = $self->as_arrayref;
    my @labels = map { $_->{label} } @$entries;
    my @features = map { $_->{feature} } @$entries;

    my @constructor_args = (\@labels, \@features);
    push @constructor_args, $bias if defined $bias;

    return Algorithm::LibLinear::Problem->new(@constructor_args);
}
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Serializes the data set as text, one entry per line.
#
# Each line consists of the entry's label followed by its "index:value"
# pairs, separated by single spaces and terminated by a newline. Pairs are
# emitted in ascending numeric order of their index.
#
# Returns the concatenated lines (an empty string for an empty data set).
sub as_string {
    args
        my $self;

    my @lines;
    for my $entry (@{ $self->as_arrayref }) {
        my $vector = $entry->{feature};
        my @fields = ($entry->{label});
        for my $index (sort { $a <=> $b } keys %$vector) {
            push @fields, "$index:$vector->{$index}";
        }
        push @lines, join(' ', @fields) . "\n";
    }
    return join '', @lines;
}
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Class method: parses LIBSVM/LIBLINEAR formatted text from a file handle.
#
# Positional arguments (checked by Smart::Args' `args_pos`):
#   source - file handle to read lines from until end of input.
#
# Each non-blank line is split on whitespace into a label followed by
# "index:value" tokens. The label, indices and values are all coerced to
# numbers. Lines that are empty or contain only whitespace are skipped, and
# leading whitespace on a line is ignored.
#
# Returns an array reference of hash references, each with the keys
# `feature` (hash reference mapping index to value) and `label`.
sub parse_input_file {
    args_pos
        my $class => 'ClassName',
        my $source => 'FileHandle';

    my @data_set;
    while (defined(my $line = <$source>)) {
        # The special ' ' pattern discards leading whitespace and treats the
        # trailing newline as a separator, so no chomp is required and an
        # indented line does not yield an empty first field.
        my ($label, @tokens) = split ' ', $line;

        # A blank line carries no label; it must not become a bogus entry.
        next unless defined $label;

        my %feature;
        for my $token (@tokens) {
            my ($index, $value) = split /:/, $token;
            $feature{ $index + 0 } = $value + 0;
        }
        push @data_set, +{ feature => \%feature, label => $label + 0, };
    }
    return \@data_set;
}
94
|
|
|
|
|
|
|
|
95
|
6
|
|
|
6
|
1
|
464
|
# Returns the number of entries in the array given by as_arrayref().
sub size {
    my ($self) = @_;
    return scalar @{ $self->as_arrayref };
}
|
6
|
|
|
|
|
27
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
1; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
__DATA__ |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 NAME |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Algorithm::LibLinear::DataSet |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head1 SYNOPSIS |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
use Algorithm::LibLinear::DataSet; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->new(data_set => [ |
110
|
|
|
|
|
|
|
+{ feature => +{ 1 => 0.708333, 2 => 1, 3 => 1, ... }, label => 1, }, |
111
|
|
|
|
|
|
|
+{ feature => +{ 1 => 0.583333, 2 => -1, 3 => 0.333333, ... }, label => -1, }, |
112
|
|
|
|
|
|
|
+{ feature => +{ 1 => 0.166667, 2 => 1, 3 => -0.333333, ... }, label => 1, }, |
113
|
|
|
|
|
|
|
... |
114
|
|
|
|
|
|
|
]); |
115
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA); |
116
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->load(filename => 'liblinear_file'); |
117
|
|
|
|
|
|
|
my $data_set = Algorithm::LibLinear::DataSet->load(string => "+1 1:0.70833 ..."); |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
say $data_set->size; |
120
|
|
|
|
|
|
|
say $data_set->as_string; # '1 1:0.708333 2:1 3:1 ...' |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
__DATA__ |
123
|
|
|
|
|
|
|
+1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1 |
124
|
|
|
|
|
|
|
-1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1 |
125
|
|
|
|
|
|
|
+1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1 |
126
|
|
|
|
|
|
|
... |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head1 DESCRIPTION |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
This class represents a set of feature vectors with gold answers. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head1 METHODS |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 new(data_set => \@data_set) |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Constructor. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
C<data_set> is an ArrayRef of HashRefs, each of which has 2 keys: C<feature> and C<label>. |
139
|
|
|
|
|
|
|
The value of C<feature> is a HashRef which represents a (sparse) feature vector. Its keys are indices and the corresponding values are real numbers. The indices must be >= 1. |
140
|
|
|
|
|
|
|
The value of C<label> is an integer: the class label that the feature vector belongs to. |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 load(fh => \*FH | filename => $path | string => $string) |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Class method. Loads a data set in LIBSVM/LIBLINEAR format from a file handle, a file, or a string. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 as_string |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Dumps the data set as a string in LIBSVM/LIBLINEAR format. |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head2 size |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
Returns the number of entries in the data set. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=cut |