File Coverage

blib/lib/Algorithm/LibLinear/DataSet.pm

Criterion	Covered	Total	%
statement	13	15	86.6
branch			n/a
condition			n/a
subroutine	5	5	100.0
pod			n/a
total	18	20	90.0

line	stmt	sub	time	code
1				package Algorithm::LibLinear::DataSet;
2
3	5	5	569	use 5.014;
	5		13
	5		191
4	5	5	1821	use Algorithm::LibLinear::Types;
	5		13
	5		174
5	5	5	30	use Carp qw//;
	5		5
	5		88
6	5	5	3433	use List::MoreUtils qw/none/;
	5		5060
	5		524
7	5	5	2392	use Smart::Args;
	0
	0
8
# Constructor.
#
# Named arguments (validated by Smart::Args):
#   data_set - ArrayRef[Algorithm::LibLinear::LabeledData]; kept under the
#              object's "data_set" key.
#
# Returns the new object blessed into $class.
sub new {
    args
        my $class => 'ClassName',
        my $data_set => 'ArrayRef[Algorithm::LibLinear::LabeledData]';

    my %fields = (data_set => $data_set);
    return bless \%fields, $class;
}
16
# Class method. Builds a data set by parsing one of three possible sources.
#
# Named arguments (all optional, but at least one must be defined):
#   fh       - an already opened FileHandle; used as-is when defined.
#   filename - path of a file to open for reading.
#   string   - in-memory text, read through a handle opened on a reference
#              to it.
#
# Precedence when several are given: fh, then filename, then string.
# Croaks with 'No source specified.' when none is defined, and with $! when
# the open fails.
sub load {
    args
        my $class => 'ClassName',
        my $fh => +{ isa => 'FileHandle', optional => 1, },
        my $filename => +{ isa => 'Str', optional => 1, },
        my $string => +{ isa => 'Str', optional => 1, };

    Carp::croak('No source specified.')
        if none { defined } ($fh, $filename, $string);

    my $source = $fh;
    unless (defined $source) {
        # Opening a scalar reference yields an in-memory read handle.
        my $target = $filename // \$string;
        open $source, '<', $target or Carp::croak($!);
    }

    my $entries = $class->parse_input_file($source);
    return $class->new(data_set => $entries);
}
34
# Appends one labeled example to this data set.
#
# Named arguments (validated by Smart::Args):
#   data - an Algorithm::LibLinear::LabeledData value.
#
# Returns the value of push, i.e. the number of entries after the append.
sub add_data {
    args
        my $self,
        my $data => 'Algorithm::LibLinear::LabeledData';

    # The package defines no data_set() accessor; the backing array is
    # reached through as_arrayref(), so push onto that reference.
    push @{ $self->as_arrayref }, $data;
}
42
# Returns the array reference stored under the object's "data_set" key.
# The reference itself is returned, not a copy.
sub as_arrayref {
    my ($self) = @_;
    return $self->{data_set};
}
44
# Converts the data set into an Algorithm::LibLinear::Problem.
#
# Named arguments (validated by Smart::Args):
#   bias - Num, defaults to -1.0; passed through unchanged as the third
#          constructor argument.
#
# The labels and feature hashes of all entries are collected, in entry
# order, into two parallel arrays handed to the Problem constructor.
sub as_problem {
    args
        my $self,
        my $bias => +{ isa => 'Num', default => -1.0, };

    my $entries  = $self->as_arrayref;
    my @labels   = map { $_->{label} } @$entries;
    my @features = map { $_->{feature} } @$entries;

    return Algorithm::LibLinear::Problem->new(\@labels, \@features, $bias);
}
57
# Serializes the data set as text, one entry per line.
#
# Each line is the entry's label followed by "index:value" pairs, separated
# by single spaces, with indices in ascending numeric order. Every line,
# including the last, ends with a newline; an empty data set yields ''.
sub as_string {
    args
        my $self;

    my @lines;
    for my $entry (@{ $self->as_arrayref }) {
        my $vector = $entry->{feature};
        my @pairs =
            map { join ':', $_, $vector->{$_} }
            sort { $a <=> $b } keys %$vector;
        push @lines, join(' ', $entry->{label}, @pairs);
    }
    return join '', map { "$_\n" } @lines;
}
71
# Class method. Reads "label index:value index:value ..." lines from a
# file handle.
#
# Positional arguments (validated by Smart::Args):
#   source - a FileHandle to read lines from until EOF.
#
# Returns an ArrayRef of HashRefs, each with a numeric "label" and a
# "feature" HashRef mapping numeric index to numeric value.
#
# Lines that are empty or contain only whitespace are skipped instead of
# producing a spurious entry with label 0 and no features.
sub parse_input_file {
    args_pos
        my $class => 'ClassName',
        my $source => 'FileHandle';

    my @data_set;
    while (defined(my $line = <$source>)) {
        chomp $line;
        next unless $line =~ /\S/;

        # split ' ' ignores leading whitespace, so an indented line cannot
        # yield an empty first field that would be mistaken for the label.
        my ($label, @feature) = split ' ', $line;
        $label += 0;    # numify, e.g. "+1" becomes 1
        my %feature = map {
            my ($index, $value) = split /:/;
            $index += 0;
            $value += 0;
            ($index => $value);
        } @feature;
        push @data_set, +{ feature => \%feature, label => $label, };
    }
    return \@data_set;
}
92
# Returns the number of entries currently held in the data set.
sub size {
    my ($self) = @_;
    return scalar @{ $self->as_arrayref };
}
94
95				1;
96
97				__DATA__
98
99				=head1 NAME
100
101				Algorithm::LibLinear::DataSet
102
103				=head1 SYNOPSIS
104
105				use Algorithm::LibLinear::DataSet;
106
107				my $data_set = Algorithm::LibLinear::DataSet->new(data_set => [
108				+{ feature => +{ 1 => 0.708333, 2 => 1, 3 => 1, ... }, label => 1, },
109				+{ feature => +{ 1 => 0.583333, 2 => -1, 3 => 0.333333, ... }, label => -1, },
110				+{ feature => +{ 1 => 0.166667, 2 => 1, 3 => -0.333333, ... }, label => 1, },
111				...
112				]);
113				my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA);
114				my $data_set = Algorithm::LibLinear::DataSet->load(filename => 'liblinear_file');
115				my $data_set = Algorithm::LibLinear::DataSet->load(string => "+1 1:0.70833 ...");
116
117				say $data_set->size;
118				say $data_set->as_string; # '1 1:0.70833 2:1 3:1 ...'
119
120				__DATA__
121				+1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1
122				-1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1
123				+1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1
124				...
125
126				=head1 DESCRIPTION
127
128				This class represents a set of feature vectors with gold answers.
129
130				=head1 METHODS
131
132				=head2 new(data_set => \@data_set)
133
134				Constructor.
135
136				C<data_set> is an ArrayRef of HashRefs, each of which has two keys: C<feature> and C<label>.
137				The value of C<feature> is a HashRef which represents a (sparse) feature vector. Its keys are indices and the corresponding values are real numbers. The indices must be >= 1.
138				The value of C<label> is an integer class label to which the feature vector belongs.
139
140				=head2 load(fh => \*FH | filename => $path | string => $string)
141
142				Class method. Loads data set from LIBSVM/LIBLINEAR format file.
143
144				=head2 as_string
145
146				Dumps the data set as a LIBSVM/LIBLINEAR format data.
147
148				=head2 size
149
150				Returns the number of data entries in the set.
151
152				=cut