| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Algorithm::LibLinear::DataSet; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 5 |  |  | 5 |  | 569 | use 5.014; | 
|  | 5 |  |  |  |  | 13 |  | 
|  | 5 |  |  |  |  | 191 |  | 
| 4 | 5 |  |  | 5 |  | 1821 | use Algorithm::LibLinear::Types; | 
|  | 5 |  |  |  |  | 13 |  | 
|  | 5 |  |  |  |  | 174 |  | 
| 5 | 5 |  |  | 5 |  | 30 | use Carp qw//; | 
|  | 5 |  |  |  |  | 5 |  | 
|  | 5 |  |  |  |  | 88 |  | 
| 6 | 5 |  |  | 5 |  | 3433 | use List::MoreUtils qw/none/; | 
|  | 5 |  |  |  |  | 5060 |  | 
|  | 5 |  |  |  |  | 524 |  | 
| 7 | 5 |  |  | 5 |  | 2392 | use Smart::Args; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Constructor. Called with one named argument, data_set, which must be an | 
|  |  |  |  |  |  |  | # array reference of labeled data entries. | 
|  |  |  |  |  |  |  | # The reference itself (not a copy) is stored under the data_set key of | 
|  |  |  |  |  |  |  | # the new object, so later changes to the caller's array are visible here. | 
|  |  |  |  |  |  |  | # NOTE(review): the LabeledData type name is presumably declared in | 
|  |  |  |  |  |  |  | # Algorithm::LibLinear::Types -- confirm there. | 
| 9 |  |  |  |  |  |  | sub new { | 
| 10 |  |  |  |  |  |  | args my $class => 'ClassName', | 
| 11 |  |  |  |  |  |  | my $data_set => 'ArrayRef[Algorithm::LibLinear::LabeledData]'; | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | my %instance = (data_set => $data_set); | 
| 14 |  |  |  |  |  |  | return bless \%instance, $class; | 
| 15 |  |  |  |  |  |  | } | 
| 16 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Class method. Builds a data set by parsing LIBSVM/LIBLINEAR formatted | 
|  |  |  |  |  |  |  | # input taken from one of three optional named arguments: | 
|  |  |  |  |  |  |  | #   fh       - an already opened file handle | 
|  |  |  |  |  |  |  | #   filename - path of a file to open for reading | 
|  |  |  |  |  |  |  | #   string   - the data itself, read through an in-memory file handle | 
|  |  |  |  |  |  |  | # When several are given, fh wins over filename, which wins over string. | 
|  |  |  |  |  |  |  | # Croaks if none is given or if the source cannot be opened. | 
|  |  |  |  |  |  |  | # Returns a new instance of $class. | 
| 17 |  |  |  |  |  |  | sub load { | 
| 18 |  |  |  |  |  |  | args | 
| 19 |  |  |  |  |  |  | my $class => 'ClassName', | 
| 20 |  |  |  |  |  |  | my $fh => +{ isa => 'FileHandle', optional => 1, }, | 
| 21 |  |  |  |  |  |  | my $filename => +{ isa => 'Str', optional => 1, }, | 
| 22 |  |  |  |  |  |  | my $string => +{ isa => 'Str', optional => 1, }; | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | if (none { defined } ($fh, $filename, $string)) { | 
| 25 |  |  |  |  |  |  | Carp::croak('No source specified.'); | 
| 26 |  |  |  |  |  |  | } | 
| 27 |  |  |  |  |  |  | my $source = $fh; | 
| 28 |  |  |  |  |  |  | if (not defined $source) { | 
|  |  |  |  |  |  |  | # Name the source in the error so a failed open can be diagnosed; | 
|  |  |  |  |  |  |  | # a bare $! does not say which path was involved. | 
| 29 |  |  |  |  |  |  | my $what = defined $filename ? "file '$filename'" : 'in-memory string'; | 
|  |  |  |  |  |  |  | # Opening a scalar reference reads from the string itself. | 
| 30 |  |  |  |  |  |  | open $source, '<', +($filename // \$string) or Carp::croak("Cannot open $what: $!"); | 
| 31 |  |  |  |  |  |  | } | 
| 32 |  |  |  |  |  |  | return $class->new(data_set => $class->parse_input_file($source)); | 
| 33 |  |  |  |  |  |  | } | 
| 34 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Appends one labeled data entry, passed as the named argument data, to | 
|  |  |  |  |  |  |  | # the end of this data set. Returns the value of push, i.e. the new | 
|  |  |  |  |  |  |  | # number of entries. | 
|  |  |  |  |  |  |  | # The array is reached through as_arrayref: this package defines no | 
|  |  |  |  |  |  |  | # data_set method, so calling $self->data_set would die at runtime. | 
| 35 |  |  |  |  |  |  | sub add_data { | 
| 36 |  |  |  |  |  |  | args | 
| 37 |  |  |  |  |  |  | my $self, | 
| 38 |  |  |  |  |  |  | my $data => 'Algorithm::LibLinear::LabeledData'; | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | push @{ $self->as_arrayref }, $data; | 
| 41 |  |  |  |  |  |  | } | 
| 42 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Returns the array reference stored under the data_set key. This is the | 
|  |  |  |  |  |  |  | # internal array itself, not a copy. | 
| 43 |  |  |  |  |  |  | sub as_arrayref { my ($self) = @_; return $self->{data_set}; } | 
| 44 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Converts this data set into an Algorithm::LibLinear::Problem. | 
|  |  |  |  |  |  |  | # Named argument bias is a number and defaults to -1.0. | 
|  |  |  |  |  |  |  | # Labels and feature hashes are collected into two parallel arrays, in | 
|  |  |  |  |  |  |  | # data set order, and passed to the Problem constructor with the bias. | 
|  |  |  |  |  |  |  | # NOTE(review): Algorithm::LibLinear::Problem is not loaded in this file; | 
|  |  |  |  |  |  |  | # presumably the caller has loaded it already -- confirm. | 
| 45 |  |  |  |  |  |  | sub as_problem { | 
| 46 |  |  |  |  |  |  | args | 
| 47 |  |  |  |  |  |  | my $self, | 
| 48 |  |  |  |  |  |  | my $bias => +{ isa => 'Num', default => -1.0, }; | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | my $entries = $self->as_arrayref; | 
| 51 |  |  |  |  |  |  | my @labels = map { $_->{label} } @$entries; | 
| 52 |  |  |  |  |  |  | my @features = map { $_->{feature} } @$entries; | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | return Algorithm::LibLinear::Problem->new( | 
| 55 |  |  |  |  |  |  | \@labels, \@features, $bias); | 
| 56 |  |  |  |  |  |  | } | 
| 57 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Serializes the data set as text, one entry per line. Each line is the | 
|  |  |  |  |  |  |  | # label followed by index:value pairs, separated by single spaces and | 
|  |  |  |  |  |  |  | # ordered by ascending numeric index. Every line ends with a newline; | 
|  |  |  |  |  |  |  | # an empty data set gives the empty string. | 
| 58 |  |  |  |  |  |  | sub as_string { | 
| 59 |  |  |  |  |  |  | args my $self; | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | my @lines; | 
| 62 |  |  |  |  |  |  | for my $datum (@{ $self->as_arrayref }) { | 
| 63 |  |  |  |  |  |  | my $vector = $datum->{feature}; | 
| 64 |  |  |  |  |  |  | my @indices = sort { $a <=> $b } keys %$vector; | 
| 65 |  |  |  |  |  |  | my @pairs = map { join ':', $_, $vector->{$_} } @indices; | 
| 66 |  |  |  |  |  |  | push @lines, join(' ', $datum->{label}, @pairs); | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | return join '', map { "$_\n" } @lines; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Class method taking positional arguments: the class and an open file | 
|  |  |  |  |  |  |  | # handle. Reads the handle line by line; each line is a label followed | 
|  |  |  |  |  |  |  | # by whitespace-separated index:value pairs. | 
|  |  |  |  |  |  |  | # Returns an array reference of hash references with the keys feature | 
|  |  |  |  |  |  |  | # (hash reference of index => value) and label. Label, index and value | 
|  |  |  |  |  |  |  | # are all forced to numbers. | 
|  |  |  |  |  |  |  | # Blank and whitespace-only lines are skipped instead of producing an | 
|  |  |  |  |  |  |  | # entry with label 0 and no features. | 
| 72 |  |  |  |  |  |  | sub parse_input_file { | 
| 73 |  |  |  |  |  |  | args_pos | 
| 74 |  |  |  |  |  |  | my $class => 'ClassName', | 
| 75 |  |  |  |  |  |  | my $source => 'FileHandle'; | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | my @data_set; | 
| 78 |  |  |  |  |  |  | while (defined(my $line = <$source>)) { | 
|  |  |  |  |  |  |  | # split ' ' discards leading whitespace and the line terminator, so | 
|  |  |  |  |  |  |  | # an indented line cannot yield an empty string as its label. | 
| 79 |  |  |  |  |  |  | my ($label, @feature) = split ' ', $line; | 
| 80 |  |  |  |  |  |  | next unless defined $label; | 
| 81 |  |  |  |  |  |  | $label += 0; | 
| 82 |  |  |  |  |  |  | my %feature = map { | 
| 83 |  |  |  |  |  |  | my ($index, $value) = split /:/; | 
| 84 |  |  |  |  |  |  | $index += 0; | 
| 85 |  |  |  |  |  |  | $value += 0; | 
| 86 |  |  |  |  |  |  | ($index => $value); | 
| 87 |  |  |  |  |  |  | } @feature; | 
| 88 |  |  |  |  |  |  | push @data_set, +{ feature => \%feature, label => $label, }; | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  | return \@data_set; | 
| 91 |  |  |  |  |  |  | } | 
| 92 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Returns the number of entries in the array returned by as_arrayref. | 
| 93 |  |  |  |  |  |  | sub size { my ($self) = @_; return scalar @{ $self->as_arrayref }; } | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | 1; | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | __DATA__ | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | =head1 NAME | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | Algorithm::LibLinear::DataSet | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | use Algorithm::LibLinear::DataSet; | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | my $data_set = Algorithm::LibLinear::DataSet->new(data_set => [ | 
| 108 |  |  |  |  |  |  | +{ feature => +{ 1 => 0.708333, 2 => 1, 3 => 1, ... }, label => 1, }, | 
| 109 |  |  |  |  |  |  | +{ feature => +{ 1 => 0.583333, 2 => -1, 3 => 0.333333, ... }, label => -1, }, | 
| 110 |  |  |  |  |  |  | +{ feature => +{ 1 => 0.166667, 2 => 1, 3 => -0.333333, ... }, label => 1, }, | 
| 111 |  |  |  |  |  |  | ... | 
| 112 |  |  |  |  |  |  | ]); | 
| 113 |  |  |  |  |  |  | my $data_set = Algorithm::LibLinear::DataSet->load(fh => \*DATA); | 
| 114 |  |  |  |  |  |  | my $data_set = Algorithm::LibLinear::DataSet->load(filename => 'liblinear_file'); | 
| 115 |  |  |  |  |  |  | my $data_set = Algorithm::LibLinear::DataSet->load(string => "+1 1:0.70833 ..."); | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | say $data_set->size; | 
| 118 |  |  |  |  |  |  | say $data_set->as_string;  # '1 1:0.70833 ...' (labels are numified on load, so '+1' is dumped as '1') | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | __DATA__ | 
| 121 |  |  |  |  |  |  | +1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1 | 
| 122 |  |  |  |  |  |  | -1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1 | 
| 123 |  |  |  |  |  |  | +1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1 | 
| 124 |  |  |  |  |  |  | ... | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | This class represents a set of feature vectors with gold answers. | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | =head1 METHODS | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | =head2 new(data_set => \@data_set) | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | Constructor. | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | C<data_set> is an ArrayRef of HashRefs, each of which has 2 keys: C<feature> and C<label>. | 
| 137 |  |  |  |  |  |  | The value of C<feature> is a HashRef which represents a (sparse) feature vector. Its keys are indices and the corresponding values are real numbers. The indices must be >= 1. | 
| 138 |  |  |  |  |  |  | The value of C<label> is an integer that is the class label the feature vector belongs to. | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | =head2 load(fh => \*FH | filename => $path | string => $string) | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | Class method. Loads a data set in LIBSVM/LIBLINEAR format from a file handle, a file name, or a string. | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | =head2 as_string | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | Dumps the data set as LIBSVM/LIBLINEAR format data. | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | =head2 size | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | Returns the number of data entries in the set. | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | =cut |