line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::AM::DataSet; |
2
|
10
|
|
|
10
|
|
76247
|
use strict; |
|
10
|
|
|
|
|
30
|
|
|
10
|
|
|
|
|
296
|
|
3
|
10
|
|
|
10
|
|
49
|
use warnings; |
|
10
|
|
|
|
|
18
|
|
|
10
|
|
|
|
|
457
|
|
4
|
|
|
|
|
|
|
our $VERSION = '3.12'; |
5
|
|
|
|
|
|
|
# ABSTRACT: Manage data used by Algorithm::AM |
6
|
10
|
|
|
10
|
|
117
|
use Carp; |
|
10
|
|
|
|
|
22
|
|
|
10
|
|
|
|
|
628
|
|
7
|
10
|
|
|
10
|
|
4357
|
use Algorithm::AM::DataSet::Item; |
|
10
|
|
|
|
|
23
|
|
|
10
|
|
|
|
|
441
|
|
8
|
10
|
|
|
10
|
|
7017
|
use Path::Tiny; |
|
10
|
|
|
|
|
99881
|
|
|
10
|
|
|
|
|
603
|
|
9
|
|
|
|
|
|
|
use Exporter::Easy ( |
10
|
10
|
|
|
|
|
69
|
OK => ['dataset_from_file'] |
11
|
10
|
|
|
10
|
|
94
|
); |
|
10
|
|
|
|
|
21
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
#pod =head1 SYNOPSIS |
14
|
|
|
|
|
|
|
#pod |
15
|
|
|
|
|
|
|
#pod use Algorithm::AM::DataSet 'dataset_from_file'; |
16
|
|
|
|
|
|
|
#pod use Algorithm::AM::DataSet::Item 'new_item'; |
17
|
|
|
|
|
|
|
#pod my $dataset = Algorithm::AM::DataSet->new(cardinality => 10); |
18
|
|
|
|
|
|
|
#pod # or |
19
|
|
|
|
|
|
|
#pod $dataset = dataset_from_file(path => 'finnverb', format => 'nocommas'); |
20
|
|
|
|
|
|
|
#pod $dataset->add_item( |
21
|
|
|
|
|
|
|
#pod new_item(features => [qw(a b c d e f g h i)])); |
22
|
|
|
|
|
|
|
#pod my $item = $dataset->get_item(2); |
23
|
|
|
|
|
|
|
#pod |
24
|
|
|
|
|
|
|
#pod =head1 DESCRIPTION |
25
|
|
|
|
|
|
|
#pod |
26
|
|
|
|
|
|
|
#pod This package contains a list of items that can be used by |
27
|
|
|
|
|
|
|
#pod L or L for classification. |
28
|
|
|
|
|
|
|
#pod DataSets can be made one item at a time via the L method, |
29
|
|
|
|
|
|
|
#pod or they can be read from files via the L function. |
30
|
|
|
|
|
|
|
#pod |
31
|
|
|
|
|
|
|
#pod =head2 C |
32
|
|
|
|
|
|
|
#pod |
33
|
|
|
|
|
|
|
#pod Creates a new DataSet object. You must provide a C argument |
34
|
|
|
|
|
|
|
#pod indicating the number of features to be contained in each data vector. |
35
|
|
|
|
|
|
|
#pod You can then add items via the add_item method. Each item will contain |
36
|
|
|
|
|
|
|
#pod a feature vector, and also optionally a class label and a comment |
37
|
|
|
|
|
|
|
#pod (also called a "spec"). |
38
|
|
|
|
|
|
|
#pod |
39
|
|
|
|
|
|
|
#pod =cut |
40
|
|
|
|
|
|
|
sub new { |
41
|
50
|
|
|
50
|
1
|
34249
|
my ($class, %opts) = @_; |
42
|
|
|
|
|
|
|
|
43
|
50
|
|
|
|
|
176
|
my $new_opts = _check_opts(%opts); |
44
|
|
|
|
|
|
|
|
45
|
48
|
|
|
|
|
96
|
my $self = bless $new_opts, $class; |
46
|
|
|
|
|
|
|
|
47
|
48
|
|
|
|
|
261
|
$self->_init; |
48
|
|
|
|
|
|
|
|
49
|
48
|
|
|
|
|
166
|
return $self; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# check the options for validity |
53
|
|
|
|
|
|
|
# Return an option hash to initialize $self with |
54
|
|
|
|
|
|
|
# For now only 'cardinality' is allowed/required. |
55
|
|
|
|
|
|
|
sub _check_opts { |
56
|
50
|
|
|
50
|
|
131
|
my (%opts) = @_; |
57
|
|
|
|
|
|
|
|
58
|
50
|
|
|
|
|
79
|
my %final_opts; |
59
|
|
|
|
|
|
|
|
60
|
50
|
100
|
|
|
|
141
|
if(!defined $opts{cardinality}){ |
61
|
1
|
|
|
|
|
16
|
croak q{Failed to provide 'cardinality' parameter}; |
62
|
|
|
|
|
|
|
} |
63
|
49
|
|
|
|
|
111
|
$final_opts{cardinality} = $opts{cardinality}; |
64
|
49
|
|
|
|
|
112
|
delete $opts{cardinality}; |
65
|
|
|
|
|
|
|
|
66
|
49
|
100
|
|
|
|
125
|
if(keys %opts){ |
67
|
|
|
|
|
|
|
# sort the keys in the error message to make testing possible |
68
|
1
|
|
|
|
|
15
|
croak 'Unknown parameters in DataSet constructor: ' . |
69
|
|
|
|
|
|
|
(join ', ', sort keys %opts); |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
48
|
|
|
|
|
111
|
return \%final_opts; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# initialize internal state |
76
|
|
|
|
|
|
|
sub _init { |
77
|
48
|
|
|
48
|
|
79
|
my ($self) = @_; |
78
|
|
|
|
|
|
|
# contains all of the items in the dataset |
79
|
48
|
|
|
|
|
171
|
$self->{items} = []; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# map unique class labels to unique integers; |
82
|
|
|
|
|
|
|
# these are the indices of the class labels in class_list below; |
83
|
|
|
|
|
|
|
# the indices must start at 1 for AM to work, as 0 is reserved |
84
|
|
|
|
|
|
|
# for heterogeneity. |
85
|
48
|
|
|
|
|
115
|
$self->{class_num_index} = {}; |
86
|
|
|
|
|
|
|
# contains the list of class strings in an order that matches |
87
|
|
|
|
|
|
|
# the indices in class_num_index |
88
|
48
|
|
|
|
|
83
|
$self->{class_list} = []; |
89
|
|
|
|
|
|
|
# the total number of different classes contained in the data set |
90
|
48
|
|
|
|
|
76
|
$self->{num_classes} = 0; |
91
|
48
|
|
|
|
|
66
|
return; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
#pod =head2 C |
95
|
|
|
|
|
|
|
#pod |
96
|
|
|
|
|
|
|
#pod Returns the number of features contained in the feature vector of a |
97
|
|
|
|
|
|
|
#pod single item. |
98
|
|
|
|
|
|
|
#pod |
99
|
|
|
|
|
|
|
#pod =cut |
100
|
|
|
|
|
|
|
sub cardinality { |
101
|
881
|
|
|
881
|
1
|
1362
|
my ($self) = @_; |
102
|
881
|
|
|
|
|
2218
|
return $self->{cardinality}; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
#pod =head2 C |
106
|
|
|
|
|
|
|
#pod |
107
|
|
|
|
|
|
|
#pod Returns the number of items in the data set. |
108
|
|
|
|
|
|
|
#pod |
109
|
|
|
|
|
|
|
#pod =cut |
110
|
|
|
|
|
|
|
sub size { |
111
|
830
|
|
|
830
|
1
|
61262
|
my ($self) = @_; |
112
|
830
|
|
|
|
|
1053
|
return scalar @{$self->{items}}; |
|
830
|
|
|
|
|
2530
|
|
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
#pod =head2 C |
116
|
|
|
|
|
|
|
#pod |
117
|
|
|
|
|
|
|
#pod Returns the list of all unique class labels in the data set. |
118
|
|
|
|
|
|
|
#pod |
119
|
|
|
|
|
|
|
#pod =cut |
120
|
|
|
|
|
|
|
sub classes { |
121
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
122
|
0
|
|
|
|
|
0
|
return @{ $self->{class_list} }; |
|
0
|
|
|
|
|
0
|
|
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
#pod =head2 C |
126
|
|
|
|
|
|
|
#pod |
127
|
|
|
|
|
|
|
#pod Adds a new item to the data set. The input may be either an |
128
|
|
|
|
|
|
|
#pod L object, or the arguments to create |
129
|
|
|
|
|
|
|
#pod one via its constructor (features, class, comment). This method will |
130
|
|
|
|
|
|
|
#pod croak if the cardinality of the item does not match L. |
131
|
|
|
|
|
|
|
#pod |
132
|
|
|
|
|
|
|
#pod =cut |
133
|
|
|
|
|
|
|
sub add_item { |
134
|
276
|
|
|
276
|
1
|
745
|
my ($self, @args) = @_; |
135
|
276
|
|
|
|
|
314
|
my $item; |
136
|
276
|
100
|
|
|
|
512
|
if('Algorithm::AM::DataSet::Item' eq ref $args[0]){ |
137
|
219
|
|
|
|
|
255
|
$item = $args[0]; |
138
|
|
|
|
|
|
|
}else{ |
139
|
57
|
|
|
|
|
156
|
$item = Algorithm::AM::DataSet::Item->new(@args); |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
275
|
100
|
|
|
|
456
|
if($self->cardinality != $item->cardinality){ |
143
|
|
|
|
|
|
|
croak 'Expected ' . $self->cardinality . |
144
|
|
|
|
|
|
|
' features, but found ' . (scalar $item->cardinality) . |
145
|
1
|
|
|
|
|
4
|
' in ' . (join ' ', @{$item->features}) . |
|
1
|
|
|
|
|
2
|
|
146
|
|
|
|
|
|
|
' (' . $item->comment . ')'; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
274
|
100
|
|
|
|
569
|
if(defined $item->class){ |
150
|
270
|
|
|
|
|
437
|
$self->_update_class_vars($item->class); |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# store the new item |
154
|
274
|
|
|
|
|
335
|
push @{$self->{items}}, $item; |
|
274
|
|
|
|
|
516
|
|
155
|
274
|
|
|
|
|
502
|
return; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# keep track of classes; needs updating for new item |
159
|
|
|
|
|
|
|
sub _update_class_vars { |
160
|
270
|
|
|
270
|
|
404
|
my ($self, $class) = @_; |
161
|
|
|
|
|
|
|
|
162
|
270
|
100
|
|
|
|
549
|
if(!$self->{class_num_index}->{$class}){ |
163
|
47
|
|
|
|
|
74
|
$self->{num_classes}++; |
164
|
47
|
|
|
|
|
100
|
$self->{class_num_index}->{$class} = $self->{num_classes}; |
165
|
47
|
|
|
|
|
62
|
push @{$self->{class_list}}, $class; |
|
47
|
|
|
|
|
162
|
|
166
|
|
|
|
|
|
|
} |
167
|
270
|
|
|
|
|
348
|
return; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
#pod =head2 C |
171
|
|
|
|
|
|
|
#pod |
172
|
|
|
|
|
|
|
#pod Return the item at the given index. This will be a |
173
|
|
|
|
|
|
|
#pod L object. |
174
|
|
|
|
|
|
|
#pod |
175
|
|
|
|
|
|
|
#pod =cut |
176
|
|
|
|
|
|
|
sub get_item { |
177
|
60306
|
|
|
60306
|
1
|
76637
|
my ($self, $index) = @_; |
178
|
60306
|
|
|
|
|
103392
|
return $self->{items}->[$index]; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
#pod =head2 C |
182
|
|
|
|
|
|
|
#pod |
183
|
|
|
|
|
|
|
#pod Returns the number of different classification labels contained in |
184
|
|
|
|
|
|
|
#pod the data set. |
185
|
|
|
|
|
|
|
#pod |
186
|
|
|
|
|
|
|
#pod =cut |
187
|
|
|
|
|
|
|
sub num_classes { |
188
|
388
|
|
|
388
|
1
|
1635
|
my ($self) = @_; |
189
|
388
|
|
|
|
|
1379
|
return $self->{num_classes}; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# Used by AM. Return an arrayref containing all of the |
193
|
|
|
|
|
|
|
# classes for the data set (ordered the same as the data set). |
194
|
|
|
|
|
|
|
sub _data_classes { |
195
|
194
|
|
|
194
|
|
282
|
my ($self) = @_; |
196
|
|
|
|
|
|
|
my @classes = map { |
197
|
30018
|
50
|
|
|
|
42696
|
defined $_->class ? |
198
|
|
|
|
|
|
|
$self->_index_for_class($_->class) : |
199
|
|
|
|
|
|
|
undef |
200
|
194
|
|
|
|
|
321
|
} @{$self->{items}}; |
|
194
|
|
|
|
|
446
|
|
201
|
194
|
|
|
|
|
1097
|
return \@classes; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Used by AM. Return the integer mapped to the given class string. |
205
|
|
|
|
|
|
|
sub _index_for_class { |
206
|
60053
|
|
|
60053
|
|
75647
|
my ($self, $class) = @_; |
207
|
60053
|
|
|
|
|
102113
|
return $self->{class_num_index}->{$class}; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# Used by Result, which traverses data structures from |
211
|
|
|
|
|
|
|
# AM's guts. |
212
|
|
|
|
|
|
|
sub _class_for_index { |
213
|
364
|
|
|
364
|
|
1857
|
my ($self, $index) = @_; |
214
|
364
|
|
|
|
|
1295
|
return $self->{class_list}->[$index - 1]; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
#pod =head2 C |
218
|
|
|
|
|
|
|
#pod |
219
|
|
|
|
|
|
|
#pod This function may be exported. Given 'path' and 'format' arguments, |
220
|
|
|
|
|
|
|
#pod it reads a file containing a dataset and returns a new DataSet object |
221
|
|
|
|
|
|
|
#pod with the given data. The 'path' argument should be the path to the |
222
|
|
|
|
|
|
|
#pod file. The 'format' argument should be 'commas' or 'nocommas', |
223
|
|
|
|
|
|
|
#pod indicating one of the following formats. You may also specify 'unknown' |
224
|
|
|
|
|
|
|
#pod and 'null' arguments to indicate the strings meant to represent an |
225
|
|
|
|
|
|
|
#pod unknown class value and null feature values. By default these are |
226
|
|
|
|
|
|
|
#pod 'UNK' and '='. |
227
|
|
|
|
|
|
|
#pod |
228
|
|
|
|
|
|
|
#pod The 'commas' file format is shown below: |
229
|
|
|
|
|
|
|
#pod |
230
|
|
|
|
|
|
|
#pod class , f eat u re s , your comment here |
231
|
|
|
|
|
|
|
#pod |
232
|
|
|
|
|
|
|
#pod The commas separate the class label, feature values, and comments, |
233
|
|
|
|
|
|
|
#pod and the whitespace around the commas is optional. Each feature value |
234
|
|
|
|
|
|
|
#pod is separated with whitespace. |
235
|
|
|
|
|
|
|
#pod |
236
|
|
|
|
|
|
|
#pod The 'nocommas' file format is shown below: |
237
|
|
|
|
|
|
|
#pod |
238
|
|
|
|
|
|
|
#pod class features your comment here |
239
|
|
|
|
|
|
|
#pod |
240
|
|
|
|
|
|
|
#pod Here the class, feature values, and comments are separated by |
241
|
|
|
|
|
|
|
#pod whitespace. Each feature value must be a single character with no |
242
|
|
|
|
|
|
|
#pod separating characters, so here the features are f, e, a, t, u, r, |
243
|
|
|
|
|
|
|
#pod e, and s. |
244
|
|
|
|
|
|
|
#pod |
245
|
|
|
|
|
|
|
#pod Lines beginning with a pound character (C<#>) are ignored. |
246
|
|
|
|
|
|
|
#pod |
247
|
|
|
|
|
|
|
#pod =cut |
248
|
|
|
|
|
|
|
sub dataset_from_file {## no critic (RequireArgUnpacking) |
249
|
12
|
|
|
12
|
1
|
9579
|
my (%opts) = ( |
250
|
|
|
|
|
|
|
unknown => 'UNK', |
251
|
|
|
|
|
|
|
null => '=', |
252
|
|
|
|
|
|
|
@_ |
253
|
|
|
|
|
|
|
); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
croak q[Failed to provide 'path' parameter] |
256
|
12
|
100
|
|
|
|
56
|
unless exists $opts{path}; |
257
|
|
|
|
|
|
|
croak q[Failed to provide 'format' parameter] |
258
|
11
|
100
|
|
|
|
42
|
unless exists $opts{format}; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
my ($path, $format, $unknown, $null) = ( |
261
|
10
|
|
|
|
|
33
|
path($opts{path}), @opts{'format', 'unknown', 'null'}); |
262
|
|
|
|
|
|
|
|
263
|
10
|
100
|
|
|
|
286
|
croak "Could not find file $path" |
264
|
|
|
|
|
|
|
unless $path->exists; |
265
|
|
|
|
|
|
|
|
266
|
9
|
|
|
|
|
298
|
my ($field_sep, $feature_sep); |
267
|
9
|
100
|
|
|
|
38
|
if($format eq 'commas'){ |
|
|
100
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# class/features/comment separated by a comma |
269
|
4
|
|
|
|
|
24
|
$field_sep = qr{\s*,\s*}; |
270
|
|
|
|
|
|
|
# features separated by space |
271
|
4
|
|
|
|
|
14
|
$feature_sep = qr{\s+}; |
272
|
|
|
|
|
|
|
}elsif($format eq 'nocommas'){ |
273
|
|
|
|
|
|
|
# class/features/comment separated by space |
274
|
4
|
|
|
|
|
30
|
$field_sep = qr{\s+}; |
275
|
|
|
|
|
|
|
# no seps for features; each is a single character |
276
|
4
|
|
|
|
|
16
|
$feature_sep = qr{}; |
277
|
|
|
|
|
|
|
}else{ |
278
|
1
|
|
|
|
|
16
|
croak "Unknown value $format for format parameter " . |
279
|
|
|
|
|
|
|
q{(should be 'commas' or 'nocommas')}; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
8
|
50
|
|
|
|
26
|
if(!defined $unknown){ |
283
|
0
|
|
|
|
|
0
|
croak q[Must provide a defined value for 'unknown' parameter]; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
8
|
|
|
|
|
43
|
my $reader = _read_data_sub( |
287
|
|
|
|
|
|
|
$path, $unknown, $null, $field_sep, $feature_sep); |
288
|
8
|
|
|
|
|
22
|
my $item = $reader->(); |
289
|
8
|
50
|
|
|
|
39
|
if(!$item){ |
290
|
0
|
|
|
|
|
0
|
croak "No data found in file $path"; |
291
|
|
|
|
|
|
|
} |
292
|
8
|
|
|
|
|
33
|
my $dataset = __PACKAGE__->new(cardinality => $item->cardinality); |
293
|
8
|
|
|
|
|
30
|
$dataset->add_item($item); |
294
|
8
|
|
|
|
|
19
|
while($item = $reader->()){ |
295
|
189
|
|
|
|
|
363
|
$dataset->add_item($item); |
296
|
|
|
|
|
|
|
} |
297
|
7
|
|
|
|
|
236
|
return $dataset; |
298
|
|
|
|
|
|
|
} |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# return a sub that returns one Item per call from the given FH, |
301
|
|
|
|
|
|
|
# and returns undef once the file is done being read. Throws errors |
302
|
|
|
|
|
|
|
# on bad file contents. |
303
|
|
|
|
|
|
|
# Input is file (Path::Tiny), string representing unknown class, |
304
|
|
|
|
|
|
|
# string representing null feature, field separator (class, |
305
|
|
|
|
|
|
|
# features, comment) and feature separator |
306
|
|
|
|
|
|
|
sub _read_data_sub { |
307
|
8
|
|
|
8
|
|
27
|
my ($data_file, $unknown, $null, |
308
|
|
|
|
|
|
|
$field_sep, $feature_sep) = @_; |
309
|
8
|
|
|
|
|
34
|
my $data_fh = $data_file->openr_utf8; |
310
|
8
|
|
|
|
|
49553
|
my $line_num = 0; |
311
|
|
|
|
|
|
|
return sub { |
312
|
205
|
|
|
205
|
|
250
|
my $line; |
313
|
|
|
|
|
|
|
# grab the next non-blank line from the file |
314
|
205
|
|
|
|
|
911
|
while($line = <$data_fh>){ |
315
|
201
|
|
|
|
|
360
|
$line_num++; |
316
|
|
|
|
|
|
|
# skip comments |
317
|
201
|
100
|
|
|
|
448
|
next if $line =~ m/^\s*#/; |
318
|
|
|
|
|
|
|
# cross-platform chomp |
319
|
199
|
|
|
|
|
849
|
$line =~ s/\R$//; |
320
|
199
|
|
|
|
|
958
|
$line =~ s/^\s+|\s+$//g; |
321
|
199
|
100
|
|
|
|
352
|
last if $line; |
322
|
|
|
|
|
|
|
} |
323
|
205
|
100
|
|
|
|
360
|
return unless $line; |
324
|
198
|
|
|
|
|
1040
|
my ($class, $feats, $comment) = split /$field_sep/, $line, 3; |
325
|
|
|
|
|
|
|
# the line has to have at least the class label and features |
326
|
198
|
100
|
|
|
|
404
|
if(!defined $feats){ |
327
|
1
|
|
|
|
|
6
|
croak "Couldn't read data at line $line_num in $data_file"; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
# if the class is specified as unknown, set it to undef to |
330
|
|
|
|
|
|
|
# indicate this to Item |
331
|
197
|
100
|
|
|
|
312
|
if($class eq $unknown){ |
332
|
4
|
|
|
|
|
9
|
undef $class; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
197
|
|
|
|
|
1380
|
my @data_vars = split /$feature_sep/, $feats; |
336
|
|
|
|
|
|
|
# set null features to '' |
337
|
197
|
100
|
|
|
|
414
|
@data_vars = map {$_ eq $null ? '' : $_} @data_vars; |
|
1913
|
|
|
|
|
3297
|
|
338
|
|
|
|
|
|
|
|
339
|
197
|
|
|
|
|
625
|
return Algorithm::AM::DataSet::Item->new( |
340
|
|
|
|
|
|
|
features=> \@data_vars, |
341
|
|
|
|
|
|
|
class => $class, |
342
|
|
|
|
|
|
|
comment => $comment |
343
|
|
|
|
|
|
|
); |
344
|
8
|
|
|
|
|
69
|
}; |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
1; |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
__END__ |