| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::Graphics::Wiggle; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Bio::Graphics::Wiggle -- Binary storage for dense genomic features |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# all positions are 1-based |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $wig = Bio::Graphics::Wiggle->new('./test.wig', |
|
12
|
|
|
|
|
|
|
$writeable, |
|
13
|
|
|
|
|
|
|
{ seqid => $seqid, |
|
14
|
|
|
|
|
|
|
start => $start, |
|
15
|
|
|
|
|
|
|
step => $step, |
|
16
|
|
|
|
|
|
|
min => $min, |
|
17
|
|
|
|
|
|
|
max => $max }); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
$wig->erase; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $seqid = $wig->seqid('new_id'); |
|
22
|
|
|
|
|
|
|
my $max = $wig->max($new_max); |
|
23
|
|
|
|
|
|
|
my $min = $wig->min($new_min); |
|
24
|
|
|
|
|
|
|
my $step = $wig->step($new_step); # data stored at modulus step == 0; all else is blank |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$wig->set_value($position => $value); # store $value at position |
|
27
|
|
|
|
|
|
|
$wig->set_values($position => \@values); # store array of values at position |
|
28
|
|
|
|
|
|
|
$wig->set_range($start=>$end,$value); # store the same $value from $start to $end |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $value = $wig->value($position); # fetch value from position |
|
31
|
|
|
|
|
|
|
my $values = $wig->values($start,$end); # fetch range of data from $start to $end |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$wig->window(100); # sample window size |
|
34
|
|
|
|
|
|
|
$wig->smoothing('mean'); # when sampling, compute the mean value across sample window |
|
35
|
|
|
|
|
|
|
my $values = $wig->values($start,$end,$samples); # fetch $samples data points from $start to $end |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
IMPORTANT NOTE: This implementation is still not right. See |
|
41
|
|
|
|
|
|
|
http://genomewiki.ucsc.edu/index.php/Wiggle for a more space-efficient |
|
42
|
|
|
|
|
|
|
implementation. |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
This module stores "wiggle" style quantitative genome data for display |
|
45
|
|
|
|
|
|
|
in a genome browser application. The data for each chromosome (or |
|
46
|
|
|
|
|
|
|
contig, or other reference sequence) is stored in a single file in the |
|
47
|
|
|
|
|
|
|
following format: |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
256 byte header |
|
50
|
|
|
|
|
|
|
50 bytes seqid, zero-terminated C string |
|
51
|
|
|
|
|
|
|
4 byte long integer, value of "step" (explained later) |
|
52
|
|
|
|
|
|
|
4 byte perl native float, the "min" value |
|
53
|
|
|
|
|
|
|
4 byte perl native float, the "max" value |
|
54
|
|
|
|
|
|
|
4 byte long integer, value of "span" |
|
55
|
|
|
|
|
|
|
4 byte perl native float, the mean |
|
56
|
|
|
|
|
|
|
4 byte perl native float, the standard deviation |
|
57
|
|
|
|
|
|
|
2 byte unsigned short, the version number (currently version 0) |
|
58
|
|
|
|
|
|
|
4 byte long integer, sequence start position (in 0-based coordinates) |
|
59
|
|
|
|
|
|
|
null padding to 256 bytes for future use |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
The remainder of the file consists of 8-bit unsigned scaled integer |
|
62
|
|
|
|
|
|
|
values. This means that all quantitative data will be scaled to 8-bit |
|
63
|
|
|
|
|
|
|
precision! |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
For a convenient method of creating Wiggle files from UCSC-type WIG |
|
66
|
|
|
|
|
|
|
input and creating GFF3 output, please see |
|
67
|
|
|
|
|
|
|
L. |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 METHODS |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 Constructor and Accessors |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over 4 |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item $wig = Bio::Graphics::Wiggle->new($filename,$writeable,{options}) |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Open/create a wiggle-format data file: |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
$filename -- path to the file to open/create |
|
80
|
|
|
|
|
|
|
$writeable -- boolean value indicating whether file is |
|
81
|
|
|
|
|
|
|
writeable. Missing files will only be created |
|
82
|
|
|
|
|
|
|
if $writeable set to a true value. If path is |
|
83
|
|
|
|
|
|
|
empty (undef or empty string) and writeable is true, |
|
84
|
|
|
|
|
|
|
new() will create a temporary file that will be |
|
85
|
|
|
|
|
|
|
deleted when the object goes out of scope. |
|
86
|
|
|
|
|
|
|
{options} -- hash ref of the following named options, only valid |
|
87
|
|
|
|
|
|
|
when creating a new wig file with $writeable true. |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
option name description default |
|
90
|
|
|
|
|
|
|
----------- ----- ------- |
|
91
|
|
|
|
|
|
|
seqid name/id of sequence empty name |
|
92
|
|
|
|
|
|
|
min minimum value of data points 0 |
|
93
|
|
|
|
|
|
|
max maximum value of data points 255 |
|
94
|
|
|
|
|
|
|
step interval between data points 1 |
|
95
|
|
|
|
|
|
|
span width of data points value of "step" |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
The "step" can be used to create sparse files to save space. By |
|
98
|
|
|
|
|
|
|
default, step is set to 1, in which case a data value will be stored |
|
99
|
|
|
|
|
|
|
at each base of the sequence. By setting step to 10, then each value |
|
100
|
|
|
|
|
|
|
is taken to correspond to 10 bp, and the file will be 10x smaller. |
|
101
|
|
|
|
|
|
|
For example, consider this step 5 data set: |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
|
104
|
|
|
|
|
|
|
20 . . . . 60 . . . . 80 . . . |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
We have stored the values "20" "60" and "80" at positions 1, 6 and 11, |
|
107
|
|
|
|
|
|
|
respectively. When retrieving this data, it will appear as if |
|
108
|
|
|
|
|
|
|
positions 1 through 5 have a value of 20, positions 6-10 have a value |
|
109
|
|
|
|
|
|
|
of 60, and positions 11-14 have a value of 80. In the data file, we |
|
110
|
|
|
|
|
|
|
store, positions 1,6,and 11 in adjacent bytes. |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Note that no locking is performed by this module. If you wish to allow |
|
113
|
|
|
|
|
|
|
multi-user write access to the databases files, you will need to |
|
114
|
|
|
|
|
|
|
flock() the files yourself. |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item $seqid = $wig->seqid(['new_id']) |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item $max = $wig->max([$new_max]) |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item $min = $wig->min([$new_min]) |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item $step = $wig->step([$new_step]) |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item $span = $wig->span([$new_span]) |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item $mean = $wig->mean([$new_mean]); |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item $stdev = $wig->stdev([$new_stdev]); |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
These accessors get or set the corresponding values. Setting is only |
|
131
|
|
|
|
|
|
|
allowed if the file was opened for writing. Note that changing the |
|
132
|
|
|
|
|
|
|
min, max and step after writing data to the file under another |
|
133
|
|
|
|
|
|
|
parameter set will produce unexpected (and invalid) results, as the |
|
134
|
|
|
|
|
|
|
existing data is not automatically updated to be consistent. |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item $trim = $wig->trim([$new_trim]); |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
The trim method sets the trimming method, which can be used to trim |
|
139
|
|
|
|
|
|
|
out extreme values. Three methods are currently supported: |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
none No trimming |
|
142
|
|
|
|
|
|
|
stdev Trim 1 standard deviation above and below mean |
|
143
|
|
|
|
|
|
|
stdevN Trim N standard deviations above and below the mean |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
In "stdevN", any can be any positive integer. |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=back |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 Setting Data |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=over 4 |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item $wig->set_value($position => $value) |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
This method sets the value at $position to $value. If a step>1 is in |
|
156
|
|
|
|
|
|
|
force, then $position will be rounded down to the nearest multiple of |
|
157
|
|
|
|
|
|
|
step. |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=item $wig->set_range($start=>$end, $value) |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
This method sets the value of all bases between $start and $end to |
|
162
|
|
|
|
|
|
|
$value, honoring step. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item $sig->set_values($position => \@values) |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
This method writes an array of values into the datababase beginning at |
|
167
|
|
|
|
|
|
|
$position (or the nearest lower multiple of step). If step>1, then |
|
168
|
|
|
|
|
|
|
values will be written at step intervals. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=back |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 Retrieving Data |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=over 4 |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item $value = $wig->value($position) |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Retrieve the single data item at position $position, or the nearest |
|
179
|
|
|
|
|
|
|
lower multiple of $step if step>1. |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item $values = $wig->values($start=>$end) |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Retrieve the values in the range $start to $end and return them as an |
|
184
|
|
|
|
|
|
|
array ref. Note that you will always get an array of size |
|
185
|
|
|
|
|
|
|
($end-$start+1) even if step>1; the data in between the step intervals |
|
186
|
|
|
|
|
|
|
will be filled in. |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item $values = $wig->values($start=>$end,$samples) |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Retrieve a sampling of the values between $start and $end. Nothing |
|
191
|
|
|
|
|
|
|
very sophisticated is done here; the code simply returns the number of |
|
192
|
|
|
|
|
|
|
values indicated in $samples, smoothed according to the smoothing |
|
193
|
|
|
|
|
|
|
method selected (default to "mean"), then selected at even intervals |
|
194
|
|
|
|
|
|
|
from the range $start to $end. The return value is an arrayref of |
|
195
|
|
|
|
|
|
|
exactly $samples values. |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item $string = $wig->export_to_wif($start,$end) |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=item $string = $wig->export_to_wif64($start,$end) |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Export the region from start to end in the "wif" format. This data can |
|
202
|
|
|
|
|
|
|
later be imported into another Bio::Graphics::Wiggle object. The first |
|
203
|
|
|
|
|
|
|
version returns a binary string. The second version returns a base64 |
|
204
|
|
|
|
|
|
|
encoded version that is safe for ascii-oriented formata such as GFF3 |
|
205
|
|
|
|
|
|
|
and XML. |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=item $wig->import_from_wif($string) |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=item $wig->import_from_wif64($string) |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Import a wif format data string into the Bio::Graphics::Wiggle |
|
212
|
|
|
|
|
|
|
object. The first version expects a binary string. The second version |
|
213
|
|
|
|
|
|
|
expects a base64 encoded version that is safe for ascii-oriented |
|
214
|
|
|
|
|
|
|
formata such as GFF3 and XML. |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=back |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=cut |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# read/write genome tiling data, to be compatible with Jim Kent's WIG format |
|
222
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
33
|
|
|
223
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
35
|
|
|
224
|
1
|
|
|
1
|
|
730
|
use IO::File; |
|
|
1
|
|
|
|
|
1142
|
|
|
|
1
|
|
|
|
|
155
|
|
|
225
|
1
|
|
|
1
|
|
8
|
use Carp 'croak','carp','confess'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
71
|
|
|
226
|
|
|
|
|
|
|
|
|
227
|
1
|
|
|
1
|
|
5
|
use constant HEADER_LEN => 256; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
101
|
|
|
228
|
|
|
|
|
|
|
# seqid, step, min, max, span, mean, stdev, version, start |
|
229
|
1
|
|
|
1
|
|
6
|
use constant HEADER => '(Z50LFFLFFSL)@'.HEADER_LEN; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
51
|
|
|
230
|
1
|
|
|
1
|
|
5
|
use constant BODY => 'C'; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
105
|
|
|
231
|
1
|
|
|
1
|
|
6
|
use constant DEBUG => 0; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
65
|
|
|
232
|
1
|
|
|
1
|
|
10
|
use constant DEFAULT_SMOOTHING => 'mean'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
56
|
|
|
233
|
1
|
|
|
1
|
|
5
|
use constant VERSION => 0; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
4927
|
|
|
234
|
|
|
|
|
|
|
our $VERSION = '1.0'; |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
sub new { |
|
237
|
2
|
|
|
2
|
1
|
1132
|
my $class = shift; |
|
238
|
2
|
|
|
|
|
5
|
my ($path,$write,$options) = @_; |
|
239
|
2
|
|
50
|
|
|
7
|
$path ||= ''; # to avoid uninit warning |
|
240
|
2
|
50
|
|
|
|
73
|
my $mode = $write ? -e $path # if file already exists... |
|
|
|
100
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
? '+<' # ...open for read/write |
|
242
|
|
|
|
|
|
|
: '+>' # ...else clobber and open a new one |
|
243
|
|
|
|
|
|
|
: '<'; # read only |
|
244
|
2
|
|
|
|
|
8
|
my $fh = $class->new_fh($path,$mode); |
|
245
|
2
|
50
|
0
|
|
|
418
|
$fh or die (($path||'temporary file').": $!"); |
|
246
|
|
|
|
|
|
|
|
|
247
|
2
|
|
100
|
|
|
11
|
$options ||= {}; |
|
248
|
|
|
|
|
|
|
|
|
249
|
2
|
|
33
|
|
|
26
|
my $self = bless {fh => $fh, |
|
250
|
|
|
|
|
|
|
write => $write, |
|
251
|
|
|
|
|
|
|
dirty => scalar keys %$options |
|
252
|
|
|
|
|
|
|
}, ref $class || $class; |
|
253
|
|
|
|
|
|
|
|
|
254
|
2
|
|
100
|
|
|
5
|
my $stored_options = eval {$self->_readoptions} || {}; |
|
255
|
2
|
50
|
|
|
|
56
|
$options->{start}-- if defined $options->{start}; # 1-based ==> 0-based coordinates |
|
256
|
2
|
|
|
|
|
19
|
my %merged_options = (%$stored_options,%$options); |
|
257
|
|
|
|
|
|
|
# warn "merged options = ",join ' ',%merged_options; |
|
258
|
2
|
|
50
|
|
|
11
|
$merged_options{version}||= 0; |
|
259
|
2
|
|
50
|
|
|
5
|
$merged_options{seqid} ||= 'chrUnknown'; |
|
260
|
2
|
|
50
|
|
|
6
|
$merged_options{min} ||= 0; |
|
261
|
2
|
|
50
|
|
|
6
|
$merged_options{max} ||= 255; |
|
262
|
2
|
|
50
|
|
|
4
|
$merged_options{mean} ||= 128; |
|
263
|
2
|
|
50
|
|
|
7
|
$merged_options{stdev} ||= 255; |
|
264
|
2
|
|
100
|
|
|
8
|
$merged_options{trim} ||= 'none'; |
|
265
|
2
|
|
50
|
|
|
5
|
$merged_options{step} ||= 1; |
|
266
|
2
|
|
50
|
|
|
8
|
$merged_options{start} ||= 0; |
|
267
|
2
|
|
33
|
|
|
5
|
$merged_options{span} ||= $merged_options{step}; |
|
268
|
2
|
|
|
|
|
4
|
$self->{options} = \%merged_options; |
|
269
|
2
|
100
|
|
|
|
8
|
$self->_do_trim unless $self->trim eq 'none'; |
|
270
|
2
|
|
|
|
|
10
|
return $self; |
|
271
|
|
|
|
|
|
|
} |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
sub new_fh { |
|
274
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
|
275
|
2
|
|
|
|
|
4
|
my ($path,$mode) = @_; |
|
276
|
2
|
50
|
|
|
|
25
|
return $path ? IO::File->new($path,$mode) |
|
277
|
|
|
|
|
|
|
: IO::File->new_tmpfile; |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub end { |
|
281
|
4
|
|
|
4
|
0
|
5
|
my $self = shift; |
|
282
|
4
|
100
|
|
|
|
8
|
unless (defined $self->{end}) { |
|
283
|
1
|
|
|
|
|
4
|
my $size = (stat($self->fh))[7]; |
|
284
|
1
|
|
|
|
|
2
|
my $data_len = $size - HEADER_LEN(); |
|
285
|
1
|
50
|
|
|
|
3
|
return unless $data_len>0; # undef end |
|
286
|
1
|
|
|
|
|
3
|
$self->{end} = ($self->start-1) + $data_len * $self->step; |
|
287
|
|
|
|
|
|
|
} |
|
288
|
4
|
|
|
|
|
11
|
return $self->{end}; |
|
289
|
|
|
|
|
|
|
} |
|
290
|
|
|
|
|
|
|
|
|
291
|
2
|
|
|
2
|
|
1140
|
sub DESTROY { shift->write } |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub erase { |
|
294
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
295
|
0
|
|
|
|
|
0
|
$self->fh->truncate(HEADER_LEN); |
|
296
|
|
|
|
|
|
|
} |
|
297
|
|
|
|
|
|
|
|
|
298
|
7631
|
|
|
7631
|
0
|
25213
|
sub fh { shift->{fh} } |
|
299
|
3813
|
|
|
3813
|
0
|
5063
|
sub seek { shift->fh->seek(shift,0) } |
|
300
|
0
|
|
|
0
|
0
|
0
|
sub tell { shift->fh->tell() } |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub _option { |
|
303
|
7660
|
|
|
7660
|
|
6416
|
my $self = shift; |
|
304
|
7660
|
|
|
|
|
6351
|
my $option = shift; |
|
305
|
7660
|
|
|
|
|
8043
|
my $d = $self->{options}{$option}; |
|
306
|
7660
|
100
|
|
|
|
11095
|
if (@_) { |
|
307
|
2
|
|
|
|
|
3
|
$self->{dirty}++; |
|
308
|
2
|
|
|
|
|
3
|
$self->{options}{$option} = shift; |
|
309
|
2
|
50
|
66
|
|
|
12
|
delete $self->{scale} if $option eq 'min' or $option eq 'max'; |
|
310
|
|
|
|
|
|
|
} |
|
311
|
7660
|
|
|
|
|
9799
|
return $d; |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
|
|
314
|
0
|
|
|
0
|
0
|
0
|
sub version { shift->_option('version',@_) } |
|
315
|
2
|
|
|
2
|
1
|
338
|
sub seqid { shift->_option('seqid',@_) } |
|
316
|
4
|
|
|
4
|
1
|
8
|
sub min { shift->_option('min',@_) } |
|
317
|
4
|
|
|
4
|
1
|
7
|
sub max { shift->_option('max',@_) } |
|
318
|
3818
|
|
|
3818
|
1
|
5729
|
sub step { shift->_option('step',@_) } |
|
319
|
9
|
|
|
9
|
1
|
16
|
sub span { shift->_option('span',@_) } |
|
320
|
1
|
|
|
1
|
1
|
4
|
sub mean { shift->_option('mean',@_) } |
|
321
|
1
|
|
|
1
|
1
|
4
|
sub stdev { shift->_option('stdev',@_) } |
|
322
|
3
|
|
|
3
|
1
|
9
|
sub trim { shift->_option('trim',@_) } |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
sub start { # slightly different because we have to deal with 1 vs 0-based coordinates |
|
325
|
3818
|
|
|
3818
|
0
|
3199
|
my $self = shift; |
|
326
|
3818
|
|
|
|
|
4856
|
my $start = $self->_option('start'); |
|
327
|
3818
|
|
|
|
|
3586
|
$start++; # convert into 1-based coordinates |
|
328
|
3818
|
50
|
|
|
|
5380
|
if (@_) { |
|
329
|
0
|
|
|
|
|
0
|
my $newstart = shift; |
|
330
|
0
|
|
|
|
|
0
|
$self->_option('start',$newstart-1); # store in zero-based coordinates |
|
331
|
|
|
|
|
|
|
} |
|
332
|
3818
|
|
|
|
|
4117
|
return $start; |
|
333
|
|
|
|
|
|
|
} |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
sub smoothing { |
|
336
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
337
|
0
|
|
0
|
|
|
0
|
my $d = $self->{smoothing} || DEFAULT_SMOOTHING; |
|
338
|
0
|
0
|
|
|
|
0
|
$self->{smoothing} = shift if @_; |
|
339
|
0
|
|
|
|
|
0
|
$d; |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
sub write { |
|
343
|
3
|
|
|
3
|
0
|
5
|
my $self = shift; |
|
344
|
3
|
50
|
66
|
|
|
240
|
if ($self->{dirty} && $self->{write}) { |
|
345
|
1
|
|
|
|
|
6
|
$self->_writeoptions($self->{options}); |
|
346
|
1
|
|
|
|
|
10
|
undef $self->{dirty}; |
|
347
|
1
|
|
|
|
|
2
|
$self->fh->flush; |
|
348
|
|
|
|
|
|
|
} |
|
349
|
|
|
|
|
|
|
} |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub _readoptions { |
|
352
|
2
|
|
|
2
|
|
3
|
my $self = shift; |
|
353
|
2
|
|
|
|
|
7
|
my $fh = $self->fh; |
|
354
|
2
|
|
|
|
|
5
|
my $header; |
|
355
|
2
|
|
|
|
|
25
|
$fh->seek(0,0); |
|
356
|
2
|
100
|
|
|
|
23
|
return unless $fh->read($header,HEADER_LEN) == HEADER_LEN; |
|
357
|
1
|
|
|
|
|
19
|
return $self->_parse_header($header); |
|
358
|
|
|
|
|
|
|
} |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub _parse_header { |
|
361
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
|
362
|
1
|
|
|
|
|
3
|
my $header = shift; |
|
363
|
1
|
|
|
|
|
12
|
my ($seqid,$step,$min,$max,$span, |
|
364
|
|
|
|
|
|
|
$mean,$stdev,$version,$start) = unpack(HEADER,$header); |
|
365
|
1
|
|
|
|
|
11
|
return { seqid => $seqid, |
|
366
|
|
|
|
|
|
|
step => $step, |
|
367
|
|
|
|
|
|
|
span => $span, |
|
368
|
|
|
|
|
|
|
min => $min, |
|
369
|
|
|
|
|
|
|
max => $max, |
|
370
|
|
|
|
|
|
|
mean => $mean, |
|
371
|
|
|
|
|
|
|
stdev => $stdev, |
|
372
|
|
|
|
|
|
|
version => $version, |
|
373
|
|
|
|
|
|
|
start => $start, |
|
374
|
|
|
|
|
|
|
}; |
|
375
|
|
|
|
|
|
|
} |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
sub _generate_header { |
|
378
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
|
379
|
1
|
|
|
|
|
1
|
my $options = shift; |
|
380
|
1
|
|
|
|
|
3
|
return pack(HEADER,@{$options}{qw(seqid step min max span mean stdev version start)}); |
|
|
1
|
|
|
|
|
16
|
|
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
sub _writeoptions { |
|
384
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
|
385
|
1
|
|
|
|
|
2
|
my $options = shift; |
|
386
|
1
|
|
|
|
|
3
|
my $fh = $self->fh; |
|
387
|
1
|
|
|
|
|
5
|
my $header = $self->_generate_header($options); |
|
388
|
1
|
|
|
|
|
6
|
$fh->seek(0,0); |
|
389
|
1
|
50
|
|
|
|
56
|
$fh->print($header) or die "write failed: $!"; |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
sub _do_trim { |
|
393
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# don't trim if there is no score range |
|
396
|
1
|
50
|
|
|
|
4
|
($self->max - $self->min) or return; |
|
397
|
|
|
|
|
|
|
|
|
398
|
1
|
|
|
|
|
4
|
my $trim = lc $self->trim; |
|
399
|
1
|
|
|
|
|
2
|
my ($method,$arg); |
|
400
|
1
|
50
|
|
|
|
9
|
if ($trim =~ /([a-z]+)(\d+)/) { |
|
401
|
1
|
|
|
|
|
6
|
$method = "_trim_${1}"; |
|
402
|
1
|
|
|
|
|
4
|
$arg = $2; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
else { |
|
405
|
0
|
|
|
|
|
0
|
$method = "_trim_${trim}"; |
|
406
|
|
|
|
|
|
|
} |
|
407
|
1
|
50
|
|
|
|
6
|
unless ($self->can($method)) { |
|
408
|
0
|
|
|
|
|
0
|
carp "invalid trim method $trim"; |
|
409
|
0
|
|
|
|
|
0
|
return; |
|
410
|
|
|
|
|
|
|
} |
|
411
|
|
|
|
|
|
|
|
|
412
|
1
|
|
|
|
|
4
|
$self->$method($arg); |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# trim n standard deviations from the mean |
|
416
|
|
|
|
|
|
|
sub _trim_stdev { |
|
417
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
|
418
|
1
|
|
50
|
|
|
3
|
my $factor = shift || 1; |
|
419
|
1
|
|
|
|
|
3
|
my $mean = $self->mean; |
|
420
|
1
|
|
|
|
|
3
|
my $stdev = $self->stdev * $factor; |
|
421
|
1
|
50
|
|
|
|
2
|
my $min = $self->min > $mean - $stdev ? $self->min : $mean - $stdev; |
|
422
|
1
|
50
|
|
|
|
4
|
my $max = $self->max < $mean + $stdev ? $self->max : $mean + $stdev; |
|
423
|
1
|
|
|
|
|
1
|
warn "_trim_stdev (* $factor) : setting min to $min, max to $max (was ",$self->min,',',$self->max,')' |
|
424
|
|
|
|
|
|
|
if DEBUG; |
|
425
|
1
|
|
|
|
|
3
|
$self->min($min); |
|
426
|
1
|
|
|
|
|
2
|
$self->max($max); |
|
427
|
|
|
|
|
|
|
} |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
sub set_value { |
|
430
|
3809
|
|
|
3809
|
1
|
3566
|
my $self = shift; |
|
431
|
3809
|
50
|
|
|
|
6543
|
croak "usage: \$wig->set_value(\$position => \$value)" |
|
432
|
|
|
|
|
|
|
unless @_ == 2; |
|
433
|
3809
|
|
|
|
|
5789
|
$self->value(@_); |
|
434
|
|
|
|
|
|
|
} |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
sub set_range { |
|
437
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
438
|
0
|
0
|
|
|
|
0
|
croak "usage: \$wig->set_range(\$start_position => \$end_position, \$value)" |
|
439
|
|
|
|
|
|
|
unless @_ == 3; |
|
440
|
0
|
|
|
|
|
0
|
$self->value(@_); |
|
441
|
|
|
|
|
|
|
} |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub value { |
|
444
|
3809
|
|
|
3809
|
1
|
3288
|
my $self = shift; |
|
445
|
3809
|
|
|
|
|
3266
|
my $position = shift; |
|
446
|
|
|
|
|
|
|
|
|
447
|
3809
|
|
|
|
|
4909
|
my $offset = $self->_calculate_offset($position); |
|
448
|
3809
|
50
|
|
|
|
6804
|
$offset >= HEADER_LEN or die "Tried to retrieve data from before start position"; |
|
449
|
3809
|
50
|
|
|
|
5691
|
$self->seek($offset) or die "Seek failed: $!"; |
|
450
|
|
|
|
|
|
|
|
|
451
|
3809
|
50
|
|
|
|
65624
|
if (@_ == 2) { |
|
|
|
50
|
|
|
|
|
|
|
452
|
0
|
|
|
|
|
0
|
my $end = shift; |
|
453
|
0
|
|
|
|
|
0
|
my $new_value = shift; |
|
454
|
0
|
|
|
|
|
0
|
my $step = $self->step; |
|
455
|
0
|
|
|
|
|
0
|
my $scaled_value = $self->scale($new_value); |
|
456
|
0
|
0
|
|
|
|
0
|
$self->fh->print(pack('C*',($scaled_value)x(($end-$position+1)/$step))) or die "Write failed: $!"; |
|
457
|
0
|
0
|
0
|
|
|
0
|
$self->{end} = $end if !exists $self->{end} || $self->{end} < $end; |
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
elsif (@_==1) { |
|
461
|
3809
|
|
|
|
|
4299
|
my $new_value = shift; |
|
462
|
3809
|
|
|
|
|
6428
|
my $scaled_value = $self->scale($new_value); |
|
463
|
3809
|
50
|
|
|
|
5529
|
$self->fh->print(pack('C*',$scaled_value)) or die "Write failed: $!"; |
|
464
|
3809
|
50
|
66
|
|
|
34923
|
$self->{end} = $position if !exists $self->{end} || $self->{end} < $position; |
|
465
|
3809
|
|
|
|
|
7451
|
return $new_value; |
|
466
|
|
|
|
|
|
|
} |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
else { # retrieving data |
|
469
|
0
|
|
|
|
|
0
|
my $buffer; |
|
470
|
0
|
0
|
|
|
|
0
|
$self->fh->read($buffer,1) or die "Read failed: $!"; |
|
471
|
0
|
|
|
|
|
0
|
my $scaled_value = unpack('C*',$buffer); |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
# missing data, so look back at most span values to get it |
|
474
|
0
|
0
|
0
|
|
|
0
|
if ($scaled_value == 0 && (my $span = $self->span) > 1) { |
|
475
|
0
|
|
|
|
|
0
|
$offset = $self->_calculate_offset($position-$span+1); |
|
476
|
0
|
0
|
|
|
|
0
|
$offset >= HEADER_LEN or die "Tried to retrieve data from before start position"; |
|
477
|
0
|
0
|
|
|
|
0
|
$self->seek($offset) or die "Seek failed: $!"; |
|
478
|
|
|
|
|
|
|
|
|
479
|
0
|
|
|
|
|
0
|
$self->fh->read($buffer,$span/$self->step); |
|
480
|
0
|
|
|
|
|
0
|
for (my $i=length($buffer)-2;$i>=0;$i--) { |
|
481
|
0
|
|
|
|
|
0
|
my $val = substr($buffer,$i,1); |
|
482
|
0
|
0
|
|
|
|
0
|
next if $val eq "\0"; |
|
483
|
0
|
|
|
|
|
0
|
$scaled_value = unpack('C*',$val); |
|
484
|
0
|
|
|
|
|
0
|
last; |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
} |
|
488
|
0
|
|
|
|
|
0
|
return $self->unscale($scaled_value); |
|
489
|
|
|
|
|
|
|
} |
|
490
|
|
|
|
|
|
|
} |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub _calculate_offset { |
|
493
|
3813
|
|
|
3813
|
|
3591
|
my $self = shift; |
|
494
|
3813
|
|
|
|
|
3019
|
my $position = shift; |
|
495
|
3813
|
|
|
|
|
5983
|
my $step = $self->step; |
|
496
|
3813
|
|
|
|
|
5451
|
my $start = $self->start; |
|
497
|
3813
|
|
|
|
|
8629
|
return HEADER_LEN + int(($position-$start)/$step); |
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
sub set_values { |
|
501
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
502
|
0
|
0
|
0
|
|
|
0
|
croak "usage: \$wig->set_values(\$position => \@values)" |
|
503
|
|
|
|
|
|
|
unless @_ == 2 and ref $_[1] eq 'ARRAY'; |
|
504
|
0
|
|
|
|
|
0
|
$self->values(@_); |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
# read or write a series of values |
|
508
|
|
|
|
|
|
|
sub values { |
|
509
|
4
|
|
|
4
|
1
|
6
|
my $self = shift; |
|
510
|
4
|
|
|
|
|
3
|
my $start = shift; |
|
511
|
4
|
50
|
33
|
|
|
15
|
if (ref $_[0] && ref $_[0] eq 'ARRAY') { |
|
512
|
0
|
|
|
|
|
0
|
$self->_store_values($start,@_); |
|
513
|
|
|
|
|
|
|
} else { |
|
514
|
4
|
|
|
|
|
14
|
$self->_retrieve_values($start,@_); |
|
515
|
|
|
|
|
|
|
} |
|
516
|
|
|
|
|
|
|
} |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
sub export_to_wif64 { |
|
519
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
520
|
0
|
|
|
|
|
0
|
my $data = $self->export_to_wif(@_); |
|
521
|
0
|
0
|
|
|
|
0
|
eval "require MIME::Base64" |
|
522
|
|
|
|
|
|
|
unless MIME::Base64->can('encode_base64'); |
|
523
|
0
|
|
|
|
|
0
|
return MIME::Base64::encode_base64($data); |
|
524
|
|
|
|
|
|
|
} |
|
525
|
|
|
|
|
|
|
sub import_from_wif64 { |
|
526
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
527
|
0
|
|
|
|
|
0
|
my $data = shift; |
|
528
|
|
|
|
|
|
|
|
|
529
|
0
|
0
|
|
|
|
0
|
eval "require MIME::Base64" |
|
530
|
|
|
|
|
|
|
unless MIME::Base64->can('decode_base64'); |
|
531
|
0
|
|
|
|
|
0
|
return $self->import_from_wif(MIME::Base64::decode_base64($data)); |
|
532
|
|
|
|
|
|
|
} |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# subregion in "wiggle interchange format" (wif) |
|
535
|
|
|
|
|
|
|
sub export_to_wif { |
|
536
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
537
|
0
|
|
|
|
|
0
|
my ($start,$end) = @_; |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# get the 256 byte header |
|
540
|
0
|
|
|
|
|
0
|
my $data = $self->_generate_header($self->{options}); |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# add the range to the data (8 bytes overhead) |
|
543
|
0
|
|
|
|
|
0
|
$data .= pack("L",$start); |
|
544
|
0
|
|
|
|
|
0
|
$data .= pack("L",$end); |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
# add the packed data for this range |
|
547
|
0
|
|
|
|
|
0
|
$data .= $self->_retrieve_packed_range($start,$end-$start+1,$self->step); |
|
548
|
0
|
|
|
|
|
0
|
return $data; |
|
549
|
|
|
|
|
|
|
} |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
sub export_to_bedgraph { |
|
552
|
1
|
|
|
1
|
0
|
613
|
my $self = shift; |
|
553
|
1
|
|
|
|
|
3
|
my ($start,$end,$fh) = @_; |
|
554
|
1
|
|
|
|
|
2
|
my $max_range = 100_000; |
|
555
|
|
|
|
|
|
|
|
|
556
|
1
|
|
50
|
|
|
4
|
$start ||= 1; |
|
557
|
1
|
|
33
|
|
|
3
|
$end ||= $self->end; |
|
558
|
|
|
|
|
|
|
|
|
559
|
1
|
|
|
|
|
2
|
my $lines; |
|
560
|
1
|
|
|
|
|
4
|
for (my $s=$start;$s<$end;$s+=$max_range) { |
|
561
|
1
|
|
|
|
|
3
|
my $e = $s + $max_range - 1; |
|
562
|
1
|
50
|
|
|
|
4
|
$e = $end if $e > $end; |
|
563
|
1
|
|
|
|
|
4
|
my $b = $self->values($s,$e); |
|
564
|
1
|
|
|
|
|
8
|
$lines .= $self->_bedgraph_lines($s,$b,$fh); |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
1
|
|
|
|
|
8
|
return $lines; |
|
568
|
|
|
|
|
|
|
} |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
sub _bedgraph_lines { |
|
571
|
1
|
|
|
1
|
|
3
|
my $self = shift; |
|
572
|
1
|
|
|
|
|
3
|
my ($start,$values,$fh) = @_; |
|
573
|
1
|
|
|
|
|
6
|
my $seqid = $self->seqid; |
|
574
|
1
|
|
|
|
|
1
|
my $result; |
|
575
|
|
|
|
|
|
|
|
|
576
|
1
|
|
|
|
|
2
|
my ($last_val,$last_start,$end); |
|
577
|
1
|
|
|
|
|
2
|
$last_start = $start-1; # 0 based indexing |
|
578
|
1
|
|
|
|
|
7
|
for (my $i=0;$i<@$values;$i++) { |
|
579
|
5000
|
|
|
|
|
3603
|
my $v = $values->[$i]; |
|
580
|
|
|
|
|
|
|
|
|
581
|
5000
|
100
|
|
|
|
6165
|
if (!defined $v) { |
|
582
|
2168
|
100
|
|
|
|
2587
|
if (defined $last_val) { |
|
583
|
56
|
|
|
|
|
103
|
$result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val); |
|
584
|
56
|
|
|
|
|
61
|
undef $last_val; |
|
585
|
|
|
|
|
|
|
} |
|
586
|
2168
|
|
|
|
|
1418
|
$last_start = $start+$i; |
|
587
|
2168
|
|
|
|
|
3045
|
next; |
|
588
|
|
|
|
|
|
|
} |
|
589
|
|
|
|
|
|
|
|
|
590
|
2832
|
50
|
66
|
|
|
7524
|
if (defined $last_val && $last_val != $v) { |
|
591
|
0
|
|
|
|
|
0
|
$result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val); |
|
592
|
0
|
|
|
|
|
0
|
$last_start = $start+$i-1; |
|
593
|
|
|
|
|
|
|
} |
|
594
|
|
|
|
|
|
|
|
|
595
|
2832
|
|
|
|
|
1856
|
$last_val = $v; |
|
596
|
2832
|
|
|
|
|
4046
|
$end = $start+$i-1; |
|
597
|
|
|
|
|
|
|
} |
|
598
|
1
|
50
|
|
|
|
12
|
$result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$end+1,$last_val) if $last_val; |
|
599
|
1
|
|
|
|
|
85
|
return $result; |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
sub _append_or_print_bedgraph { |
|
603
|
57
|
|
|
57
|
|
49
|
my $self = shift; |
|
604
|
57
|
|
|
|
|
64
|
my ($fh,$seqid,$start,$end,$val) = @_; |
|
605
|
57
|
|
|
|
|
309
|
my $data = join("\t",$seqid,$start,$end,sprintf("%.2f",$val))."\n"; |
|
606
|
57
|
50
|
|
|
|
79
|
if ($fh) { |
|
607
|
0
|
|
|
|
|
0
|
print $fh $data; |
|
608
|
0
|
|
|
|
|
0
|
return ''; |
|
609
|
|
|
|
|
|
|
} else { |
|
610
|
57
|
|
|
|
|
103
|
return $data; |
|
611
|
|
|
|
|
|
|
} |
|
612
|
|
|
|
|
|
|
} |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
sub import_from_wif { |
|
615
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
616
|
0
|
|
|
|
|
0
|
my $wifdata = shift; |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# BUG: should check that header is compatible |
|
619
|
0
|
|
|
|
|
0
|
my $header = substr($wifdata,0,HEADER_LEN); |
|
620
|
0
|
|
|
|
|
0
|
my $start = unpack('L',substr($wifdata,HEADER_LEN, 4)); |
|
621
|
0
|
|
|
|
|
0
|
my $end = unpack('L',substr($wifdata,HEADER_LEN+4,4)); |
|
622
|
|
|
|
|
|
|
|
|
623
|
0
|
|
|
|
|
0
|
my $options = $self->_parse_header($header); |
|
624
|
0
|
|
0
|
|
|
0
|
my $stored_options = eval {$self->_readoptions} || {}; |
|
625
|
0
|
|
|
|
|
0
|
my %merged_options = (%$stored_options,%$options); |
|
626
|
0
|
|
|
|
|
0
|
$self->{options} = \%merged_options; |
|
627
|
0
|
|
|
|
|
0
|
$self->{dirty}++; |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# write the data |
|
630
|
0
|
|
|
|
|
0
|
$self->seek($self->_calculate_offset($start)); |
|
631
|
0
|
0
|
|
|
|
0
|
$self->fh->print(substr($wifdata,HEADER_LEN+8)) or die "write failed: $!"; |
|
632
|
0
|
0
|
0
|
|
|
0
|
$self->{end} = $end if !defined $self->{end} or $self->{end} < $end; |
|
633
|
|
|
|
|
|
|
} |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
sub _retrieve_values { |
|
636
|
4
|
|
|
4
|
|
3
|
my $self = shift; |
|
637
|
4
|
|
|
|
|
7
|
my ($start,$end,$samples) = @_; |
|
638
|
|
|
|
|
|
|
|
|
639
|
4
|
|
|
|
|
6
|
my $data_start = $self->start; |
|
640
|
4
|
|
|
|
|
10
|
my $step = $self->step; |
|
641
|
4
|
|
|
|
|
9
|
my $span = $self->span; |
|
642
|
|
|
|
|
|
|
|
|
643
|
4
|
50
|
|
|
|
11
|
croak "Value of start position ($start) is less than data start of $data_start" |
|
644
|
|
|
|
|
|
|
unless $start >= $data_start; |
|
645
|
4
|
50
|
|
|
|
9
|
croak "Value of end position ($end) is greater than data end of ",$self->end+$span, |
|
646
|
|
|
|
|
|
|
unless $end <= $self->end + $span; |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
# generate list of positions to sample from |
|
649
|
4
|
|
|
|
|
7
|
my $length = $end-$start+1; |
|
650
|
4
|
|
33
|
|
|
14
|
$samples ||= $length; |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
# warn "samples = $samples, length=$length, span=$span, step=$step"; |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
# if the length is grossly greater than the samples, then we won't even |
|
655
|
|
|
|
|
|
|
# bother fetching all the data, but just sample into the disk file |
|
656
|
4
|
50
|
33
|
|
|
13
|
if ($length/$samples > 100 && $step == 1) { |
|
657
|
0
|
|
|
|
|
0
|
my @result; |
|
658
|
|
|
|
|
|
|
# my $window = 20*($span/$step); |
|
659
|
0
|
|
|
|
|
0
|
my $interval = $length/$samples; |
|
660
|
|
|
|
|
|
|
# my $window = 100*$interval/$span; |
|
661
|
0
|
|
|
|
|
0
|
my $window = $interval/2; |
|
662
|
|
|
|
|
|
|
# warn "window = $window, interval = $interval"; |
|
663
|
0
|
|
|
|
|
0
|
for (my $i=0;$i<$samples;$i++) { |
|
664
|
0
|
|
|
|
|
0
|
my $packed_data = $self->_retrieve_packed_range(int($start+$i*$interval-$window), |
|
665
|
|
|
|
|
|
|
int($window), |
|
666
|
|
|
|
|
|
|
$step); |
|
667
|
0
|
|
|
|
|
0
|
my @bases= grep {$_} unpack('C*',$packed_data); |
|
|
0
|
|
|
|
|
0
|
|
|
668
|
0
|
0
|
|
|
|
0
|
if (@bases) { |
|
669
|
0
|
|
|
|
|
0
|
local $^W = 0; |
|
670
|
0
|
|
|
|
|
0
|
my $arry = $self->unscale(\@bases); |
|
671
|
0
|
|
|
|
|
0
|
my $n = @$arry; |
|
672
|
0
|
|
|
|
|
0
|
my $total = 0; |
|
673
|
0
|
|
|
|
|
0
|
$total += $_ foreach @$arry; |
|
674
|
0
|
|
|
|
|
0
|
my $mean = $total/$n; |
|
675
|
0
|
|
|
|
|
0
|
my $max; |
|
676
|
0
|
0
|
0
|
|
|
0
|
for (@$arry) { $max = $_ if !defined $max || $max < $_ } |
|
|
0
|
|
|
|
|
0
|
|
|
677
|
|
|
|
|
|
|
# warn $start+$i*$interval,': ',join(',',map {int($_)} @$arry), |
|
678
|
|
|
|
|
|
|
# " mean = $mean, max = $max"; |
|
679
|
|
|
|
|
|
|
# push @result,$mean; |
|
680
|
0
|
|
|
|
|
0
|
push @result,$max; |
|
681
|
|
|
|
|
|
|
} else { |
|
682
|
0
|
|
|
|
|
0
|
push @result,0; |
|
683
|
|
|
|
|
|
|
} |
|
684
|
|
|
|
|
|
|
} |
|
685
|
0
|
|
|
|
|
0
|
return \@result; |
|
686
|
|
|
|
|
|
|
} |
|
687
|
|
|
|
|
|
|
|
|
688
|
4
|
|
|
|
|
8
|
my $packed_data = $self->_retrieve_packed_range($start,$length,$step); |
|
689
|
|
|
|
|
|
|
|
|
690
|
4
|
|
|
|
|
5
|
my @bases; |
|
691
|
4
|
|
|
|
|
44
|
$#bases = $length-1; |
|
692
|
|
|
|
|
|
|
|
|
693
|
4
|
50
|
|
|
|
8
|
if ($step == $span) { |
|
694
|
|
|
|
|
|
|
# in this case, we do not have any partially-empty |
|
695
|
|
|
|
|
|
|
# steps, so can operate on the step-length data structure |
|
696
|
|
|
|
|
|
|
# directly |
|
697
|
0
|
|
|
|
|
0
|
@bases = unpack('C*',$packed_data); |
|
698
|
|
|
|
|
|
|
} |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
else { |
|
701
|
|
|
|
|
|
|
# In this case some regions may have partially missing data, |
|
702
|
|
|
|
|
|
|
# so we create an array equal to the length of the requested region, |
|
703
|
|
|
|
|
|
|
# fill it in, and then sample it |
|
704
|
4
|
|
|
|
|
11
|
for (my $i=0; $i
|
|
705
|
5202
|
|
|
|
|
3698
|
my $index = $i * $step; |
|
706
|
5202
|
|
|
|
|
4845
|
my $value = unpack('C',substr($packed_data,$i,1)); |
|
707
|
5202
|
100
|
|
|
|
10143
|
next unless $value; # ignore 0 values |
|
708
|
61
|
|
|
|
|
628
|
@bases[$index..$index+$span-1] = ($value) x $span; |
|
709
|
|
|
|
|
|
|
} |
|
710
|
4
|
|
|
|
|
14
|
$#bases = $length-1; |
|
711
|
|
|
|
|
|
|
} |
|
712
|
|
|
|
|
|
|
|
|
713
|
4
|
|
|
|
|
17
|
my $r = $self->unscale(\@bases); |
|
714
|
4
|
|
|
|
|
15
|
$r = $self->sample($r,$samples); |
|
715
|
4
|
50
|
33
|
|
|
108
|
$r = $self->smooth($r,$self->window * $samples/@bases) |
|
716
|
|
|
|
|
|
|
if defined $self->window && $self->window>1; |
|
717
|
4
|
|
|
|
|
84
|
return $r; |
|
718
|
|
|
|
|
|
|
} |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
sub _retrieve_packed_range { |
|
721
|
4
|
|
|
4
|
|
5
|
my $self = shift; |
|
722
|
4
|
|
|
|
|
5
|
my ($start,$length,$step) = @_; |
|
723
|
4
|
|
|
|
|
7
|
my $span = $self->span; |
|
724
|
|
|
|
|
|
|
|
|
725
|
4
|
|
|
|
|
7
|
my $offset = $self->_calculate_offset($start); |
|
726
|
|
|
|
|
|
|
|
|
727
|
4
|
|
|
|
|
8
|
$self->seek($offset); |
|
728
|
4
|
|
|
|
|
139
|
my $packed_data; |
|
729
|
4
|
|
|
|
|
8
|
$self->fh->read($packed_data,$length/$step); |
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
# pad data up to required amount |
|
732
|
4
|
50
|
|
|
|
63
|
$packed_data .= "\0" x ($length/$step-length($packed_data)) |
|
733
|
|
|
|
|
|
|
if length $packed_data < $length/$step; |
|
734
|
4
|
|
|
|
|
9
|
return $packed_data; |
|
735
|
|
|
|
|
|
|
} |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
sub sample { |
|
739
|
4
|
|
|
4
|
0
|
5
|
my $self = shift; |
|
740
|
4
|
|
|
|
|
10
|
my ($values,$samples) = @_; |
|
741
|
4
|
|
|
|
|
5
|
my $length = @$values; |
|
742
|
4
|
|
|
|
|
7
|
my $window_size = $length/$samples; |
|
743
|
|
|
|
|
|
|
|
|
744
|
4
|
|
|
|
|
3
|
my @samples; |
|
745
|
4
|
|
|
|
|
43
|
$#samples = $samples-1; |
|
746
|
|
|
|
|
|
|
|
|
747
|
4
|
50
|
|
|
|
11
|
if ($window_size < 2) { # no data smoothing needed |
|
748
|
4
|
|
|
|
|
56
|
@samples = map { $values->[$_*$window_size] } (0..$samples-1); |
|
|
5202
|
|
|
|
|
4985
|
|
|
749
|
|
|
|
|
|
|
} |
|
750
|
|
|
|
|
|
|
else { |
|
751
|
0
|
|
|
|
|
0
|
my $smoothsub = $self->smoothsub; |
|
752
|
0
|
|
|
|
|
0
|
for (my $i=0; $i<$samples; $i++) { |
|
753
|
0
|
|
|
|
|
0
|
my $start = $i * $window_size; |
|
754
|
0
|
|
|
|
|
0
|
my $end = $start + $window_size - 1; |
|
755
|
0
|
|
|
|
|
0
|
my @window = @{$values}[$start..$end]; |
|
|
0
|
|
|
|
|
0
|
|
|
756
|
|
|
|
|
|
|
|
|
757
|
0
|
|
|
|
|
0
|
my $value = $smoothsub->(\@window); |
|
758
|
0
|
|
|
|
|
0
|
$samples[$i] = $value; |
|
759
|
|
|
|
|
|
|
} |
|
760
|
|
|
|
|
|
|
} |
|
761
|
|
|
|
|
|
|
|
|
762
|
4
|
|
|
|
|
271
|
return \@samples; |
|
763
|
|
|
|
|
|
|
} |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub smoothsub { |
|
766
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
767
|
|
|
|
|
|
|
|
|
768
|
0
|
|
|
|
|
0
|
my $smoothing = $self->smoothing; |
|
769
|
0
|
0
|
|
|
|
0
|
my $smoothsub = $smoothing eq 'mean' ? \&sample_mean |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
:$smoothing eq 'max' ? \&sample_max |
|
771
|
|
|
|
|
|
|
:$smoothing eq 'min' ? \&sample_min |
|
772
|
|
|
|
|
|
|
:$smoothing eq 'none' ? \&sample_center |
|
773
|
|
|
|
|
|
|
:croak("invalid smoothing type '$smoothing'"); |
|
774
|
0
|
|
|
|
|
0
|
return $smoothsub; |
|
775
|
|
|
|
|
|
|
} |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
sub smooth { |
|
778
|
0
|
|
|
0
|
0
|
0
|
my ($self,$data,$window) = @_; |
|
779
|
|
|
|
|
|
|
|
|
780
|
0
|
|
|
|
|
0
|
my $smoothing = $self->smoothing; |
|
781
|
0
|
|
0
|
|
|
0
|
$window ||= $self->window; |
|
782
|
|
|
|
|
|
|
|
|
783
|
0
|
0
|
0
|
|
|
0
|
return $data if $smoothing eq 'none' || $window < 2; |
|
784
|
|
|
|
|
|
|
|
|
785
|
0
|
|
|
|
|
0
|
my @data = @$data; |
|
786
|
0
|
|
|
|
|
0
|
my $smoother = $self->smoothsub; |
|
787
|
0
|
0
|
|
|
|
0
|
$window++ unless $window % 2; |
|
788
|
0
|
|
|
|
|
0
|
my $offset = int($window/2); |
|
789
|
|
|
|
|
|
|
|
|
790
|
0
|
|
|
|
|
0
|
for (my $i=$offset; $i<@$data-$offset; $i++) { |
|
791
|
0
|
|
|
|
|
0
|
my $start = $i - $offset; |
|
792
|
0
|
|
|
|
|
0
|
my $end = $i + $offset; |
|
793
|
0
|
|
|
|
|
0
|
my @subset = @data[$start..$end]; |
|
794
|
0
|
|
|
|
|
0
|
$data->[$i] = $smoother->(\@subset); |
|
795
|
|
|
|
|
|
|
} |
|
796
|
|
|
|
|
|
|
|
|
797
|
0
|
|
|
|
|
0
|
return $data; |
|
798
|
|
|
|
|
|
|
} |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
sub window { |
|
801
|
4
|
|
|
4
|
0
|
5
|
my $self = shift; |
|
802
|
4
|
|
|
|
|
8
|
my $d = $self->{window}; |
|
803
|
4
|
50
|
|
|
|
12
|
$self->{window} = shift if @_; |
|
804
|
4
|
|
|
|
|
15
|
$d; |
|
805
|
|
|
|
|
|
|
} |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
sub sample_mean { |
|
808
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
|
809
|
0
|
|
|
|
|
0
|
my ($total,$items); |
|
810
|
0
|
|
|
|
|
0
|
for my $v (@$values) { |
|
811
|
0
|
0
|
|
|
|
0
|
next unless defined $v; |
|
812
|
0
|
|
|
|
|
0
|
$items++; |
|
813
|
0
|
|
|
|
|
0
|
$total+=$v; |
|
814
|
|
|
|
|
|
|
} |
|
815
|
0
|
0
|
|
|
|
0
|
return $items ? $total/$items : undef; |
|
816
|
|
|
|
|
|
|
} |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
sub sample_max { |
|
819
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
|
820
|
0
|
|
|
|
|
0
|
my $max; |
|
821
|
0
|
|
|
|
|
0
|
for my $v (@$values) { |
|
822
|
0
|
0
|
|
|
|
0
|
next unless defined $v; |
|
823
|
0
|
0
|
0
|
|
|
0
|
$max = $v if !defined $max or $max < $v; |
|
824
|
|
|
|
|
|
|
} |
|
825
|
0
|
|
|
|
|
0
|
return $max; |
|
826
|
|
|
|
|
|
|
} |
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
sub sample_min { |
|
829
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
|
830
|
0
|
|
|
|
|
0
|
my $min; |
|
831
|
0
|
|
|
|
|
0
|
for my $v (@$values) { |
|
832
|
0
|
0
|
|
|
|
0
|
next unless defined $v; |
|
833
|
0
|
0
|
0
|
|
|
0
|
$min = $v if !defined $min or $min > $v; |
|
834
|
|
|
|
|
|
|
} |
|
835
|
0
|
|
|
|
|
0
|
return $min; |
|
836
|
|
|
|
|
|
|
} |
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
sub sample_center { |
|
839
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
|
840
|
0
|
|
|
|
|
0
|
return $values->[@$values/2]; |
|
841
|
|
|
|
|
|
|
} |
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
sub _store_values { |
|
844
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
845
|
0
|
|
|
|
|
0
|
my ($position,$data) = @_; |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
# where does data start |
|
848
|
0
|
|
|
|
|
0
|
my $offset = $self->_calculate_offset($position); |
|
849
|
0
|
|
|
|
|
0
|
my $fh = $self->fh; |
|
850
|
0
|
|
|
|
|
0
|
my $step = $self->step; |
|
851
|
|
|
|
|
|
|
|
|
852
|
0
|
|
|
|
|
0
|
my $scaled = $self->scale($data); |
|
853
|
|
|
|
|
|
|
|
|
854
|
0
|
|
|
|
|
0
|
$self->seek($offset); |
|
855
|
0
|
|
|
|
|
0
|
my $packed_data = pack('C*',@$scaled); |
|
856
|
0
|
0
|
|
|
|
0
|
$fh->print($packed_data) or die "Write failed: $!"; |
|
857
|
|
|
|
|
|
|
|
|
858
|
0
|
|
|
|
|
0
|
my $new_end = $position+@$data-1; |
|
859
|
0
|
0
|
0
|
|
|
0
|
$self->{end} = $new_end if !exists $self->{end} || $self->{end} < $new_end; |
|
860
|
|
|
|
|
|
|
} |
|
861
|
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
# zero means "no data" |
|
863
|
|
|
|
|
|
|
# everything else is scaled from 1-255 |
|
864
|
|
|
|
|
|
|
sub scale { |
|
865
|
3809
|
|
|
3809
|
0
|
3427
|
my $self = shift; |
|
866
|
3809
|
|
|
|
|
3328
|
my $values = shift; |
|
867
|
3809
|
|
|
|
|
4838
|
my $scale = $self->_get_scale; |
|
868
|
3809
|
|
|
|
|
4888
|
my $min = $self->{options}{min}; |
|
869
|
3809
|
50
|
33
|
|
|
7720
|
if (ref $values && ref $values eq 'ARRAY') { |
|
870
|
|
|
|
|
|
|
my @return = map { |
|
871
|
0
|
|
|
|
|
0
|
my $i = ($_ - $min)/$scale; |
|
|
0
|
|
|
|
|
0
|
|
|
872
|
0
|
|
|
|
|
0
|
my $v = 1 + int($i+0.5*($i<=>0)); # avoid call to round() |
|
873
|
0
|
0
|
|
|
|
0
|
$v = 1 if $v < 1; |
|
874
|
0
|
0
|
|
|
|
0
|
$v = 255 if $v > 255; |
|
875
|
0
|
|
|
|
|
0
|
$v; |
|
876
|
|
|
|
|
|
|
} @$values; |
|
877
|
0
|
|
|
|
|
0
|
return \@return; |
|
878
|
|
|
|
|
|
|
} else { |
|
879
|
3809
|
|
|
|
|
9771
|
my $v = 1 + round (($values - $min)/$scale); |
|
880
|
3809
|
50
|
|
|
|
6490
|
$v = 1 if $v < 1; |
|
881
|
3809
|
50
|
|
|
|
5453
|
$v = 255 if $v > 255; |
|
882
|
3809
|
|
|
|
|
5958
|
return $v; |
|
883
|
|
|
|
|
|
|
} |
|
884
|
|
|
|
|
|
|
} |
|
885
|
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
sub unscale { |
|
887
|
4
|
|
|
4
|
0
|
5
|
my $self = shift; |
|
888
|
4
|
|
|
|
|
4
|
my $values = shift; |
|
889
|
4
|
|
|
|
|
9
|
my $scale = $self->_get_scale; |
|
890
|
4
|
|
|
|
|
8
|
my $min = $self->{options}{min}; |
|
891
|
|
|
|
|
|
|
|
|
892
|
4
|
50
|
33
|
|
|
28
|
if (ref $values && ref $values eq 'ARRAY') { |
|
893
|
4
|
100
|
|
|
|
133
|
my @return = map {$_ ? (($_-1) * $scale + $min) : undef} @$values; |
|
|
5202
|
|
|
|
|
6152
|
|
|
894
|
4
|
|
|
|
|
110
|
return \@return; |
|
895
|
|
|
|
|
|
|
} else { |
|
896
|
0
|
0
|
|
|
|
0
|
return $values ? ($values-1) * $scale + $min : undef; |
|
897
|
|
|
|
|
|
|
} |
|
898
|
|
|
|
|
|
|
} |
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
sub _get_scale { |
|
901
|
3813
|
|
|
3813
|
|
3388
|
my $self = shift; |
|
902
|
3813
|
100
|
|
|
|
6844
|
unless ($self->{scale}) { |
|
903
|
2
|
|
|
|
|
4
|
my $min = $self->{options}{min}; |
|
904
|
2
|
|
|
|
|
4
|
my $max = $self->{options}{max}; |
|
905
|
2
|
|
50
|
|
|
7
|
my $range = $max - $min || 0.001; # can't be zero! |
|
906
|
2
|
|
|
|
|
5
|
$self->{scale} = $range/254; |
|
907
|
|
|
|
|
|
|
} |
|
908
|
3813
|
|
|
|
|
5149
|
return $self->{scale}; |
|
909
|
|
|
|
|
|
|
} |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
sub round { |
|
912
|
3809
|
|
|
3809
|
0
|
7714
|
return int($_[0]+0.5*($_[0]<=>0)); |
|
913
|
|
|
|
|
|
|
} |
|
914
|
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
1; |
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
__END__ |