line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Graphics::Wiggle; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Bio::Graphics::Wiggle -- Binary storage for dense genomic features |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# all positions are 1-based |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $wig = Bio::Graphics::Wiggle->new('./test.wig', |
12
|
|
|
|
|
|
|
$writeable, |
13
|
|
|
|
|
|
|
{ seqid => $seqid, |
14
|
|
|
|
|
|
|
start => $start, |
15
|
|
|
|
|
|
|
step => $step, |
16
|
|
|
|
|
|
|
min => $min, |
17
|
|
|
|
|
|
|
max => $max }); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
$wig->erase; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $seqid = $wig->seqid('new_id'); |
22
|
|
|
|
|
|
|
my $max = $wig->max($new_max); |
23
|
|
|
|
|
|
|
my $min = $wig->min($new_min); |
24
|
|
|
|
|
|
|
my $step = $wig->step($new_step); # data stored at modulus step == 0; all else is blank |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$wig->set_value($position => $value); # store $value at position |
27
|
|
|
|
|
|
|
$wig->set_values($position => \@values); # store array of values at position |
28
|
|
|
|
|
|
|
$wig->set_range($start=>$end,$value); # store the same $value from $start to $end |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $value = $wig->value($position); # fetch value from position |
31
|
|
|
|
|
|
|
my $values = $wig->values($start,$end); # fetch range of data from $start to $end |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$wig->window(100); # sample window size |
34
|
|
|
|
|
|
|
$wig->smoothing('mean'); # when sampling, compute the mean value across sample window |
35
|
|
|
|
|
|
|
my $values = $wig->values($start,$end,$samples); # fetch $samples data points from $start to $end |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 DESCRIPTION |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
IMPORTANT NOTE: This implementation is still not right. See |
41
|
|
|
|
|
|
|
http://genomewiki.ucsc.edu/index.php/Wiggle for a more space-efficient |
42
|
|
|
|
|
|
|
implementation. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
This module stores "wiggle" style quantitative genome data for display |
45
|
|
|
|
|
|
|
in a genome browser application. The data for each chromosome (or |
46
|
|
|
|
|
|
|
contig, or other reference sequence) is stored in a single file in the |
47
|
|
|
|
|
|
|
following format: |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
256 byte header |
50
|
|
|
|
|
|
|
50 bytes seqid, zero-terminated C string |
51
|
|
|
|
|
|
|
4 byte long integer, value of "step" (explained later) |
52
|
|
|
|
|
|
|
4 byte perl native float, the "min" value |
53
|
|
|
|
|
|
|
4 byte perl native float, the "max" value |
54
|
|
|
|
|
|
|
4 byte long integer, value of "span" |
55
|
|
|
|
|
|
|
4 byte perl native float, the mean |
56
|
|
|
|
|
|
|
4 byte perl native float, the standard deviation |
57
|
|
|
|
|
|
|
2 byte unsigned short, the version number (currently version 0) |
58
|
|
|
|
|
|
|
4 byte long integer, sequence start position (in 0-based coordinates) |
59
|
|
|
|
|
|
|
null padding to 256 bytes for future use |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
The remainder of the file consists of 8-bit unsigned scaled integer |
62
|
|
|
|
|
|
|
values. This means that all quantitative data will be scaled to 8-bit |
63
|
|
|
|
|
|
|
precision! |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
For a convenient method of creating Wiggle files from UCSC-type WIG |
66
|
|
|
|
|
|
|
input and creating GFF3 output, please see |
67
|
|
|
|
|
|
|
L<Bio::Graphics::Wiggle::Loader>. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 METHODS |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 Constructor and Accessors |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over 4 |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item $wig = Bio::Graphics::Wiggle->new($filename,$writeable,{options}) |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Open/create a wiggle-format data file: |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
$filename -- path to the file to open/create |
80
|
|
|
|
|
|
|
$writeable -- boolean value indicating whether file is |
81
|
|
|
|
|
|
|
writeable. Missing files will only be created |
82
|
|
|
|
|
|
|
if $writeable set to a true value. If path is |
83
|
|
|
|
|
|
|
empty (undef or empty string) and writeable is true, |
84
|
|
|
|
|
|
|
new() will create a temporary file that will be |
85
|
|
|
|
|
|
|
deleted when the object goes out of scope. |
86
|
|
|
|
|
|
|
{options} -- hash ref of the following named options, only valid |
87
|
|
|
|
|
|
|
when creating a new wig file with $writeable true. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
option name description default |
90
|
|
|
|
|
|
|
----------- ----- ------- |
91
|
|
|
|
|
|
|
seqid name/id of sequence empty name |
92
|
|
|
|
|
|
|
min minimum value of data points 0 |
93
|
|
|
|
|
|
|
max maximum value of data points 255 |
94
|
|
|
|
|
|
|
step interval between data points 1 |
95
|
|
|
|
|
|
|
span width of data points value of "step" |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
The "step" can be used to create sparse files to save space. By |
98
|
|
|
|
|
|
|
default, step is set to 1, in which case a data value will be stored |
99
|
|
|
|
|
|
|
at each base of the sequence. By setting step to 10, then each value |
100
|
|
|
|
|
|
|
is taken to correspond to 10 bp, and the file will be 10x smaller. |
101
|
|
|
|
|
|
|
For example, consider this step 5 data set: |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
104
|
|
|
|
|
|
|
20 . . . . 60 . . . . 80 . . . |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
We have stored the values "20" "60" and "80" at positions 1, 6 and 11, |
107
|
|
|
|
|
|
|
respectively. When retrieving this data, it will appear as if |
108
|
|
|
|
|
|
|
positions 1 through 5 have a value of 20, positions 6-10 have a value |
109
|
|
|
|
|
|
|
of 60, and positions 11-14 have a value of 80. In the data file, we |
110
|
|
|
|
|
|
|
store, positions 1,6,and 11 in adjacent bytes. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Note that no locking is performed by this module. If you wish to allow |
113
|
|
|
|
|
|
|
multi-user write access to the databases files, you will need to |
114
|
|
|
|
|
|
|
flock() the files yourself. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item $seqid = $wig->seqid(['new_id']) |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item $max = $wig->max([$new_max]) |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item $min = $wig->min([$new_min]) |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item $step = $wig->step([$new_step]) |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item $span = $wig->span([$new_span]) |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item $mean = $wig->mean([$new_mean]); |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item $stdev = $wig->stdev([$new_stdev]); |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
These accessors get or set the corresponding values. Setting is only |
131
|
|
|
|
|
|
|
allowed if the file was opened for writing. Note that changing the |
132
|
|
|
|
|
|
|
min, max and step after writing data to the file under another |
133
|
|
|
|
|
|
|
parameter set will produce unexpected (and invalid) results, as the |
134
|
|
|
|
|
|
|
existing data is not automatically updated to be consistent. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item $trim = $wig->trim([$new_trim]); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
The trim method sets the trimming method, which can be used to trim |
139
|
|
|
|
|
|
|
out extreme values. Three methods are currently supported: |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
none No trimming |
142
|
|
|
|
|
|
|
stdev Trim 1 standard deviation above and below mean |
143
|
|
|
|
|
|
|
stdevN Trim N standard deviations above and below the mean |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
In "stdevN", any can be any positive integer. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=back |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 Setting Data |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=over 4 |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item $wig->set_value($position => $value) |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
This method sets the value at $position to $value. If a step>1 is in |
156
|
|
|
|
|
|
|
force, then $position will be rounded down to the nearest multiple of |
157
|
|
|
|
|
|
|
step. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=item $wig->set_range($start=>$end, $value) |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
This method sets the value of all bases between $start and $end to |
162
|
|
|
|
|
|
|
$value, honoring step. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=item $sig->set_values($position => \@values) |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
This method writes an array of values into the datababase beginning at |
167
|
|
|
|
|
|
|
$position (or the nearest lower multiple of step). If step>1, then |
168
|
|
|
|
|
|
|
values will be written at step intervals. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=back |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 Retrieving Data |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=over 4 |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item $value = $wig->value($position) |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Retrieve the single data item at position $position, or the nearest |
179
|
|
|
|
|
|
|
lower multiple of $step if step>1. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item $values = $wig->values($start=>$end) |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Retrieve the values in the range $start to $end and return them as an |
184
|
|
|
|
|
|
|
array ref. Note that you will always get an array of size |
185
|
|
|
|
|
|
|
($end-$start+1) even if step>1; the data in between the step intervals |
186
|
|
|
|
|
|
|
will be filled in. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item $values = $wig->values($start=>$end,$samples) |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Retrieve a sampling of the values between $start and $end. Nothing |
191
|
|
|
|
|
|
|
very sophisticated is done here; the code simply returns the number of |
192
|
|
|
|
|
|
|
values indicated in $samples, smoothed according to the smoothing |
193
|
|
|
|
|
|
|
method selected (default to "mean"), then selected at even intervals |
194
|
|
|
|
|
|
|
from the range $start to $end. The return value is an arrayref of |
195
|
|
|
|
|
|
|
exactly $samples values. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item $string = $wig->export_to_wif($start,$end) |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=item $string = $wig->export_to_wif64($start,$end) |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
Export the region from start to end in the "wif" format. This data can |
202
|
|
|
|
|
|
|
later be imported into another Bio::Graphics::Wiggle object. The first |
203
|
|
|
|
|
|
|
version returns a binary string. The second version returns a base64 |
204
|
|
|
|
|
|
|
encoded version that is safe for ascii-oriented formata such as GFF3 |
205
|
|
|
|
|
|
|
and XML. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=item $wig->import_from_wif($string) |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=item $wig->import_from_wif64($string) |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Import a wif format data string into the Bio::Graphics::Wiggle |
212
|
|
|
|
|
|
|
object. The first version expects a binary string. The second version |
213
|
|
|
|
|
|
|
expects a base64 encoded version that is safe for ascii-oriented |
214
|
|
|
|
|
|
|
formata such as GFF3 and XML. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=back |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=cut |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# read/write genome tiling data, to be compatible with Jim Kent's WIG format |
222
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
223
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
224
|
1
|
|
|
1
|
|
910
|
use IO::File; |
|
1
|
|
|
|
|
1156
|
|
|
1
|
|
|
|
|
153
|
|
225
|
1
|
|
|
1
|
|
7
|
use Carp 'croak','carp','confess'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
55
|
|
226
|
|
|
|
|
|
|
|
227
|
1
|
|
|
1
|
|
6
|
use constant HEADER_LEN => 256; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
79
|
|
228
|
|
|
|
|
|
|
# seqid, step, min, max, span, mean, stdev, version, start |
229
|
1
|
|
|
1
|
|
5
|
use constant HEADER => '(Z50LFFLFFSL)@'.HEADER_LEN; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
39
|
|
230
|
1
|
|
|
1
|
|
5
|
use constant BODY => 'C'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
69
|
|
231
|
1
|
|
|
1
|
|
10
|
use constant DEBUG => 0; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
52
|
|
232
|
1
|
|
|
1
|
|
5
|
use constant DEFAULT_SMOOTHING => 'mean'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
36
|
|
233
|
1
|
|
|
1
|
|
5
|
use constant VERSION => 0; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5306
|
|
234
|
|
|
|
|
|
|
our $VERSION = '1.0'; |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
sub new { |
237
|
2
|
|
|
2
|
1
|
6
|
my $class = shift; |
238
|
2
|
|
|
|
|
6
|
my ($path,$write,$options) = @_; |
239
|
2
|
|
50
|
|
|
7
|
$path ||= ''; # to avoid uninit warning |
240
|
2
|
50
|
|
|
|
152
|
my $mode = $write ? -e $path # if file already exists... |
|
|
100
|
|
|
|
|
|
241
|
|
|
|
|
|
|
? '+<' # ...open for read/write |
242
|
|
|
|
|
|
|
: '+>' # ...else clobber and open a new one |
243
|
|
|
|
|
|
|
: '<'; # read only |
244
|
2
|
|
|
|
|
13
|
my $fh = $class->new_fh($path,$mode); |
245
|
2
|
50
|
0
|
|
|
370
|
$fh or die (($path||'temporary file').": $!"); |
246
|
|
|
|
|
|
|
|
247
|
2
|
|
100
|
|
|
10
|
$options ||= {}; |
248
|
|
|
|
|
|
|
|
249
|
2
|
|
33
|
|
|
32
|
my $self = bless {fh => $fh, |
250
|
|
|
|
|
|
|
write => $write, |
251
|
|
|
|
|
|
|
dirty => scalar keys %$options |
252
|
|
|
|
|
|
|
}, ref $class || $class; |
253
|
|
|
|
|
|
|
|
254
|
2
|
|
100
|
|
|
5
|
my $stored_options = eval {$self->_readoptions} || {}; |
255
|
2
|
50
|
|
|
|
49
|
$options->{start}-- if defined $options->{start}; # 1-based ==> 0-based coordinates |
256
|
2
|
|
|
|
|
19
|
my %merged_options = (%$stored_options,%$options); |
257
|
|
|
|
|
|
|
# warn "merged options = ",join ' ',%merged_options; |
258
|
2
|
|
50
|
|
|
14
|
$merged_options{version}||= 0; |
259
|
2
|
|
50
|
|
|
9
|
$merged_options{seqid} ||= 'chrUnknown'; |
260
|
2
|
|
50
|
|
|
7
|
$merged_options{min} ||= 0; |
261
|
2
|
|
50
|
|
|
7
|
$merged_options{max} ||= 255; |
262
|
2
|
|
50
|
|
|
6
|
$merged_options{mean} ||= 128; |
263
|
2
|
|
50
|
|
|
52
|
$merged_options{stdev} ||= 255; |
264
|
2
|
|
100
|
|
|
10
|
$merged_options{trim} ||= 'none'; |
265
|
2
|
|
50
|
|
|
10
|
$merged_options{step} ||= 1; |
266
|
2
|
|
50
|
|
|
12
|
$merged_options{start} ||= 0; |
267
|
2
|
|
33
|
|
|
7
|
$merged_options{span} ||= $merged_options{step}; |
268
|
2
|
|
|
|
|
8
|
$self->{options} = \%merged_options; |
269
|
2
|
100
|
|
|
|
8
|
$self->_do_trim unless $self->trim eq 'none'; |
270
|
2
|
|
|
|
|
12
|
return $self; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
sub new_fh { |
274
|
2
|
|
|
2
|
0
|
4
|
my $self = shift; |
275
|
2
|
|
|
|
|
5
|
my ($path,$mode) = @_; |
276
|
2
|
50
|
|
|
|
23
|
return $path ? IO::File->new($path,$mode) |
277
|
|
|
|
|
|
|
: IO::File->new_tmpfile; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub end { |
281
|
4
|
|
|
4
|
0
|
5
|
my $self = shift; |
282
|
4
|
100
|
|
|
|
13
|
unless (defined $self->{end}) { |
283
|
1
|
|
|
|
|
4
|
my $size = (stat($self->fh))[7]; |
284
|
1
|
|
|
|
|
4
|
my $data_len = $size - HEADER_LEN(); |
285
|
1
|
50
|
|
|
|
4
|
return unless $data_len>0; # undef end |
286
|
1
|
|
|
|
|
4
|
$self->{end} = ($self->start-1) + $data_len * $self->step; |
287
|
|
|
|
|
|
|
} |
288
|
4
|
|
|
|
|
15
|
return $self->{end}; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
2
|
|
|
2
|
|
5565
|
sub DESTROY { shift->write } |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub erase { |
294
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
295
|
0
|
|
|
|
|
0
|
$self->fh->truncate(HEADER_LEN); |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
7631
|
|
|
7631
|
0
|
31428
|
sub fh { shift->{fh} } |
299
|
3813
|
|
|
3813
|
0
|
5916
|
sub seek { shift->fh->seek(shift,0) } |
300
|
0
|
|
|
0
|
0
|
0
|
sub tell { shift->fh->tell() } |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub _option { |
303
|
7660
|
|
|
7660
|
|
8763
|
my $self = shift; |
304
|
7660
|
|
|
|
|
9325
|
my $option = shift; |
305
|
7660
|
|
|
|
|
14091
|
my $d = $self->{options}{$option}; |
306
|
7660
|
100
|
|
|
|
14676
|
if (@_) { |
307
|
2
|
|
|
|
|
5
|
$self->{dirty}++; |
308
|
2
|
|
|
|
|
4
|
$self->{options}{$option} = shift; |
309
|
2
|
50
|
66
|
|
|
11
|
delete $self->{scale} if $option eq 'min' or $option eq 'max'; |
310
|
|
|
|
|
|
|
} |
311
|
7660
|
|
|
|
|
14806
|
return $d; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
0
|
|
|
0
|
0
|
0
|
sub version { shift->_option('version',@_) } |
315
|
2
|
|
|
2
|
1
|
633
|
sub seqid { shift->_option('seqid',@_) } |
316
|
4
|
|
|
4
|
1
|
9
|
sub min { shift->_option('min',@_) } |
317
|
4
|
|
|
4
|
1
|
10
|
sub max { shift->_option('max',@_) } |
318
|
3818
|
|
|
3818
|
1
|
7471
|
sub step { shift->_option('step',@_) } |
319
|
9
|
|
|
9
|
1
|
25
|
sub span { shift->_option('span',@_) } |
320
|
1
|
|
|
1
|
1
|
4
|
sub mean { shift->_option('mean',@_) } |
321
|
1
|
|
|
1
|
1
|
4
|
sub stdev { shift->_option('stdev',@_) } |
322
|
3
|
|
|
3
|
1
|
11
|
sub trim { shift->_option('trim',@_) } |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
sub start { # slightly different because we have to deal with 1 vs 0-based coordinates |
325
|
3818
|
|
|
3818
|
0
|
4775
|
my $self = shift; |
326
|
3818
|
|
|
|
|
6633
|
my $start = $self->_option('start'); |
327
|
3818
|
|
|
|
|
4607
|
$start++; # convert into 1-based coordinates |
328
|
3818
|
50
|
|
|
|
7061
|
if (@_) { |
329
|
0
|
|
|
|
|
0
|
my $newstart = shift; |
330
|
0
|
|
|
|
|
0
|
$self->_option('start',$newstart-1); # store in zero-based coordinates |
331
|
|
|
|
|
|
|
} |
332
|
3818
|
|
|
|
|
5792
|
return $start; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
sub smoothing { |
336
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
337
|
0
|
|
0
|
|
|
0
|
my $d = $self->{smoothing} || DEFAULT_SMOOTHING; |
338
|
0
|
0
|
|
|
|
0
|
$self->{smoothing} = shift if @_; |
339
|
0
|
|
|
|
|
0
|
$d; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
sub write { |
343
|
3
|
|
|
3
|
0
|
6
|
my $self = shift; |
344
|
3
|
100
|
66
|
|
|
261
|
if ($self->{dirty} && $self->{write}) { |
345
|
1
|
|
|
|
|
6
|
$self->_writeoptions($self->{options}); |
346
|
1
|
|
|
|
|
12
|
undef $self->{dirty}; |
347
|
1
|
|
|
|
|
4
|
$self->fh->flush; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub _readoptions { |
352
|
2
|
|
|
2
|
|
5
|
my $self = shift; |
353
|
2
|
|
|
|
|
7
|
my $fh = $self->fh; |
354
|
2
|
|
|
|
|
4
|
my $header; |
355
|
2
|
|
|
|
|
22
|
$fh->seek(0,0); |
356
|
2
|
100
|
|
|
|
33
|
return unless $fh->read($header,HEADER_LEN) == HEADER_LEN; |
357
|
1
|
|
|
|
|
46
|
return $self->_parse_header($header); |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub _parse_header { |
361
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
362
|
1
|
|
|
|
|
3
|
my $header = shift; |
363
|
1
|
|
|
|
|
13
|
my ($seqid,$step,$min,$max,$span, |
364
|
|
|
|
|
|
|
$mean,$stdev,$version,$start) = unpack(HEADER,$header); |
365
|
1
|
|
|
|
|
13
|
return { seqid => $seqid, |
366
|
|
|
|
|
|
|
step => $step, |
367
|
|
|
|
|
|
|
span => $span, |
368
|
|
|
|
|
|
|
min => $min, |
369
|
|
|
|
|
|
|
max => $max, |
370
|
|
|
|
|
|
|
mean => $mean, |
371
|
|
|
|
|
|
|
stdev => $stdev, |
372
|
|
|
|
|
|
|
version => $version, |
373
|
|
|
|
|
|
|
start => $start, |
374
|
|
|
|
|
|
|
}; |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
sub _generate_header { |
378
|
1
|
|
|
1
|
|
3
|
my $self = shift; |
379
|
1
|
|
|
|
|
2
|
my $options = shift; |
380
|
1
|
|
|
|
|
3
|
return pack(HEADER,@{$options}{qw(seqid step min max span mean stdev version start)}); |
|
1
|
|
|
|
|
14
|
|
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
sub _writeoptions { |
384
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
385
|
1
|
|
|
|
|
3
|
my $options = shift; |
386
|
1
|
|
|
|
|
3
|
my $fh = $self->fh; |
387
|
1
|
|
|
|
|
5
|
my $header = $self->_generate_header($options); |
388
|
1
|
|
|
|
|
7
|
$fh->seek(0,0); |
389
|
1
|
50
|
|
|
|
30
|
$fh->print($header) or die "write failed: $!"; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
sub _do_trim { |
393
|
1
|
|
|
1
|
|
3
|
my $self = shift; |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# don't trim if there is no score range |
396
|
1
|
50
|
|
|
|
4
|
($self->max - $self->min) or return; |
397
|
|
|
|
|
|
|
|
398
|
1
|
|
|
|
|
6
|
my $trim = lc $self->trim; |
399
|
1
|
|
|
|
|
1
|
my ($method,$arg); |
400
|
1
|
50
|
|
|
|
7
|
if ($trim =~ /([a-z]+)(\d+)/) { |
401
|
1
|
|
|
|
|
4
|
$method = "_trim_${1}"; |
402
|
1
|
|
|
|
|
4
|
$arg = $2; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
else { |
405
|
0
|
|
|
|
|
0
|
$method = "_trim_${trim}"; |
406
|
|
|
|
|
|
|
} |
407
|
1
|
50
|
|
|
|
7
|
unless ($self->can($method)) { |
408
|
0
|
|
|
|
|
0
|
carp "invalid trim method $trim"; |
409
|
0
|
|
|
|
|
0
|
return; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
|
412
|
1
|
|
|
|
|
4
|
$self->$method($arg); |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# trim n standard deviations from the mean |
416
|
|
|
|
|
|
|
sub _trim_stdev { |
417
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
418
|
1
|
|
50
|
|
|
5
|
my $factor = shift || 1; |
419
|
1
|
|
|
|
|
4
|
my $mean = $self->mean; |
420
|
1
|
|
|
|
|
4
|
my $stdev = $self->stdev * $factor; |
421
|
1
|
50
|
|
|
|
3
|
my $min = $self->min > $mean - $stdev ? $self->min : $mean - $stdev; |
422
|
1
|
50
|
|
|
|
3
|
my $max = $self->max < $mean + $stdev ? $self->max : $mean + $stdev; |
423
|
1
|
|
|
|
|
3
|
warn "_trim_stdev (* $factor) : setting min to $min, max to $max (was ",$self->min,',',$self->max,')' |
424
|
|
|
|
|
|
|
if DEBUG; |
425
|
1
|
|
|
|
|
4
|
$self->min($min); |
426
|
1
|
|
|
|
|
4
|
$self->max($max); |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
sub set_value { |
430
|
3809
|
|
|
3809
|
1
|
4472
|
my $self = shift; |
431
|
3809
|
50
|
|
|
|
8451
|
croak "usage: \$wig->set_value(\$position => \$value)" |
432
|
|
|
|
|
|
|
unless @_ == 2; |
433
|
3809
|
|
|
|
|
8076
|
$self->value(@_); |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
sub set_range { |
437
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
438
|
0
|
0
|
|
|
|
0
|
croak "usage: \$wig->set_range(\$start_position => \$end_position, \$value)" |
439
|
|
|
|
|
|
|
unless @_ == 3; |
440
|
0
|
|
|
|
|
0
|
$self->value(@_); |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub value { |
444
|
3809
|
|
|
3809
|
1
|
4109
|
my $self = shift; |
445
|
3809
|
|
|
|
|
4453
|
my $position = shift; |
446
|
|
|
|
|
|
|
|
447
|
3809
|
|
|
|
|
6153
|
my $offset = $self->_calculate_offset($position); |
448
|
3809
|
50
|
|
|
|
8341
|
$offset >= HEADER_LEN or die "Tried to retrieve data from before start position"; |
449
|
3809
|
50
|
|
|
|
7386
|
$self->seek($offset) or die "Seek failed: $!"; |
450
|
|
|
|
|
|
|
|
451
|
3809
|
50
|
|
|
|
84831
|
if (@_ == 2) { |
|
|
50
|
|
|
|
|
|
452
|
0
|
|
|
|
|
0
|
my $end = shift; |
453
|
0
|
|
|
|
|
0
|
my $new_value = shift; |
454
|
0
|
|
|
|
|
0
|
my $step = $self->step; |
455
|
0
|
|
|
|
|
0
|
my $scaled_value = $self->scale($new_value); |
456
|
0
|
0
|
|
|
|
0
|
$self->fh->print(pack('C*',($scaled_value)x(($end-$position+1)/$step))) or die "Write failed: $!"; |
457
|
0
|
0
|
0
|
|
|
0
|
$self->{end} = $end if !exists $self->{end} || $self->{end} < $end; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
elsif (@_==1) { |
461
|
3809
|
|
|
|
|
6223
|
my $new_value = shift; |
462
|
3809
|
|
|
|
|
7360
|
my $scaled_value = $self->scale($new_value); |
463
|
3809
|
50
|
|
|
|
13694
|
$self->fh->print(pack('C*',$scaled_value)) or die "Write failed: $!"; |
464
|
3809
|
50
|
66
|
|
|
47208
|
$self->{end} = $position if !exists $self->{end} || $self->{end} < $position; |
465
|
3809
|
|
|
|
|
11010
|
return $new_value; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
else { # retrieving data |
469
|
0
|
|
|
|
|
0
|
my $buffer; |
470
|
0
|
0
|
|
|
|
0
|
$self->fh->read($buffer,1) or die "Read failed: $!"; |
471
|
0
|
|
|
|
|
0
|
my $scaled_value = unpack('C*',$buffer); |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
# missing data, so look back at most span values to get it |
474
|
0
|
0
|
0
|
|
|
0
|
if ($scaled_value == 0 && (my $span = $self->span) > 1) { |
475
|
0
|
|
|
|
|
0
|
$offset = $self->_calculate_offset($position-$span+1); |
476
|
0
|
0
|
|
|
|
0
|
$offset >= HEADER_LEN or die "Tried to retrieve data from before start position"; |
477
|
0
|
0
|
|
|
|
0
|
$self->seek($offset) or die "Seek failed: $!"; |
478
|
|
|
|
|
|
|
|
479
|
0
|
|
|
|
|
0
|
$self->fh->read($buffer,$span/$self->step); |
480
|
0
|
|
|
|
|
0
|
for (my $i=length($buffer)-2;$i>=0;$i--) { |
481
|
0
|
|
|
|
|
0
|
my $val = substr($buffer,$i,1); |
482
|
0
|
0
|
|
|
|
0
|
next if $val eq "\0"; |
483
|
0
|
|
|
|
|
0
|
$scaled_value = unpack('C*',$val); |
484
|
0
|
|
|
|
|
0
|
last; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
} |
488
|
0
|
|
|
|
|
0
|
return $self->unscale($scaled_value); |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub _calculate_offset { |
493
|
3813
|
|
|
3813
|
|
4870
|
my $self = shift; |
494
|
3813
|
|
|
|
|
4457
|
my $position = shift; |
495
|
3813
|
|
|
|
|
7291
|
my $step = $self->step; |
496
|
3813
|
|
|
|
|
7878
|
my $start = $self->start; |
497
|
3813
|
|
|
|
|
10199
|
return HEADER_LEN + int(($position-$start)/$step); |
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
sub set_values { |
501
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
502
|
0
|
0
|
0
|
|
|
0
|
croak "usage: \$wig->set_values(\$position => \@values)" |
503
|
|
|
|
|
|
|
unless @_ == 2 and ref $_[1] eq 'ARRAY'; |
504
|
0
|
|
|
|
|
0
|
$self->values(@_); |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
# read or write a series of values |
508
|
|
|
|
|
|
|
sub values { |
509
|
4
|
|
|
4
|
1
|
11
|
my $self = shift; |
510
|
4
|
|
|
|
|
7
|
my $start = shift; |
511
|
4
|
50
|
33
|
|
|
18
|
if (ref $_[0] && ref $_[0] eq 'ARRAY') { |
512
|
0
|
|
|
|
|
0
|
$self->_store_values($start,@_); |
513
|
|
|
|
|
|
|
} else { |
514
|
4
|
|
|
|
|
18
|
$self->_retrieve_values($start,@_); |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
sub export_to_wif64 { |
519
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
520
|
0
|
|
|
|
|
0
|
my $data = $self->export_to_wif(@_); |
521
|
0
|
0
|
|
|
|
0
|
eval "require MIME::Base64" |
522
|
|
|
|
|
|
|
unless MIME::Base64->can('encode_base64'); |
523
|
0
|
|
|
|
|
0
|
return MIME::Base64::encode_base64($data); |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
sub import_from_wif64 { |
526
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
527
|
0
|
|
|
|
|
0
|
my $data = shift; |
528
|
|
|
|
|
|
|
|
529
|
0
|
0
|
|
|
|
0
|
eval "require MIME::Base64" |
530
|
|
|
|
|
|
|
unless MIME::Base64->can('decode_base64'); |
531
|
0
|
|
|
|
|
0
|
return $self->import_from_wif(MIME::Base64::decode_base64($data)); |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# subregion in "wiggle interchange format" (wif) |
535
|
|
|
|
|
|
|
sub export_to_wif { |
536
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
537
|
0
|
|
|
|
|
0
|
my ($start,$end) = @_; |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# get the 256 byte header |
540
|
0
|
|
|
|
|
0
|
my $data = $self->_generate_header($self->{options}); |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# add the range to the data (8 bytes overhead) |
543
|
0
|
|
|
|
|
0
|
$data .= pack("L",$start); |
544
|
0
|
|
|
|
|
0
|
$data .= pack("L",$end); |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
# add the packed data for this range |
547
|
0
|
|
|
|
|
0
|
$data .= $self->_retrieve_packed_range($start,$end-$start+1,$self->step); |
548
|
0
|
|
|
|
|
0
|
return $data; |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
sub export_to_bedgraph { |
552
|
1
|
|
|
1
|
0
|
1162
|
my $self = shift; |
553
|
1
|
|
|
|
|
2
|
my ($start,$end,$fh) = @_; |
554
|
1
|
|
|
|
|
3
|
my $max_range = 100_000; |
555
|
|
|
|
|
|
|
|
556
|
1
|
|
50
|
|
|
7
|
$start ||= 1; |
557
|
1
|
|
33
|
|
|
4
|
$end ||= $self->end; |
558
|
|
|
|
|
|
|
|
559
|
1
|
|
|
|
|
2
|
my $lines; |
560
|
1
|
|
|
|
|
6
|
for (my $s=$start;$s<$end;$s+=$max_range) { |
561
|
1
|
|
|
|
|
3
|
my $e = $s + $max_range - 1; |
562
|
1
|
50
|
|
|
|
4
|
$e = $end if $e > $end; |
563
|
1
|
|
|
|
|
5
|
my $b = $self->values($s,$e); |
564
|
1
|
|
|
|
|
13
|
$lines .= $self->_bedgraph_lines($s,$b,$fh); |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
1
|
|
|
|
|
8
|
return $lines; |
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
sub _bedgraph_lines { |
571
|
1
|
|
|
1
|
|
4
|
my $self = shift; |
572
|
1
|
|
|
|
|
3
|
my ($start,$values,$fh) = @_; |
573
|
1
|
|
|
|
|
8
|
my $seqid = $self->seqid; |
574
|
1
|
|
|
|
|
2
|
my $result; |
575
|
|
|
|
|
|
|
|
576
|
1
|
|
|
|
|
3
|
my ($last_val,$last_start,$end); |
577
|
1
|
|
|
|
|
4
|
$last_start = $start-1; # 0 based indexing |
578
|
1
|
|
|
|
|
6
|
for (my $i=0;$i<@$values;$i++) { |
579
|
5000
|
|
|
|
|
19311
|
my $v = $values->[$i]; |
580
|
|
|
|
|
|
|
|
581
|
5000
|
100
|
|
|
|
47273
|
if (!defined $v) { |
582
|
2168
|
100
|
|
|
|
13169
|
if (defined $last_val) { |
583
|
56
|
|
|
|
|
354
|
$result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val); |
584
|
56
|
|
|
|
|
136
|
undef $last_val; |
585
|
|
|
|
|
|
|
} |
586
|
2168
|
|
|
|
|
4394
|
$last_start = $start+$i; |
587
|
2168
|
|
|
|
|
23099
|
next; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
|
590
|
2832
|
50
|
66
|
|
|
44513
|
if (defined $last_val && $last_val != $v) { |
591
|
0
|
|
|
|
|
0
|
$result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val); |
592
|
0
|
|
|
|
|
0
|
$last_start = $start+$i-1; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
2832
|
|
|
|
|
7334
|
$last_val = $v; |
596
|
2832
|
|
|
|
|
20723
|
$end = $start+$i-1; |
597
|
|
|
|
|
|
|
} |
598
|
1
|
50
|
|
|
|
12
|
$result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$end+1,$last_val) if $last_val; |
599
|
1
|
|
|
|
|
114
|
return $result; |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
sub _append_or_print_bedgraph { |
603
|
57
|
|
|
57
|
|
246
|
my $self = shift; |
604
|
57
|
|
|
|
|
973
|
my ($fh,$seqid,$start,$end,$val) = @_; |
605
|
57
|
|
|
|
|
5846
|
my $data = join("\t",$seqid,$start,$end,sprintf("%.2f",$val))."\n"; |
606
|
57
|
50
|
|
|
|
381
|
if ($fh) { |
607
|
0
|
|
|
|
|
0
|
print $fh $data; |
608
|
0
|
|
|
|
|
0
|
return ''; |
609
|
|
|
|
|
|
|
} else { |
610
|
57
|
|
|
|
|
481
|
return $data; |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
sub import_from_wif { |
615
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
616
|
0
|
|
|
|
|
0
|
my $wifdata = shift; |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# BUG: should check that header is compatible |
619
|
0
|
|
|
|
|
0
|
my $header = substr($wifdata,0,HEADER_LEN); |
620
|
0
|
|
|
|
|
0
|
my $start = unpack('L',substr($wifdata,HEADER_LEN, 4)); |
621
|
0
|
|
|
|
|
0
|
my $end = unpack('L',substr($wifdata,HEADER_LEN+4,4)); |
622
|
|
|
|
|
|
|
|
623
|
0
|
|
|
|
|
0
|
my $options = $self->_parse_header($header); |
624
|
0
|
|
0
|
|
|
0
|
my $stored_options = eval {$self->_readoptions} || {}; |
625
|
0
|
|
|
|
|
0
|
my %merged_options = (%$stored_options,%$options); |
626
|
0
|
|
|
|
|
0
|
$self->{options} = \%merged_options; |
627
|
0
|
|
|
|
|
0
|
$self->{dirty}++; |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# write the data |
630
|
0
|
|
|
|
|
0
|
$self->seek($self->_calculate_offset($start)); |
631
|
0
|
0
|
|
|
|
0
|
$self->fh->print(substr($wifdata,HEADER_LEN+8)) or die "write failed: $!"; |
632
|
0
|
0
|
0
|
|
|
0
|
$self->{end} = $end if !defined $self->{end} or $self->{end} < $end; |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
sub _retrieve_values { |
636
|
4
|
|
|
4
|
|
6
|
my $self = shift; |
637
|
4
|
|
|
|
|
8
|
my ($start,$end,$samples) = @_; |
638
|
|
|
|
|
|
|
|
639
|
4
|
|
|
|
|
11
|
my $data_start = $self->start; |
640
|
4
|
|
|
|
|
42
|
my $step = $self->step; |
641
|
4
|
|
|
|
|
10
|
my $span = $self->span; |
642
|
|
|
|
|
|
|
|
643
|
4
|
50
|
|
|
|
14
|
croak "Value of start position ($start) is less than data start of $data_start" |
644
|
|
|
|
|
|
|
unless $start >= $data_start; |
645
|
4
|
50
|
|
|
|
15
|
croak "Value of end position ($end) is greater than data end of ",$self->end+$span, |
646
|
|
|
|
|
|
|
unless $end <= $self->end + $span; |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
# generate list of positions to sample from |
649
|
4
|
|
|
|
|
10
|
my $length = $end-$start+1; |
650
|
4
|
|
33
|
|
|
22
|
$samples ||= $length; |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
# warn "samples = $samples, length=$length, span=$span, step=$step"; |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
# if the length is grossly greater than the samples, then we won't even |
655
|
|
|
|
|
|
|
# bother fetching all the data, but just sample into the disk file |
656
|
4
|
50
|
33
|
|
|
36
|
if ($length/$samples > 100 && $step == 1) { |
657
|
0
|
|
|
|
|
0
|
my @result; |
658
|
|
|
|
|
|
|
# my $window = 20*($span/$step); |
659
|
0
|
|
|
|
|
0
|
my $interval = $length/$samples; |
660
|
|
|
|
|
|
|
# my $window = 100*$interval/$span; |
661
|
0
|
|
|
|
|
0
|
my $window = $interval/2; |
662
|
|
|
|
|
|
|
# warn "window = $window, interval = $interval"; |
663
|
0
|
|
|
|
|
0
|
for (my $i=0;$i<$samples;$i++) { |
664
|
0
|
|
|
|
|
0
|
my $packed_data = $self->_retrieve_packed_range(int($start+$i*$interval-$window), |
665
|
|
|
|
|
|
|
int($window), |
666
|
|
|
|
|
|
|
$step); |
667
|
0
|
|
|
|
|
0
|
my @bases= grep {$_} unpack('C*',$packed_data); |
|
0
|
|
|
|
|
0
|
|
668
|
0
|
0
|
|
|
|
0
|
if (@bases) { |
669
|
0
|
|
|
|
|
0
|
local $^W = 0; |
670
|
0
|
|
|
|
|
0
|
my $arry = $self->unscale(\@bases); |
671
|
0
|
|
|
|
|
0
|
my $n = @$arry; |
672
|
0
|
|
|
|
|
0
|
my $total = 0; |
673
|
0
|
|
|
|
|
0
|
$total += $_ foreach @$arry; |
674
|
0
|
|
|
|
|
0
|
my $mean = $total/$n; |
675
|
0
|
|
|
|
|
0
|
my $max; |
676
|
0
|
0
|
0
|
|
|
0
|
for (@$arry) { $max = $_ if !defined $max || $max < $_ } |
|
0
|
|
|
|
|
0
|
|
677
|
|
|
|
|
|
|
# warn $start+$i*$interval,': ',join(',',map {int($_)} @$arry), |
678
|
|
|
|
|
|
|
# " mean = $mean, max = $max"; |
679
|
|
|
|
|
|
|
# push @result,$mean; |
680
|
0
|
|
|
|
|
0
|
push @result,$max; |
681
|
|
|
|
|
|
|
} else { |
682
|
0
|
|
|
|
|
0
|
push @result,0; |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
} |
685
|
0
|
|
|
|
|
0
|
return \@result; |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
4
|
|
|
|
|
16
|
my $packed_data = $self->_retrieve_packed_range($start,$length,$step); |
689
|
|
|
|
|
|
|
|
690
|
4
|
|
|
|
|
6
|
my @bases; |
691
|
4
|
|
|
|
|
91
|
$#bases = $length-1; |
692
|
|
|
|
|
|
|
|
693
|
4
|
50
|
|
|
|
50
|
if ($step == $span) { |
694
|
|
|
|
|
|
|
# in this case, we do not have any partially-empty |
695
|
|
|
|
|
|
|
# steps, so can operate on the step-length data structure |
696
|
|
|
|
|
|
|
# directly |
697
|
0
|
|
|
|
|
0
|
@bases = unpack('C*',$packed_data); |
698
|
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
else { |
701
|
|
|
|
|
|
|
# In this case some regions may have partially missing data, |
702
|
|
|
|
|
|
|
# so we create an array equal to the length of the requested region, |
703
|
|
|
|
|
|
|
# fill it in, and then sample it |
704
|
4
|
|
|
|
|
16
|
for (my $i=0; $i<length $packed_data; $i++) { |
705
|
5202
|
|
|
|
|
13606
|
my $index = $i * $step; |
706
|
5202
|
|
|
|
|
10343
|
my $value = unpack('C',substr($packed_data,$i,1)); |
707
|
5202
|
100
|
|
|
|
23650
|
next unless $value; # ignore 0 values |
708
|
61
|
|
|
|
|
1507
|
@bases[$index..$index+$span-1] = ($value) x $span; |
709
|
|
|
|
|
|
|
} |
710
|
4
|
|
|
|
|
17
|
$#bases = $length-1; |
711
|
|
|
|
|
|
|
} |
712
|
|
|
|
|
|
|
|
713
|
4
|
|
|
|
|
25
|
my $r = $self->unscale(\@bases); |
714
|
4
|
|
|
|
|
36
|
$r = $self->sample($r,$samples); |
715
|
4
|
50
|
33
|
|
|
2599
|
$r = $self->smooth($r,$self->window * $samples/@bases) |
716
|
|
|
|
|
|
|
if defined $self->window && $self->window>1; |
717
|
4
|
|
|
|
|
121
|
return $r; |
718
|
|
|
|
|
|
|
} |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
sub _retrieve_packed_range { |
721
|
4
|
|
|
4
|
|
5
|
my $self = shift; |
722
|
4
|
|
|
|
|
95
|
my ($start,$length,$step) = @_; |
723
|
4
|
|
|
|
|
9
|
my $span = $self->span; |
724
|
|
|
|
|
|
|
|
725
|
4
|
|
|
|
|
11
|
my $offset = $self->_calculate_offset($start); |
726
|
|
|
|
|
|
|
|
727
|
4
|
|
|
|
|
14
|
$self->seek($offset); |
728
|
4
|
|
|
|
|
60
|
my $packed_data; |
729
|
4
|
|
|
|
|
10
|
$self->fh->read($packed_data,$length/$step); |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
# pad data up to required amount |
732
|
4
|
50
|
|
|
|
88
|
$packed_data .= "\0" x ($length/$step-length($packed_data)) |
733
|
|
|
|
|
|
|
if length $packed_data < $length/$step; |
734
|
4
|
|
|
|
|
157
|
return $packed_data; |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
sub sample { |
739
|
4
|
|
|
4
|
0
|
9
|
my $self = shift; |
740
|
4
|
|
|
|
|
9
|
my ($values,$samples) = @_; |
741
|
4
|
|
|
|
|
8
|
my $length = @$values; |
742
|
4
|
|
|
|
|
10
|
my $window_size = $length/$samples; |
743
|
|
|
|
|
|
|
|
744
|
4
|
|
|
|
|
6
|
my @samples; |
745
|
4
|
|
|
|
|
389
|
$#samples = $samples-1; |
746
|
|
|
|
|
|
|
|
747
|
4
|
50
|
|
|
|
16
|
if ($window_size < 2) { # no data smoothing needed |
748
|
4
|
|
|
|
|
183
|
@samples = map { $values->[$_*$window_size] } (0..$samples-1); |
|
5202
|
|
|
|
|
16953
|
|
749
|
|
|
|
|
|
|
} |
750
|
|
|
|
|
|
|
else { |
751
|
0
|
|
|
|
|
0
|
my $smoothsub = $self->smoothsub; |
752
|
0
|
|
|
|
|
0
|
for (my $i=0; $i<$samples; $i++) { |
753
|
0
|
|
|
|
|
0
|
my $start = $i * $window_size; |
754
|
0
|
|
|
|
|
0
|
my $end = $start + $window_size - 1; |
755
|
0
|
|
|
|
|
0
|
my @window = @{$values}[$start..$end]; |
|
0
|
|
|
|
|
0
|
|
756
|
|
|
|
|
|
|
|
757
|
0
|
|
|
|
|
0
|
my $value = $smoothsub->(\@window); |
758
|
0
|
|
|
|
|
0
|
$samples[$i] = $value; |
759
|
|
|
|
|
|
|
} |
760
|
|
|
|
|
|
|
} |
761
|
|
|
|
|
|
|
|
762
|
4
|
|
|
|
|
858
|
return \@samples; |
763
|
|
|
|
|
|
|
} |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
sub smoothsub { |
766
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
767
|
|
|
|
|
|
|
|
768
|
0
|
|
|
|
|
0
|
my $smoothing = $self->smoothing; |
769
|
0
|
0
|
|
|
|
0
|
my $smoothsub = $smoothing eq 'mean' ? \&sample_mean |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
770
|
|
|
|
|
|
|
:$smoothing eq 'max' ? \&sample_max |
771
|
|
|
|
|
|
|
:$smoothing eq 'min' ? \&sample_min |
772
|
|
|
|
|
|
|
:$smoothing eq 'none' ? \&sample_center |
773
|
|
|
|
|
|
|
:croak("invalid smoothing type '$smoothing'"); |
774
|
0
|
|
|
|
|
0
|
return $smoothsub; |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
sub smooth { |
778
|
0
|
|
|
0
|
0
|
0
|
my ($self,$data,$window) = @_; |
779
|
|
|
|
|
|
|
|
780
|
0
|
|
|
|
|
0
|
my $smoothing = $self->smoothing; |
781
|
0
|
|
0
|
|
|
0
|
$window ||= $self->window; |
782
|
|
|
|
|
|
|
|
783
|
0
|
0
|
0
|
|
|
0
|
return $data if $smoothing eq 'none' || $window < 2; |
784
|
|
|
|
|
|
|
|
785
|
0
|
|
|
|
|
0
|
my @data = @$data; |
786
|
0
|
|
|
|
|
0
|
my $smoother = $self->smoothsub; |
787
|
0
|
0
|
|
|
|
0
|
$window++ unless $window % 2; |
788
|
0
|
|
|
|
|
0
|
my $offset = int($window/2); |
789
|
|
|
|
|
|
|
|
790
|
0
|
|
|
|
|
0
|
for (my $i=$offset; $i<@$data-$offset; $i++) { |
791
|
0
|
|
|
|
|
0
|
my $start = $i - $offset; |
792
|
0
|
|
|
|
|
0
|
my $end = $i + $offset; |
793
|
0
|
|
|
|
|
0
|
my @subset = @data[$start..$end]; |
794
|
0
|
|
|
|
|
0
|
$data->[$i] = $smoother->(\@subset); |
795
|
|
|
|
|
|
|
} |
796
|
|
|
|
|
|
|
|
797
|
0
|
|
|
|
|
0
|
return $data; |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
sub window { |
801
|
4
|
|
|
4
|
0
|
9
|
my $self = shift; |
802
|
4
|
|
|
|
|
10
|
my $d = $self->{window}; |
803
|
4
|
50
|
|
|
|
347
|
$self->{window} = shift if @_; |
804
|
4
|
|
|
|
|
20
|
$d; |
805
|
|
|
|
|
|
|
} |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
sub sample_mean { |
808
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
809
|
0
|
|
|
|
|
0
|
my ($total,$items); |
810
|
0
|
|
|
|
|
0
|
for my $v (@$values) { |
811
|
0
|
0
|
|
|
|
0
|
next unless defined $v; |
812
|
0
|
|
|
|
|
0
|
$items++; |
813
|
0
|
|
|
|
|
0
|
$total+=$v; |
814
|
|
|
|
|
|
|
} |
815
|
0
|
0
|
|
|
|
0
|
return $items ? $total/$items : undef; |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
sub sample_max { |
819
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
820
|
0
|
|
|
|
|
0
|
my $max; |
821
|
0
|
|
|
|
|
0
|
for my $v (@$values) { |
822
|
0
|
0
|
|
|
|
0
|
next unless defined $v; |
823
|
0
|
0
|
0
|
|
|
0
|
$max = $v if !defined $max or $max < $v; |
824
|
|
|
|
|
|
|
} |
825
|
0
|
|
|
|
|
0
|
return $max; |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
sub sample_min { |
829
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
830
|
0
|
|
|
|
|
0
|
my $min; |
831
|
0
|
|
|
|
|
0
|
for my $v (@$values) { |
832
|
0
|
0
|
|
|
|
0
|
next unless defined $v; |
833
|
0
|
0
|
0
|
|
|
0
|
$min = $v if !defined $min or $min > $v; |
834
|
|
|
|
|
|
|
} |
835
|
0
|
|
|
|
|
0
|
return $min; |
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
sub sample_center { |
839
|
0
|
|
|
0
|
0
|
0
|
my $values = shift; |
840
|
0
|
|
|
|
|
0
|
return $values->[@$values/2]; |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
sub _store_values { |
844
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
845
|
0
|
|
|
|
|
0
|
my ($position,$data) = @_; |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
# where does data start |
848
|
0
|
|
|
|
|
0
|
my $offset = $self->_calculate_offset($position); |
849
|
0
|
|
|
|
|
0
|
my $fh = $self->fh; |
850
|
0
|
|
|
|
|
0
|
my $step = $self->step; |
851
|
|
|
|
|
|
|
|
852
|
0
|
|
|
|
|
0
|
my $scaled = $self->scale($data); |
853
|
|
|
|
|
|
|
|
854
|
0
|
|
|
|
|
0
|
$self->seek($offset); |
855
|
0
|
|
|
|
|
0
|
my $packed_data = pack('C*',@$scaled); |
856
|
0
|
0
|
|
|
|
0
|
$fh->print($packed_data) or die "Write failed: $!"; |
857
|
|
|
|
|
|
|
|
858
|
0
|
|
|
|
|
0
|
my $new_end = $position+@$data-1; |
859
|
0
|
0
|
0
|
|
|
0
|
$self->{end} = $new_end if !exists $self->{end} || $self->{end} < $new_end; |
860
|
|
|
|
|
|
|
} |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
# zero means "no data" |
863
|
|
|
|
|
|
|
# everything else is scaled from 1-255 |
864
|
|
|
|
|
|
|
sub scale { |
865
|
3809
|
|
|
3809
|
0
|
4104
|
my $self = shift; |
866
|
3809
|
|
|
|
|
4184
|
my $values = shift; |
867
|
3809
|
|
|
|
|
6514
|
my $scale = $self->_get_scale; |
868
|
3809
|
|
|
|
|
7978
|
my $min = $self->{options}{min}; |
869
|
3809
|
50
|
33
|
|
|
10110
|
if (ref $values && ref $values eq 'ARRAY') { |
870
|
0
|
|
|
|
|
0
|
my @return = map { |
871
|
0
|
|
|
|
|
0
|
my $i = ($_ - $min)/$scale; |
872
|
0
|
|
|
|
|
0
|
my $v = 1 + int($i+0.5*($i<=>0)); # avoid call to round() |
873
|
0
|
0
|
|
|
|
0
|
$v = 1 if $v < 1; |
874
|
0
|
0
|
|
|
|
0
|
$v = 255 if $v > 255; |
875
|
0
|
|
|
|
|
0
|
$v; |
876
|
|
|
|
|
|
|
} @$values; |
877
|
0
|
|
|
|
|
0
|
return \@return; |
878
|
|
|
|
|
|
|
} else { |
879
|
3809
|
|
|
|
|
12077
|
my $v = 1 + round (($values - $min)/$scale); |
880
|
3809
|
50
|
|
|
|
8130
|
$v = 1 if $v < 1; |
881
|
3809
|
50
|
|
|
|
7193
|
$v = 255 if $v > 255; |
882
|
3809
|
|
|
|
|
7442
|
return $v; |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
sub unscale { |
887
|
4
|
|
|
4
|
0
|
7
|
my $self = shift; |
888
|
4
|
|
|
|
|
6
|
my $values = shift; |
889
|
4
|
|
|
|
|
12
|
my $scale = $self->_get_scale; |
890
|
4
|
|
|
|
|
12
|
my $min = $self->{options}{min}; |
891
|
|
|
|
|
|
|
|
892
|
4
|
50
|
33
|
|
|
32
|
if (ref $values && ref $values eq 'ARRAY') { |
893
|
4
|
100
|
|
|
|
332
|
my @return = map {$_ ? (($_-1) * $scale + $min) : undef} @$values; |
|
5202
|
|
|
|
|
14863
|
|
894
|
4
|
|
|
|
|
166
|
return \@return; |
895
|
|
|
|
|
|
|
} else { |
896
|
0
|
0
|
|
|
|
0
|
return $values ? ($values-1) * $scale + $min : undef; |
897
|
|
|
|
|
|
|
} |
898
|
|
|
|
|
|
|
} |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
sub _get_scale { |
901
|
3813
|
|
|
3813
|
|
4521
|
my $self = shift; |
902
|
3813
|
100
|
|
|
|
8797
|
unless ($self->{scale}) { |
903
|
2
|
|
|
|
|
6
|
my $min = $self->{options}{min}; |
904
|
2
|
|
|
|
|
26
|
my $max = $self->{options}{max}; |
905
|
2
|
|
50
|
|
|
10
|
my $range = $max - $min || 0.001; # can't be zero! |
906
|
2
|
|
|
|
|
267
|
$self->{scale} = $range/254; |
907
|
|
|
|
|
|
|
} |
908
|
3813
|
|
|
|
|
6663
|
return $self->{scale}; |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
sub round { |
912
|
3809
|
|
|
3809
|
0
|
10586
|
return int($_[0]+0.5*($_[0]<=>0)); |
913
|
|
|
|
|
|
|
} |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
1; |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
__END__ |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head1 SEE ALSO |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
L<Bio::Graphics::Wiggle::Loader>, |
923
|
|
|
|
|
|
|
L<Bio::Graphics::Panel>, |
924
|
|
|
|
|
|
|
L<Bio::Graphics::Glyph>, |
925
|
|
|
|
|
|
|
L<Bio::Graphics::Feature>, |
926
|
|
|
|
|
|
|
L<Bio::Graphics::FeatureFile> |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
=head1 AUTHOR |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
Lincoln Stein E<lt>lstein@cshl.orgE<gt>. |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
Copyright (c) 2007 Cold Spring Harbor Laboratory |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
This package and its accompanying libraries is free software; you can |
935
|
|
|
|
|
|
|
redistribute it and/or modify it under the terms of the GPL (either |
936
|
|
|
|
|
|
|
version 1, or at your option, any later version) or the Artistic |
937
|
|
|
|
|
|
|
License 2.0. Refer to LICENSE for the full license text. In addition, |
938
|
|
|
|
|
|
|
please see DISCLAIMER.txt for disclaimers of warranty. |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
=cut |