line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*- indent-tabs-mode: nil -*- |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Bio::IlluminaSAV; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
our $VERSION = '1.0.' . [qw$Revision: 9706 $]->[1]; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Bio::IlluminaSAV - Parse Illumina SAV files |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Routines for parsing and extracting information from Illumina SAV files |
15
|
|
|
|
|
|
|
(aka "InterOp"). |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use IlluminaSAV; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my $savs = IlluminaSAV->new("/path/to/rundirectory"); |
20
|
|
|
|
|
|
|
my @qmetrics = $savs->quality_metrics(); |
21
|
|
|
|
|
|
|
my $cur_cycle = $savs->cur_cycle(); |
22
|
|
|
|
|
|
|
my $num_reads = $savs->num_reads(); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 DESCRIPTION |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Easy access to Illumina's SAV file data. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 AUTHOR |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Andrew Hoerter & Erik Aronesty, Eearonesty@cpan.orgE |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Copyright (C) 2013 Erik Aronesty / Expression Analysis |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
37
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.1 or, |
38
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=cut |
41
|
|
|
|
|
|
|
|
42
|
1
|
|
|
1
|
|
112568
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
74
|
|
43
|
|
|
|
|
|
|
|
44
|
1
|
|
|
1
|
|
2566
|
use Module::Load; |
|
1
|
|
|
|
|
2119
|
|
|
1
|
|
|
|
|
8
|
|
45
|
1
|
|
|
1
|
|
56
|
use File::Spec; |
|
1
|
|
|
|
|
10
|
|
|
1
|
|
|
|
|
21
|
|
46
|
1
|
|
|
1
|
|
426
|
use XML::LibXML::Reader; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
use Cwd 'realpath'; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# list of default exports |
52
|
|
|
|
|
|
|
our @EXPORT = qw( TILE_METRIC_CLUST_DENS |
53
|
|
|
|
|
|
|
TILE_METRIC_PF_CLUST_DENS |
54
|
|
|
|
|
|
|
TILE_METRIC_NUM_CLUSTERS |
55
|
|
|
|
|
|
|
TILE_METRIC_NUM_PF_CLUSTERS |
56
|
|
|
|
|
|
|
TILE_METRIC_CONTROL_LANE |
57
|
|
|
|
|
|
|
); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# be nicer |
60
|
|
|
|
|
|
|
#our @EXPORT_OK = @EXPORT; |
61
|
|
|
|
|
|
|
#our %EXPORT_TAGS = ( const => [ @EXPORT ] ); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
my %SAV_FORMAT = |
64
|
|
|
|
|
|
|
( |
65
|
|
|
|
|
|
|
'ExtractionMetrics' => [ ['lane', 'v', 1], |
66
|
|
|
|
|
|
|
['tile', 'v', 1], |
67
|
|
|
|
|
|
|
['cycle', 'v', 1], |
68
|
|
|
|
|
|
|
['fwhm', 'f', 4], # FWHM scores for A/C/G/T in order |
69
|
|
|
|
|
|
|
['intensities', 'v', 4], # intensities for A/C/G/T in order |
70
|
|
|
|
|
|
|
['cif_datestamp', 'V', 1], |
71
|
|
|
|
|
|
|
['cif_timestamp', 'V', 1] ], |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
'QualityMetrics' => [ ['lane', 'v', 1], |
74
|
|
|
|
|
|
|
['tile', 'v', 1], |
75
|
|
|
|
|
|
|
['cycle', 'v', 1], |
76
|
|
|
|
|
|
|
['qscores', 'L', 50] ], # number of clusters with quality Q1-Q50 |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
'ErrorMetrics' => [ ['lane', 'v', 1], |
79
|
|
|
|
|
|
|
['tile', 'v', 1], |
80
|
|
|
|
|
|
|
['cycle', 'v', 1], |
81
|
|
|
|
|
|
|
['err_rate', 'f', 1], |
82
|
|
|
|
|
|
|
['err_reads', 'L', 5] ], # number of perfect reads, reads with 1 error, |
83
|
|
|
|
|
|
|
# 2 errors, 3 errors, 4 errors in order |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
'TileMetrics' => [ ['lane', 'v', 1], |
86
|
|
|
|
|
|
|
['tile', 'v', 1], |
87
|
|
|
|
|
|
|
['metric', 'v', 1], # metric codes are exported here as constants |
88
|
|
|
|
|
|
|
['metric_val', 'f', 1] ], |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
'CorrectedIntMetrics' => [ ['lane', 'v', 1], |
91
|
|
|
|
|
|
|
['tile', 'v', 1], |
92
|
|
|
|
|
|
|
['cycle', 'v', 1], |
93
|
|
|
|
|
|
|
['avg_intensity', 'v', 1], |
94
|
|
|
|
|
|
|
['avg_corrected_int', 'v', 4], # avg corrected intensity for A/C/G/T in order |
95
|
|
|
|
|
|
|
['avg_called_int', 'v', 4], # as above, but only called clusters |
96
|
|
|
|
|
|
|
['num_basecalls', 'f', 5], # number of basecalls for no-call/A/C/G/T in order |
97
|
|
|
|
|
|
|
['snr', 'f', 1] ], # signal to noise ratio |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
'ControlMetrics' => [ ['lane', 'v', 1], |
100
|
|
|
|
|
|
|
['tile', 'v', 1], |
101
|
|
|
|
|
|
|
['read', 'v', 1], |
102
|
|
|
|
|
|
|
['control_name', 'v/A', undef], |
103
|
|
|
|
|
|
|
['index_name', 'v/A', undef], |
104
|
|
|
|
|
|
|
['control_clusters', 'L', 1] ], |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
'ImageMetrics' => [ ['lane', 'v', 1], |
107
|
|
|
|
|
|
|
['tile', 'v', 1], |
108
|
|
|
|
|
|
|
['cycle', 'v', 1], |
109
|
|
|
|
|
|
|
['channel_id', 'v', 1], # 0=A, 1=C, 2=G, 3=T |
110
|
|
|
|
|
|
|
['min_contrast', 'v', 1], |
111
|
|
|
|
|
|
|
['max_contrast', 'v', 1] ] |
112
|
|
|
|
|
|
|
); |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# old change in HCS? Who knows. |
115
|
|
|
|
|
|
|
$SAV_FORMAT{'QMetrics'} = $SAV_FORMAT{'QualityMetrics'}; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
my $XML_PARSER; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
### Constants |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 CONSTANTS |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# constants you can use |
124
|
|
|
|
|
|
|
TILE_METRIC_CLUST_DENS |
125
|
|
|
|
|
|
|
TILE_METRIC_PF_CLUST_DENS |
126
|
|
|
|
|
|
|
TILE_METRIC_NUM_CLUSTERS |
127
|
|
|
|
|
|
|
TILE_METRIC_NUM_PF_CLUSTERS |
128
|
|
|
|
|
|
|
TILE_METRIC_CONTROL_LANE |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
use constant TILE_METRIC_CLUST_DENS => 100; |
133
|
|
|
|
|
|
|
use constant TILE_METRIC_PF_CLUST_DENS => 101; |
134
|
|
|
|
|
|
|
use constant TILE_METRIC_NUM_CLUSTERS => 102; |
135
|
|
|
|
|
|
|
use constant TILE_METRIC_NUM_PF_CLUSTERS => 103; |
136
|
|
|
|
|
|
|
use constant TILE_METRIC_CONTROL_LANE => 400; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
### Helper subs |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub unpack_fmt |
141
|
|
|
|
|
|
|
{ |
142
|
|
|
|
|
|
|
my ($packrule) = @_; |
143
|
|
|
|
|
|
|
my ($type, $repeat) = ($packrule->[1], $packrule->[2]); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
return ($repeat ? $type . "[$repeat]" : $type); |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
### Class methods |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head1 FUNCTIONS |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=over |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=cut |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# System-independent way of accessing stuff beneath a run directory |
157
|
|
|
|
|
|
|
sub rundir_concat |
158
|
|
|
|
|
|
|
{ |
159
|
|
|
|
|
|
|
my ($self, @rest) = @_; |
160
|
|
|
|
|
|
|
my @rundir_dirs = File::Spec->splitdir($self->{'rundir'}); |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
return File::Spec->join(@rundir_dirs, @rest); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=item cur_cycle |
166
|
|
|
|
|
|
|
Return the current run extraction cycle |
167
|
|
|
|
|
|
|
=cut |
168
|
|
|
|
|
|
|
sub cur_cycle |
169
|
|
|
|
|
|
|
{ |
170
|
|
|
|
|
|
|
my $self = shift; |
171
|
|
|
|
|
|
|
# Extraction metrics seems to be the most reliable source... it's available early on |
172
|
|
|
|
|
|
|
# even for GA-II's |
173
|
|
|
|
|
|
|
my $ext_metrics = $self->extraction_metrics(); |
174
|
|
|
|
|
|
|
my @cycles_sorted = sort { $b <=> $a } (map { $_->{'cycle'} } (@$ext_metrics)); |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
return $cycles_sorted[0]; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=item cur_copy_cycle |
180
|
|
|
|
|
|
|
Return the current copy cycle |
181
|
|
|
|
|
|
|
=cut |
182
|
|
|
|
|
|
|
sub cur_copy_cycle |
183
|
|
|
|
|
|
|
{ |
184
|
|
|
|
|
|
|
my $self = shift; |
185
|
|
|
|
|
|
|
my $glob_pat = $self->rundir_concat('Data', 'Intensities', 'BaseCalls', 'L001', 'C*.1'); |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
my @cycles_sorted = sort { $b <=> $a } (map { /C(\d+)\.1/ && $1; } (glob($glob_pat))); |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
return (scalar(@cycles_sorted) == 0) ? 0 : $cycles_sorted[0]; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=item max_cycles |
194
|
|
|
|
|
|
|
Return the maximum number of cycles |
195
|
|
|
|
|
|
|
=cut |
196
|
|
|
|
|
|
|
sub max_cycles |
197
|
|
|
|
|
|
|
{ |
198
|
|
|
|
|
|
|
my $self = shift; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
my $sum = 0; |
201
|
|
|
|
|
|
|
my @readcycles = map { $_->{'numcycles'} } @{$self->run_info->{'reads'}}; |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
return undef |
204
|
|
|
|
|
|
|
unless @readcycles; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
foreach (@readcycles) |
207
|
|
|
|
|
|
|
{ |
208
|
|
|
|
|
|
|
$sum += $_; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
return $sum; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=item num_lanes |
215
|
|
|
|
|
|
|
Return the number of lanes |
216
|
|
|
|
|
|
|
=cut |
217
|
|
|
|
|
|
|
sub num_lanes |
218
|
|
|
|
|
|
|
{ |
219
|
|
|
|
|
|
|
my $self = shift; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
return $self->run_info->{'numlanes'}; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item num_reads |
225
|
|
|
|
|
|
|
Return the number of reads |
226
|
|
|
|
|
|
|
=cut |
227
|
|
|
|
|
|
|
sub num_reads |
228
|
|
|
|
|
|
|
{ |
229
|
|
|
|
|
|
|
my $self = shift; |
230
|
|
|
|
|
|
|
return scalar(@{$self->run_info->{'reads'}}); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=item run_info |
234
|
|
|
|
|
|
|
Returns a hash with RunInfo.xml info |
235
|
|
|
|
|
|
|
=cut |
236
|
|
|
|
|
|
|
sub run_info { |
237
|
|
|
|
|
|
|
my $self = shift; |
238
|
|
|
|
|
|
|
$self->{'runinfo'} = parse_runinfo($self->rundir_concat('RunInfo.xml')) |
239
|
|
|
|
|
|
|
unless (defined($self->{'runinfo'})); |
240
|
|
|
|
|
|
|
return $self->{'runinfo'}; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=item extraction_metrics |
244
|
|
|
|
|
|
|
Load extraction metrics |
245
|
|
|
|
|
|
|
=cut |
246
|
|
|
|
|
|
|
sub extraction_metrics |
247
|
|
|
|
|
|
|
{ |
248
|
|
|
|
|
|
|
my $self = shift; |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
return parse_sav_file($self->rundir_concat('InterOp', 'ExtractionMetricsOut.bin')); |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=item quality_metrics |
254
|
|
|
|
|
|
|
Load quality metrics |
255
|
|
|
|
|
|
|
=cut |
256
|
|
|
|
|
|
|
sub quality_metrics |
257
|
|
|
|
|
|
|
{ |
258
|
|
|
|
|
|
|
my $self = shift; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
return parse_sav_file($self->rundir_concat('InterOp', 'QMetricsOut.bin')); |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=item error_metrics |
264
|
|
|
|
|
|
|
Load error metrics |
265
|
|
|
|
|
|
|
=cut |
266
|
|
|
|
|
|
|
sub error_metrics |
267
|
|
|
|
|
|
|
{ |
268
|
|
|
|
|
|
|
my $self = shift; |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
return parse_sav_file($self->rundir_concat('InterOp', 'ErrorMetricsOut.bin')); |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=item tile_metrics |
274
|
|
|
|
|
|
|
Load tile metrics |
275
|
|
|
|
|
|
|
=cut |
276
|
|
|
|
|
|
|
sub tile_metrics |
277
|
|
|
|
|
|
|
{ |
278
|
|
|
|
|
|
|
my $self = shift; |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
return parse_sav_file($self->rundir_concat('InterOp', 'TileMetricsOut.bin')); |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=item corrected_int_metrics |
284
|
|
|
|
|
|
|
Load corrected int metrics |
285
|
|
|
|
|
|
|
=cut |
286
|
|
|
|
|
|
|
sub corrected_int_metrics |
287
|
|
|
|
|
|
|
{ |
288
|
|
|
|
|
|
|
my $self = shift; |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
return parse_sav_file($self->rundir_concat('InterOp', 'CorrectedIntMetricsOut.bin')); |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=item control_metrics |
294
|
|
|
|
|
|
|
Load control metrics |
295
|
|
|
|
|
|
|
=cut |
296
|
|
|
|
|
|
|
sub control_metrics |
297
|
|
|
|
|
|
|
{ |
298
|
|
|
|
|
|
|
my $self = shift; |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
return parse_sav_file($self->rundir_concat('InterOp', 'ControlMetricsOut.bin')); |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item image_metrics |
304
|
|
|
|
|
|
|
Load image metrics |
305
|
|
|
|
|
|
|
=cut |
306
|
|
|
|
|
|
|
sub image_metrics |
307
|
|
|
|
|
|
|
{ |
308
|
|
|
|
|
|
|
my $self = shift; |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
return parse_sav_file($self->rundir_concat('InterOp', 'ImageMetricsOut.bin')); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
### Functional interfaces |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=item parse_sav_file(PATHNAME) |
316
|
|
|
|
|
|
|
Load binary SAV-format file PATHNAME, inferring the type from the filename |
317
|
|
|
|
|
|
|
=cut |
318
|
|
|
|
|
|
|
sub parse_sav_file |
319
|
|
|
|
|
|
|
{ |
320
|
|
|
|
|
|
|
my ($path) = @_; |
321
|
|
|
|
|
|
|
my (undef, $dir, $file) = File::Spec->splitpath($path); |
322
|
|
|
|
|
|
|
my $type = $file; |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
$type =~ s/(Out)?\.bin//; |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
if ($type) |
327
|
|
|
|
|
|
|
{ |
328
|
|
|
|
|
|
|
my $rec; |
329
|
|
|
|
|
|
|
my $unpackfmt; |
330
|
|
|
|
|
|
|
my @ret; |
331
|
|
|
|
|
|
|
my $sav_version; |
332
|
|
|
|
|
|
|
my $reclen; |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
open(SAV, $path) || return undef; |
335
|
|
|
|
|
|
|
read(SAV, $rec, 1) || return undef; |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
$sav_version = unpack('C', $rec); |
338
|
|
|
|
|
|
|
# ControlMetrics has variable sized records |
339
|
|
|
|
|
|
|
if ($type eq 'ControlMetrics') |
340
|
|
|
|
|
|
|
{ |
341
|
|
|
|
|
|
|
$reclen = 0; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
else |
344
|
|
|
|
|
|
|
{ |
345
|
|
|
|
|
|
|
read(SAV, $rec, 1) || return undef; |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
$reclen = unpack('C', $rec); |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
local $/ = \$reclen; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
my @packrules = @{$SAV_FORMAT{$type}}; |
353
|
|
|
|
|
|
|
return undef unless (@packrules); |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
if ($reclen == 0) |
356
|
|
|
|
|
|
|
{ |
357
|
|
|
|
|
|
|
# variable sized records |
358
|
|
|
|
|
|
|
$unpackfmt = "(" . join('', (map { unpack_fmt($_) } @packrules)) . ")*"; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
else |
361
|
|
|
|
|
|
|
{ |
362
|
|
|
|
|
|
|
# static sized records |
363
|
|
|
|
|
|
|
$unpackfmt = join('', (map { unpack_fmt($_) } @packrules)); |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# for variable sized records, slurp in the whole file and unpack as a whole. |
367
|
|
|
|
|
|
|
# memory-inefficient, but these files are not particularly large. |
368
|
|
|
|
|
|
|
# for static sized records, go record-by-record. |
369
|
|
|
|
|
|
|
while ($rec = ) |
370
|
|
|
|
|
|
|
{ |
371
|
|
|
|
|
|
|
my @unpacked = unpack($unpackfmt, $rec); |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
while (@unpacked) |
374
|
|
|
|
|
|
|
{ |
375
|
|
|
|
|
|
|
my $parsedrec = {}; |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
foreach my $packrule (@packrules) |
378
|
|
|
|
|
|
|
{ |
379
|
|
|
|
|
|
|
my ($fieldname, $len) = ($packrule->[0], $packrule->[2]); |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
next if !defined($fieldname); |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
if (defined($len) && $len > 1) |
384
|
|
|
|
|
|
|
{ |
385
|
|
|
|
|
|
|
# return array ref |
386
|
|
|
|
|
|
|
$parsedrec->{$fieldname} = [splice(@unpacked, 0, $len)]; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
else |
389
|
|
|
|
|
|
|
{ |
390
|
|
|
|
|
|
|
# return scalar |
391
|
|
|
|
|
|
|
$parsedrec->{$fieldname} = shift(@unpacked); |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
push @ret, $parsedrec; |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
close(SAV); |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
return \@ret; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
return undef; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
sub parse_runinfo |
408
|
|
|
|
|
|
|
{ |
409
|
|
|
|
|
|
|
my ($path) = @_; |
410
|
|
|
|
|
|
|
my $parsed_ctx; |
411
|
|
|
|
|
|
|
my $numlanes; |
412
|
|
|
|
|
|
|
my @reads; |
413
|
|
|
|
|
|
|
my %ret; |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
$XML_PARSER = XML::LibXML->new(recover=>1) |
416
|
|
|
|
|
|
|
unless (defined($XML_PARSER)); |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
return undef unless ($XML_PARSER); |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
$parsed_ctx = XML::LibXML::XPathContext->new($XML_PARSER->parse_file($path)); |
421
|
|
|
|
|
|
|
return undef unless ($parsed_ctx); |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
$numlanes = $parsed_ctx->findvalue('/RunInfo/Run/FlowcellLayout/@LaneCount'); |
424
|
|
|
|
|
|
|
if (!$numlanes) |
425
|
|
|
|
|
|
|
{ |
426
|
|
|
|
|
|
|
# some (all?) GAII's use a slightly different format to enumerate lanes |
427
|
|
|
|
|
|
|
my @lanes = $parsed_ctx->findnodes('/RunInfo/Run/Tiles/Lane'); |
428
|
|
|
|
|
|
|
$numlanes = scalar(@lanes); |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
$ret{'numlanes'} = $numlanes; |
432
|
|
|
|
|
|
|
$ret{'runid'} = $parsed_ctx->findvalue('/RunInfo/Run/@Id'); |
433
|
|
|
|
|
|
|
$ret{'run_num'} = $parsed_ctx->findvalue('/RunInfo/Run/@Number'); |
434
|
|
|
|
|
|
|
$ret{'fcid'} = $parsed_ctx->findvalue('/RunInfo/Run/Flowcell') |
435
|
|
|
|
|
|
|
or $parsed_ctx->findvalue('/RunInfo/Run/FlowcellId'); |
436
|
|
|
|
|
|
|
$ret{'instrument'} = $parsed_ctx->findvalue('/RunInfo/Run/Instrument'); |
437
|
|
|
|
|
|
|
$ret{'date'} = $parsed_ctx->findvalue('/RunInfo/Run/Date'); |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
my $read_idx = 0; |
440
|
|
|
|
|
|
|
foreach my $read_ele ($parsed_ctx->findnodes('/RunInfo/Run/Reads/Read')) |
441
|
|
|
|
|
|
|
{ |
442
|
|
|
|
|
|
|
my %parsed_read; |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
$read_idx++; |
445
|
|
|
|
|
|
|
my $first_cycle = $read_ele->getAttribute('FirstCycle'); |
446
|
|
|
|
|
|
|
my $last_cycle = $read_ele->getAttribute('LastCycle'); |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
my $has_idx_child = $read_ele->exists('Index'); |
449
|
|
|
|
|
|
|
my $idx_attr = $read_ele->getAttribute('IsIndexedRead'); |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# default values are for HiSeqs and MiSeqs |
452
|
|
|
|
|
|
|
$parsed_read{'readnum'} = $read_ele->getAttribute('Number') // $read_idx; |
453
|
|
|
|
|
|
|
$parsed_read{'numcycles'} = $read_ele->getAttribute('NumCycles') // (($last_cycle - $first_cycle) + 1); |
454
|
|
|
|
|
|
|
$parsed_read{'is_index'} = ((defined($idx_attr)?$idx_attr:"") eq 'Y') || ($has_idx_child); |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
push @{$ret{'reads'}}, \%parsed_read; |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
$ret{'reads'} = [sort { $a->{readnum} <=> $b->{readnum} } @{$ret{'reads'}}]; |
460
|
|
|
|
|
|
|
my $first_cyc = 1; |
461
|
|
|
|
|
|
|
foreach my $read (@{$ret{'reads'}}) |
462
|
|
|
|
|
|
|
{ |
463
|
|
|
|
|
|
|
$read->{'first_cycle'} = $first_cyc; |
464
|
|
|
|
|
|
|
$read->{'last_cycle'} = $first_cyc + $read->{'numcycles'} - 1; |
465
|
|
|
|
|
|
|
$first_cyc = $read->{'last_cycle'} + 1; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
if (scalar(grep { !($_->{'is_index'}) } @{$ret{'reads'}}) > 1) |
469
|
|
|
|
|
|
|
{ |
470
|
|
|
|
|
|
|
$ret{'runtype'} = 'PE'; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
else |
473
|
|
|
|
|
|
|
{ |
474
|
|
|
|
|
|
|
$ret{'runtype'} = 'SR'; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
return \%ret; |
478
|
|
|
|
|
|
|
} |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
### OO-interface |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub new |
483
|
|
|
|
|
|
|
{ |
484
|
|
|
|
|
|
|
my ($cls, $rundir) = @_; |
485
|
|
|
|
|
|
|
my $self = { rundir => realpath($rundir) }; |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
bless($self); |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
return $self; |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
1; |
493
|
|
|
|
|
|
|
|