line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: gmap_f9.pm 15987 2009-08-18 21:08:55Z lstein $ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# BioPerl module for Bio::SearchIO::gmap_f9 |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Cared for by George Hartzell |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright George Hartzell |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Bio::SearchIO::gmap_f9 - Event generator for parsing gmap reports (Z format) |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# Do not use this object directly - it is used as part of the |
20
|
|
|
|
|
|
|
# Bio::SearchIO system. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use Bio::SearchIO; |
23
|
|
|
|
|
|
|
my $searchio = Bio::SearchIO->new(-format => 'gmap', |
24
|
|
|
|
|
|
|
-file => 't/data/her2.gmapz'); |
25
|
|
|
|
|
|
|
while( my $result = $searchio->next_result ) { |
26
|
|
|
|
|
|
|
while( my $hit = $result->next_hit ) { |
27
|
|
|
|
|
|
|
while( my $hsp = $hit->next_hsp ) { |
28
|
|
|
|
|
|
|
# ... |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 DESCRIPTION |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
This object encapsulates the necessary methods for generating events |
37
|
|
|
|
|
|
|
suitable for building Bio::Search objects from a GMAP "compressed" |
38
|
|
|
|
|
|
|
report (from gmap run with -Z flag). Read the L<Bio::SearchIO> documentation for more |
39
|
|
|
|
|
|
|
information about how to use this. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head2 REVERSE STRAND AND BIOPERL COORDINATES |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
I believe that I'm doing the correct thing when reporting hits on the |
44
|
|
|
|
|
|
|
negative strand of the genome. In particular, I've compared the |
45
|
|
|
|
|
|
|
"exons" this code generates with the set returned by ncbi's megablast |
46
|
|
|
|
|
|
|
web service. NCBI's hsp's are ordered differently and have a |
47
|
|
|
|
|
|
|
different genomic location (off by ~18,000,000 bases, padding?) but |
48
|
|
|
|
|
|
|
the starts, ends, and lengths were similar and my strand handling |
49
|
|
|
|
|
|
|
matches theirs. E.g. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
CDNA GENOME |
52
|
|
|
|
|
|
|
start end strand start end strand |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
blast |
55
|
|
|
|
|
|
|
1913 2989 1 86236731 86237808 -1 |
56
|
|
|
|
|
|
|
1 475 1 86260509 86260983 -1 |
57
|
|
|
|
|
|
|
1510 1727 1 86240259 86240476 -1 |
58
|
|
|
|
|
|
|
841 989 1 86243034 86243182 -1 |
59
|
|
|
|
|
|
|
1381 1514 1 86240630 86240763 -1 |
60
|
|
|
|
|
|
|
989 1122 1 86242457 86242590 -1 |
61
|
|
|
|
|
|
|
599 729 1 86247470 86247600 -1 |
62
|
|
|
|
|
|
|
473 608 1 86259972 86260107 -1 |
63
|
|
|
|
|
|
|
1255 1382 1 86240837 86240964 -1 |
64
|
|
|
|
|
|
|
730 842 1 86244040 86244152 -1 |
65
|
|
|
|
|
|
|
1813 1921 1 86238123 86238231 -1 |
66
|
|
|
|
|
|
|
1725 1814 1 86239747 86239836 -1 |
67
|
|
|
|
|
|
|
1167 1256 1 86241294 86241383 -1 |
68
|
|
|
|
|
|
|
1120 1188 1 86242319 86242387 -1 |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
gmap |
71
|
|
|
|
|
|
|
1 475 1 104330509 104330983 -1 |
72
|
|
|
|
|
|
|
476 600 1 104329980 104330104 -1 |
73
|
|
|
|
|
|
|
601 729 1 104317470 104317598 -1 |
74
|
|
|
|
|
|
|
730 841 1 104314041 104314152 -1 |
75
|
|
|
|
|
|
|
842 989 1 104313034 104313181 -1 |
76
|
|
|
|
|
|
|
990 1121 1 104312458 104312589 -1 |
77
|
|
|
|
|
|
|
1122 1187 1 104312320 104312385 -1 |
78
|
|
|
|
|
|
|
1188 1256 1 104311294 104311362 -1 |
79
|
|
|
|
|
|
|
1257 1382 1 104310837 104310962 -1 |
80
|
|
|
|
|
|
|
1383 1511 1 104310633 104310761 -1 |
81
|
|
|
|
|
|
|
1512 1726 1 104310260 104310474 -1 |
82
|
|
|
|
|
|
|
1727 1814 1 104309747 104309834 -1 |
83
|
|
|
|
|
|
|
1815 1917 1 104308127 104308229 -1 |
84
|
|
|
|
|
|
|
1918 2989 1 104306731 104307802 -1 |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 FEEDBACK |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head2 Mailing Lists |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
91
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
92
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
95
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 Reporting Bugs |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
100
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
101
|
|
|
|
|
|
|
the web: |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head1 AUTHOR - George Hartzell |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Email hartzell@alerce.com |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Additional contributors names and emails here |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 APPENDIX |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
116
|
|
|
|
|
|
|
Internal methods are usually preceded with an underscore (_). |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Let the code begin... |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
package Bio::SearchIO::gmap_f9; |
125
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
24
|
|
126
|
1
|
|
|
1
|
|
2
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
20
|
|
127
|
|
|
|
|
|
|
|
128
|
1
|
|
|
1
|
|
418
|
use Bio::Search::Hit::GenericHit; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
129
|
1
|
|
|
1
|
|
442
|
use Bio::Search::HSP::GenericHSP; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
39
|
|
130
|
|
|
|
|
|
|
|
131
|
1
|
|
|
1
|
|
5
|
use base qw(Bio::SearchIO ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
66
|
|
132
|
|
|
|
|
|
|
|
133
|
1
|
|
|
1
|
|
5
|
use Data::Dumper; |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
954
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 next_result |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Title : next_result |
138
|
|
|
|
|
|
|
Usage : $result = $stream->next_result |
139
|
|
|
|
|
|
|
Function: Reads the next ResultI object from the stream and returns it. |
140
|
|
|
|
|
|
|
Returns : A Bio::Search::Result::ResultI object |
141
|
|
|
|
|
|
|
Args : n/a |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub next_result {
    my $self = shift;

    # Names for the captures of the column regex below, in capture order.
    my @column_keys = qw(
        query_aa_pos query_aa query_pos query_base
        hit_strand hit_chromo hit_pos hit_concat_pos hit_base hit_aa
    );

    my $result;              # undef until a '>' header line has been seen
    my $hit;                 # the single hit that collects every HSP
    my @hsp_info;            # columns of the HSP currently being assembled
    my $previous_hit_pos;    # hit position of the previous parsed column

    # Read into a lexical instead of assigning to the global $_, so that a
    # caller's $_ is never clobbered.  Testing definedness (rather than
    # truth) keeps a final, newline-less "0" line from ending the loop.
  LINE:
    while ( defined( my $line = $self->_readline ) ) {

        if ( $line =~ /^>/ ) {    # looking at the start of a result

            # A second header means the current result is complete; put the
            # header back so the next call starts from it.
            if ($result) {
                $self->_pushback($line);
                last LINE;
            }

            # NOTE: the greedy (.*) takes the rest of the line, so a trailing
            # "md5:..." token ends up in the description and the md5 group
            # never captures anything.
            my ( $id, $desc ) =
              $line =~ m|>([^ ]*)\s*(.*)\s*(?:md5:(.*))?|;

            $result = Bio::Search::Result::GenericResult->new();
            $result->algorithm('gmap');
            $result->query_name($id);
            $result->query_accession($id);
            $result->query_description($desc);

            # The real name is filled in once the chromosome is known.
            $hit ||= Bio::Search::Hit::GenericHit->new(
                -name => "NONE_SPECIFIED" );
            next LINE;
        }

        # Otherwise the line describes one alignment column, e.g.
        #   468 H 1956 C -14:104307764 2298317517 C H
        #   468   1957 A -14:104307763 2298317516 A
        # ('|' is the regex delimiter, so each "\|" below is an alternation.)
        my @values = (
            $line =~ m|
               (\d+)[ ]?(.)?[\t]
               (\d+)[ ]?(.)?[\t]
               # TODO chromosome isn't a number... X, Y, MT....
               (\+\|\-)([\dxXyY]+\|MT):(\d+)[ ](\d+)[ ](.)
               [\t]?(.)?
               |x
        );

        # Lines that are not alignment columns (e.g. blank lines) are
        # skipped instead of being recorded as a column of undefs.
        next LINE unless @values;

        my %column;
        @column{@column_keys} = @values;

        # A jump of more than one base on the hit closes the current HSP.
        if ( defined $previous_hit_pos
            && abs( $column{hit_pos} - $previous_hit_pos ) > 1 )
        {
            $hit ||= Bio::Search::Hit::GenericHit->new(
                -name => "NONE_SPECIFIED" );
            $hit->add_hsp( $self->_hsp_from_info( \@hsp_info ) );
            @hsp_info = ();
        }

        push @hsp_info, \%column;
        $previous_hit_pos = $column{hit_pos};
    }

    if ($result) {

        # Flush the HSP that was still open when the input ran out.
        $hit->add_hsp( $self->_hsp_from_info( \@hsp_info ) ) if @hsp_info;

        my ( $hit_length, $query_length );
        for my $hsp ( $hit->hsps ) {
            $hit_length   += $hsp->length();
            $query_length += $hsp->length('query');
        }
        $hit->length($hit_length);
        $hit->query_length($query_length);

        # Update the name now that we actually know something useful; with
        # no parsed columns the placeholder name is left in place.
        $hit->name( $hsp_info[0]->{hit_chromo} ) if @hsp_info;

        $result->add_hit($hit) if $hit;
    }

    return $result;
}
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Build a Bio::Search::HSP::GenericHSP from a run of alignment columns.
#
# Args    : $info - reference to an array of column hashes; the keys read
#           here are query_base, hit_base, query_pos and hit_pos.
# Returns : the object returned by Bio::Search::HSP::GenericHSP->new.
sub _hsp_from_info {
    my $self = shift;
    my $info = shift;

    # Arguments with which the HSP is created.  (Kept in a hash that is not
    # called $a, so the sort variable is not shadowed.)
    #
    # -stranded: we don't have to worry about the hit strand here.
    # NOTE(review): the original comment says that marking the HSP as
    # stranded lets the GenericHSP code derive the strand from the start
    # and end positions - confirm against GenericHSP.
    my %hsp_args = (
        -algorithm => 'GMAP',
        -stranded  => 'both',
    );

    # Start at 0 so that an HSP without a single matching column reports
    # 0 identical/conserved positions rather than undef.
    my $identical = 0;

    for my $column ( @{$info} ) {
        my $query_base = $column->{query_base};
        my $hit_base   = $column->{hit_base};
        my $is_match   = $query_base eq $hit_base;

        $hsp_args{-query_seq}    .= $query_base;
        $hsp_args{-hit_seq}      .= $hit_base;
        $hsp_args{-homology_seq} .= $is_match ? $hit_base : ' ';
        $identical++ if $is_match;
    }

    # Switch to bioperl gaps: a blank base becomes '-'.
    $hsp_args{-query_seq} =~ tr/ /-/;
    $hsp_args{-hit_seq}   =~ tr/ /-/;

    $hsp_args{-identical} = $identical;
    $hsp_args{-conserved} = $identical;

    # Use the coordinates from gmap's -f 9 output.  Query positions are
    # taken from the first and last column as they are.
    $hsp_args{-query_start}  = $info->[0]->{query_pos};
    $hsp_args{-query_end}    = $info->[-1]->{query_pos};
    $hsp_args{-query_length} =
      $hsp_args{-query_end} - $hsp_args{-query_start} + 1;

    # The hit can be on either strand, so its end may be smaller than its
    # start; hence the abs() when computing the length.
    $hsp_args{-hit_start}  = $info->[0]->{hit_pos};
    $hsp_args{-hit_end}    = $info->[-1]->{hit_pos};
    $hsp_args{-hit_length} =
      abs( $hsp_args{-hit_end} - $hsp_args{-hit_start} ) + 1;

    # The HSP length is the longer of the two spans.
    $hsp_args{-hsp_length} =
        $hsp_args{-query_length} > $hsp_args{-hit_length}
      ? $hsp_args{-query_length}
      : $hsp_args{-hit_length};

    return Bio::Search::HSP::GenericHSP->new(%hsp_args);
}
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# TODO: adjust regexp to swallow lines w/out md5 sig's. |
277
|
|
|
|
|
|
|
# Parse a gmap path header line into a hash of its fields.
#
# Args    : $path_line - the header line to parse.  When it is omitted the
#           line is taken from $_, which is what this method always parsed
#           before it honoured its argument.
# Returns : hash reference keyed by the field names listed in @fields.
# Throws  : via $self->throw when any field other than md5 is missing.
sub _parse_path_header {
    my $self      = shift;
    my $path_line = shift;

    $path_line = $_ unless defined $path_line;

    # Field names, in the order of the captures in the pattern below.
    my @fields = qw(
        query db path_num path_total_num query_length exon_count
        trimmed_coverage percent_identity query_start query_end
        whole_genome_start whole_genome_end
        chromosome chromo_start chromo_end
        strand sense md5
    );

    my %path;
    @path{@fields} = (
        $path_line =~ m|
           >
           ([^ ]*)[ ]              # query id, then a space
           ([^ ]*)[ ]              # genome database, then a space
           (\d+)/(\d+)[ ]          # path number / total paths (e.g. 3/12)
           (\d+)[ ]                # query length
           (\d+)[ ]                # hsp/exon count
           (\d+\.\d*)[ ]           # trimmed coverage
           (\d+\.\d*)[ ]           # percent identity
           (\d+)\.\.(\d+)[ ]       # query start .. query end
           (\d+)\.\.(\d+)[ ]       # whole genome start .. end
           (\d+):                  # chromosome number
           (\d+)\.\.(\d+)[ ]       # chromosome start .. end
           ([+-])[ ]               # strand
           dir:(.*)                # dir:sense or dir:antisense
           [ ]md5:([\dabcdefg]+)   # md5 signature
           |x
    );

    # Every field except the trailing md5 is mandatory.  Presence is tested
    # with defined/length so that a captured "0" is not reported as missing.
    for my $field ( @fields[ 0 .. $#fields - 1 ] ) {
        my $value = $path{$field};
        next if defined $value && length $value;
        $self->throw("$field was not found in path line.");
    }

    return \%path;
}
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# Parse one alignment (exon) line into a hash of its fields.
#
# Args    : $a_line - the line to parse.  When it is omitted the line is
#           taken from $_, which is what this method always parsed before
#           it honoured its argument.
# Returns : hash reference with the keys chromo_start, chromo_end,
#           query_start, query_end, percent_identity, align_length and
#           intron_length (the last one may be undef).
# Throws  : via $self->throw when a field other than intron_length is
#           missing.
sub _parse_alignment_line {
    my $self   = shift;
    my $a_line = shift;

    $a_line = $_ unless defined $a_line;

    # Field names, in the order of the captures in the pattern below.
    my @fields = qw(
        chromo_start chromo_end query_start query_end
        percent_identity align_length intron_length
    );

    # NOTE(review): ".*" is greedy, so when a trailing intron length is
    # present it looks as if that number would be captured as align_length
    # - confirm against real gmap output before relying on either field.
    my %align;
    @align{@fields} = (
        $a_line =~ m|
           [\t]
           ([\d]+)[ ]       # start in chromosome coord.
           ([\d]+)[ ]       # end in chromosome coord.
           ([\d]+)[ ]       # start in query coord.
           ([\d]+)[ ]       # end in query coord.
           ([\d]+)          # percent identity (as integer)
           [\t].*[\t]       # skip the edit script
           ([\d]+)          # length of alignment block.
           [\t]*([\d]+)*    # length of following intron.
           |x
    );

    # Mandatory fields with the exact message thrown for each one.
    my @required = (
        [ chromo_start => "chromo_start missing in alignment line." ],
        [ chromo_end   => "chromo_end was missing in alignment line." ],
        [ query_start  => "query_start was missing in alignment line." ],
        [ query_end    => "query_end was missing in alignment line." ],
        [
            percent_identity =>
              "percent_identity was missing in alignment line."
        ],
        [ align_length => "align_length was missing in alignment line." ],
    );

    # Presence is tested with defined/length so that a legitimate 0 (for
    # instance 0 percent identity) is not reported as missing.
    for my $check (@required) {
        my ( $field, $message ) = @{$check};
        my $value = $align{$field};
        next if defined $value && length $value;
        $self->throw($message);
    }

    return \%align;
}
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
1; |