line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package BioX::Seq::Stream::TwoBit; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
13
|
|
|
1
|
|
|
|
|
27
|
|
5
|
1
|
|
|
1
|
|
15
|
use POSIX qw/ceil/; |
|
1
|
|
|
|
|
17
|
|
|
1
|
|
|
|
|
8
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
101
|
use parent qw/BioX::Seq::Stream/; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
90
|
use constant MAGIC => 0x1a412743; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
101
|
|
10
|
1
|
|
|
1
|
|
6
|
use constant LE_MAGIC_S => pack('C2', 0x43, 0x27); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
67
|
|
11
|
1
|
|
|
1
|
|
7
|
use constant LE_MAGIC_L => pack('C2', 0x41, 0x1a); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
65
|
|
12
|
1
|
|
|
1
|
|
7
|
use constant BE_MAGIC_S => pack('C2', 0x1a, 0x41); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
54
|
|
13
|
1
|
|
|
1
|
|
6
|
use constant BE_MAGIC_L => pack('C2', 0x27, 0x43); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1038
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my @byte_map = map { |
16
|
|
|
|
|
|
|
my $i = $_; join '', map {qw/T C A G/[ vec(chr($i),3-$_,2) ]} 0..3 |
17
|
|
|
|
|
|
|
} 0..255; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub _check_type { |
20
|
|
|
|
|
|
|
|
21
|
15
|
|
|
15
|
|
64
|
my ($class,$self) = @_; |
22
|
15
|
100
|
|
|
|
63
|
return 1 if $self->{buffer} eq LE_MAGIC_S; |
23
|
14
|
50
|
|
|
|
62
|
return 1 if $self->{buffer} eq BE_MAGIC_S; |
24
|
14
|
|
|
|
|
58
|
return 0; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub _init { |
29
|
|
|
|
|
|
|
|
30
|
1
|
|
|
1
|
|
20
|
my ($self) = @_; |
31
|
|
|
|
|
|
|
|
32
|
1
|
|
|
|
|
27
|
binmode $self->{fh}; |
33
|
1
|
|
|
|
|
5
|
my $fh = $self->{fh}; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Determine endianness |
36
|
1
|
|
|
|
|
19
|
my $magic = $self->{buffer} . _safe_read( $fh, 2 ); |
37
|
1
|
0
|
|
|
|
26
|
my $byte_order = unpack('V', $magic) == MAGIC ? 'V' |
|
|
50
|
|
|
|
|
|
38
|
|
|
|
|
|
|
: unpack('N', $magic) == MAGIC ? 'N' |
39
|
|
|
|
|
|
|
: die "File signature check failed"; |
40
|
1
|
|
|
|
|
13
|
$self->{byte_order} = $byte_order; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Unpack rest of header |
43
|
1
|
|
|
|
|
16
|
my ($version, $seq_count, $reserved) = unpack "$byte_order*", |
44
|
|
|
|
|
|
|
_safe_read( $fh, 12 ); |
45
|
1
|
50
|
33
|
|
|
28
|
die "File header check failed" if ($version != 0 || $reserved != 0); |
46
|
1
|
|
|
|
|
5
|
$self->{seq_count} = $seq_count; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Build index |
49
|
1
|
|
|
|
|
4
|
my $last_name; |
50
|
|
|
|
|
|
|
my $buf; |
51
|
1
|
|
|
|
|
0
|
my @index; |
52
|
1
|
|
|
|
|
5
|
for (1..$self->{seq_count}) { |
53
|
4
|
|
|
|
|
11
|
read $fh, $buf, 1; |
54
|
4
|
|
|
|
|
12
|
read $fh, $buf, ord($buf); |
55
|
4
|
|
|
|
|
13
|
my $name = $buf; |
56
|
4
|
|
|
|
|
14
|
read $fh, $buf, 4; |
57
|
4
|
|
|
|
|
15
|
my $offset = unpack $byte_order, $buf; |
58
|
4
|
50
|
|
|
|
32
|
die "$name already defined" if (defined $self->{index}->{$name}); |
59
|
4
|
|
|
|
|
22
|
push @index, [$name, $offset]; |
60
|
|
|
|
|
|
|
} |
61
|
1
|
|
|
|
|
6
|
$self->{index} = [@index]; |
62
|
1
|
|
|
|
|
9
|
$self->{curr_idx} = 0; |
63
|
|
|
|
|
|
|
|
64
|
1
|
|
|
|
|
9
|
return; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub next_seq { |
69
|
|
|
|
|
|
|
|
70
|
3
|
|
|
3
|
1
|
760
|
my ($self) = @_; |
71
|
|
|
|
|
|
|
|
72
|
3
|
50
|
|
|
|
12
|
return undef if ($self->{curr_idx} >= $self->{seq_count}); |
73
|
|
|
|
|
|
|
|
74
|
3
|
|
|
|
|
14
|
my $seq = $self->_fetch_record( $self->{curr_idx} ); |
75
|
3
|
|
|
|
|
6
|
++$self->{curr_idx}; |
76
|
3
|
|
|
|
|
19
|
return $seq; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
sub _safe_read { |
81
|
|
|
|
|
|
|
|
82
|
23
|
|
|
23
|
|
51
|
my ($fh, $bytes) = @_; |
83
|
23
|
|
|
|
|
93
|
my $r = read($fh, my $buffer, $bytes); |
84
|
23
|
50
|
|
|
|
60
|
die "Unexpected read length" if ($r != $bytes); |
85
|
23
|
|
|
|
|
122
|
return $buffer; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
sub _fetch_record { |
90
|
|
|
|
|
|
|
|
91
|
3
|
|
|
3
|
|
13
|
my ($self, $idx) = @_; |
92
|
|
|
|
|
|
|
|
93
|
3
|
|
|
|
|
7
|
my ($id,$offset) = @{ $self->{index}->[$idx] }; |
|
3
|
|
|
|
|
9
|
|
94
|
3
|
|
|
|
|
7
|
my $byte_order = $self->{byte_order}; |
95
|
3
|
|
|
|
|
5
|
my $fh = $self->{fh}; |
96
|
3
|
|
|
|
|
39
|
seek $fh, $offset, 0; |
97
|
|
|
|
|
|
|
|
98
|
3
|
|
|
|
|
20
|
my $seq_len = unpack "$byte_order*", _safe_read($fh, 4); |
99
|
3
|
|
|
|
|
10
|
my $N_count = unpack "$byte_order*", _safe_read($fh, 4); |
100
|
3
|
|
|
|
|
10
|
my @N_data = unpack "$byte_order*", _safe_read($fh, 4 * $N_count * 2); |
101
|
3
|
|
|
|
|
6
|
my %N_lens; |
102
|
3
|
|
|
|
|
27
|
@N_lens{ @N_data[0..$N_count-1] } = @N_data[$N_count..$#N_data]; |
103
|
|
|
|
|
|
|
|
104
|
3
|
|
|
|
|
12
|
my $mask_count = unpack "$byte_order*", _safe_read($fh, 4); |
105
|
3
|
|
|
|
|
12
|
my @mask_data = unpack "$byte_order*", _safe_read($fh, 4 * $mask_count * 2); |
106
|
3
|
|
|
|
|
7
|
my %mask_lens; |
107
|
3
|
|
|
|
|
25
|
@mask_lens{ @mask_data[0..$mask_count-1] } = @mask_data[$mask_count..$#mask_data]; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# reserved field |
110
|
3
|
|
|
|
|
20
|
my $reserved = unpack "$byte_order*", _safe_read($fh, 4); |
111
|
|
|
|
|
|
|
|
112
|
3
|
|
|
|
|
25
|
my $to_read = ceil($seq_len/4); |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# this is the speed bottleneck, but haven't found a better way yet |
115
|
3
|
|
|
|
|
6
|
my $string; |
116
|
3
|
|
|
|
|
6
|
$string .= $byte_map[$_] for (unpack "C*", _safe_read($fh, $to_read)); |
117
|
|
|
|
|
|
|
|
118
|
3
|
|
|
|
|
10
|
$string = substr $string, 0, $seq_len; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# N and mask |
121
|
3
|
|
|
|
|
10
|
for (keys %N_lens) { |
122
|
6
|
|
|
|
|
9
|
my $len = $N_lens{$_}; |
123
|
6
|
|
|
|
|
19
|
substr($string, $_, $len) = 'N' x $len; |
124
|
|
|
|
|
|
|
} |
125
|
3
|
|
|
|
|
8
|
for (keys %mask_lens) { |
126
|
8
|
|
|
|
|
16
|
my $len = $mask_lens{$_}; |
127
|
8
|
|
|
|
|
37
|
substr($string, $_, $len) ^= (' ' x $len); |
128
|
|
|
|
|
|
|
} |
129
|
3
|
|
|
|
|
29
|
return BioX::Seq->new($string, $id, ''); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
1; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
__END__ |