line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package BioX::SeqHash; |
2
|
1
|
|
|
1
|
|
57306
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
26
|
|
3
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
4
|
1
|
|
|
1
|
|
4
|
use Exporter 'import'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
436
|
|
5
|
|
|
|
|
|
|
our @EXPORT_OK = qw(fa2hs); |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.1.1'; # VERSION: |
8
|
|
|
|
|
|
|
# ABSTRACT: get one or more sequences from a FASTA file quickly. |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub new { |
12
|
0
|
|
|
0
|
0
|
|
my $class = shift; |
13
|
0
|
|
|
|
|
|
my $self = { |
14
|
|
|
|
|
|
|
@_ |
15
|
|
|
|
|
|
|
}; |
16
|
0
|
0
|
|
|
|
|
$self->{file} || die `pod2text $0`; |
17
|
0
|
|
|
|
|
|
bless $self, $class; |
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
sub fa2hs { |
22
|
0
|
0
|
|
0
|
1
|
|
my $fa = ref $_[0] ? $_[0]->{file} : $_[0]; |
23
|
0
|
0
|
|
|
|
|
open my $file, "<", "$fa" or die "Can not open $fa $!"; |
24
|
0
|
|
|
|
|
|
my (%hs, $name); |
25
|
0
|
|
|
|
|
|
while (my $line = <$file>) { |
26
|
0
|
|
|
|
|
|
chomp($line); |
27
|
0
|
0
|
|
|
|
|
if ($line =~/^>(.+?)\s/) { |
28
|
0
|
|
|
|
|
|
$name = $1; |
29
|
|
|
|
|
|
|
} else { |
30
|
0
|
|
|
|
|
|
$hs{$name} .= $line; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
} |
33
|
0
|
|
|
|
|
|
close($file); |
34
|
0
|
|
|
|
|
|
return \%hs; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub _initialize { |
38
|
0
|
|
|
0
|
|
|
my $self = shift; |
39
|
0
|
|
|
|
|
|
$self->{id2seq} = $self->fa2hs(); |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub get_id_seq { |
44
|
0
|
|
|
0
|
1
|
|
my ($self, $id) = @_; |
45
|
0
|
0
|
|
|
|
|
$self->_initialize() unless (exists $self->{id2seq}); |
46
|
0
|
|
|
|
|
|
return ">$id\n" . $self->{id2seq}->{$id} . "\n"; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub get_seq { |
51
|
0
|
|
|
0
|
1
|
|
my ($self, $id) = @_; |
52
|
0
|
0
|
|
|
|
|
$self->_initialize() unless (exists $self->{id2seq}); |
53
|
0
|
|
|
|
|
|
return $self->{id2seq}{$id}; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub get_seqs_batch { |
58
|
0
|
|
|
0
|
1
|
|
my ($self, $id_file, $outfile) = @_; |
59
|
0
|
0
|
|
|
|
|
open my $IN, "<", $id_file or die "Can not open $id_file $!"; |
60
|
0
|
|
|
|
|
|
my @ids = <$IN>; |
61
|
0
|
|
|
|
|
|
chomp @ids; |
62
|
0
|
0
|
|
|
|
|
open my $OUT, ">", $outfile or die "Can not open $outfile $!"; |
63
|
0
|
|
|
|
|
|
for my $id (@ids) { |
64
|
0
|
|
|
|
|
|
print $OUT $self->get_id_seq($id); |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# may be use later |
69
|
|
|
|
0
|
0
|
|
sub rename_seqs { |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
1; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
__END__ |