line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MUST::Drivers::CdHit; |
2
|
|
|
|
|
|
|
# ABSTRACT: Bio::MUST driver for running the CD-HIT program |
3
|
|
|
|
|
|
|
# CONTRIBUTOR: Amandine BERTRAND <amandine.bertrand@doct.uliege.be> |
4
|
|
|
|
|
|
|
$Bio::MUST::Drivers::CdHit::VERSION = '0.193030'; |
5
|
6
|
|
|
6
|
|
4182940
|
use Moose; |
|
6
|
|
|
|
|
21
|
|
|
6
|
|
|
|
|
44
|
|
6
|
6
|
|
|
6
|
|
40795
|
use namespace::autoclean; |
|
6
|
|
|
|
|
16
|
|
|
6
|
|
|
|
|
72
|
|
7
|
|
|
|
|
|
|
|
8
|
6
|
|
|
6
|
|
580
|
use autodie; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
49
|
|
9
|
6
|
|
|
6
|
|
31896
|
use feature qw(say); |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
529
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# use Smart::Comments; |
12
|
|
|
|
|
|
|
|
13
|
6
|
|
|
6
|
|
77
|
use Carp; |
|
6
|
|
|
|
|
15
|
|
|
6
|
|
|
|
|
597
|
|
14
|
6
|
|
|
6
|
|
40
|
use IPC::System::Simple qw(system); |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
323
|
|
15
|
6
|
|
|
6
|
|
41
|
use Module::Runtime qw(use_module); |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
51
|
|
16
|
6
|
|
|
6
|
|
596
|
use Path::Class qw(file); |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
299
|
|
17
|
6
|
|
|
6
|
|
44
|
use Tie::IxHash; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
225
|
|
18
|
|
|
|
|
|
|
|
19
|
6
|
|
|
6
|
|
51
|
use Bio::MUST::Core; |
|
6
|
|
|
|
|
10
|
|
|
6
|
|
|
|
|
252
|
|
20
|
|
|
|
|
|
|
extends 'Bio::MUST::Core::Ali::Temporary'; |
21
|
|
|
|
|
|
|
|
22
|
6
|
|
|
6
|
|
569
|
use Bio::MUST::Drivers::Utils qw(stringify_args); |
|
6
|
|
|
|
|
27
|
|
|
6
|
|
|
|
|
334
|
|
23
|
6
|
|
|
6
|
|
37
|
use aliased 'Bio::MUST::Core::Ali'; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
93
|
|
24
|
6
|
|
|
6
|
|
1353
|
use aliased 'Bio::MUST::Core::SeqId'; |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
21
|
|
25
|
6
|
|
|
6
|
|
1163
|
use aliased 'Bio::FastParsers::CdHit'; |
|
6
|
|
|
|
|
18
|
|
|
6
|
|
|
|
|
24
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Optional arguments for the cd-hit / cd-hit-est command line.
# BUILD passes this hash to stringify_args to obtain the argument string.
has cdhit_args => (
    isa     => 'HashRef',
    is      => 'ro',
    default => sub { +{} },     # no extra arguments unless provided
);
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Cluster membership: hash of array references of SeqId objects.
# BUILD fills it through the private writer, using the long id of each
# cluster representative as key; it cannot be set from the constructor.
has _cluster_seq_ids => (
    traits   => [ qw(Hash) ],
    isa      => 'HashRef[ArrayRef[Bio::MUST::Core::SeqId]]',
    is       => 'ro',
    writer   => '_set_cluster_seq_ids',
    init_arg => undef,
    handles  => {
        seq_ids_for         => 'get',       # members of one cluster
        all_cluster_seq_ids => 'values',    # members of every cluster
        all_cluster_names   => 'keys',      # keys of the hash
    },
);
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Ali object holding the representative sequences.
# BUILD loads it from the cd-hit output file and stores it through the
# private writer; it cannot be set from the constructor.
has _representatives => (
    isa      => 'Bio::MUST::Core::Ali',
    is       => 'ro',
    writer   => '_set_representatives',
    init_arg => undef,
    handles  => {
        count_representatives      => 'count_seqs',
        all_representatives        => 'all_seqs',

        # called by _build_representative_for
        get_representative_with_id => 'get_seq_with_id',
    },
);
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# Lookup table from member full_id to Seq object.
# Computed on first access by _build_representative_for; it cannot be set
# from the constructor.
has _representative_for => (
    traits   => [ qw(Hash) ],
    isa      => 'HashRef[Bio::MUST::Core::Seq]',
    is       => 'ro',
    init_arg => undef,
    builder  => '_build_representative_for',
    lazy     => 1,
    handles  => {
        representative_for => 'get',
        all_member_names   => 'keys',
    },
);
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
## no critic (ProhibitUnusedPrivateSubroutines) |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Builder for the lazy '_representative_for' attribute.
#
# Returns a hash reference whose keys are the full_id of every cluster member
# and whose values are what get_representative_with_id returns for the name
# of the cluster holding that member.
sub _build_representative_for {
    my $self = shift;

    my %representative_for;
    for my $repr ( $self->all_cluster_names ) {

        # the representative is the same for all members of a cluster:
        # look it up once per cluster instead of once per member
        my $repr_seq = $self->get_representative_with_id($repr);

        $representative_for{ $_->full_id } = $repr_seq
            for @{ $self->seq_ids_for($repr) };
    }

    return \%representative_for;
}
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
## use critic |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Moose BUILD hook: runs cd-hit (or cd-hit-est) on the object's file and
# stores the resulting clusters and representative sequences.
#
# Steps:
#   1. provision the executable through Bio::MUST::Provision::CdHit;
#   2. run the program through the shell, discarding its output streams;
#   3. parse the '.clstr' file and store cluster members under their long ids;
#   4. load the representative sequences and restore their original ids;
#   5. remove both output files.
#
# If the command cannot be executed (exit code 127), a warning is issued and
# the method returns early, leaving the cluster attributes unset.
# NOTE(review): filename, type and mapper are presumably provided by the
# parent class Bio::MUST::Core::Ali::Temporary -- confirm.
sub BUILD {
    my $self = shift;

    # provision executable
    my $app = use_module('Bio::MUST::Provision::CdHit')->new;
       $app->meet();

    # setup output files
    my $infile        = $self->filename;
    my $basename      = $infile . '.cdhit';
    my $outfile       = $basename . '.out';
    my $outfile_clstr = $basename . '.out.clstr';

    # format cd-hit (optional) arguments
    my $args     = $self->cdhit_args;
    my $args_str = stringify_args($args);

    # create cd-hit (or cd-hit-est) command
    # the program is chosen from the sequence type ('prot' => cd-hit)
    my $pgm = $self->type eq 'prot' ? 'cd-hit' : 'cd-hit-est';
    my $cmd = "$pgm -i $infile -o $outfile $args_str > /dev/null 2> /dev/null";
    #### $cmd

    # try to robustly execute cd-hit
    # only exit codes 0 and 127 are accepted here; 127 is handled below
    my $ret_code = system( [ 0, 127 ], $cmd);
    if ($ret_code == 127) {
        carp "[BMD] Warning: cannot execute $pgm command;"
           . ' returning without clusters!';
        return;
    }
    # TODO: try to bypass shell (need for absolute path to executable then)

    # parse output file
    my $parser = CdHit->new(file => $outfile_clstr);
    my $mapper = $self->mapper;

    # restore original ids for cluster members...
    # (the tied hash keeps the clusters in parser order)
    tie my %cluster_seq_ids, 'Tie::IxHash';
    for my $abbr_id ( $parser->all_representatives ) {

        # a representative without members yields an empty list
        my @member_ids = map {
            SeqId->new( full_id => $mapper->long_id_for($_) )
        } @{ $parser->members_for($abbr_id) // [] };

        $cluster_seq_ids{ $mapper->long_id_for($abbr_id) } = \@member_ids;
    }

    # ... and store cluster members
    $self->_set_cluster_seq_ids(\%cluster_seq_ids);

    # read and store representative seqs...
    my $representatives = Ali->load($outfile);
    $representatives->dont_guess;
    $representatives->restore_ids($mapper);     # ... restoring original ids
    $self->_set_representatives($representatives);

    # unlink temp files
    file($_)->remove for $outfile, $outfile_clstr;

    return;
}
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
150
|
|
|
|
|
|
|
1; |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
__END__ |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=pod |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 NAME |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Bio::MUST::Drivers::CdHit - Bio::MUST driver for running the CD-HIT program |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 VERSION |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
version 0.193030 |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 SYNOPSIS |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# TODO |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head1 DESCRIPTION |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# TODO |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head1 AUTHOR |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 CONTRIBUTOR |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=for stopwords Amandine BERTRAND |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Amandine BERTRAND <amandine.bertrand@doct.uliege.be> |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
187
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=cut |