line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::FastParsers::Roles::Clusterable; |
2
|
|
|
|
|
|
|
# ABSTRACT: Attributes and methods common to CD-HIT and UCLUST drivers |
3
|
|
|
|
|
|
|
# CONTRIBUTOR: Amandine BERTRAND <amandine.bertrand@doct.uliege.be> |
4
|
|
|
|
|
|
|
$Bio::FastParsers::Roles::Clusterable::VERSION = '0.213510'; |
5
|
7
|
|
|
7
|
|
6196
|
use Moose::Role; |
|
7
|
|
|
|
|
22
|
|
|
7
|
|
|
|
|
74
|
|
6
|
|
|
|
|
|
|
|
7
|
7
|
|
|
7
|
|
44719
|
use autodie; |
|
7
|
|
|
|
|
28
|
|
|
7
|
|
|
|
|
89
|
|
8
|
7
|
|
|
7
|
|
41314
|
use feature qw(say); |
|
7
|
|
|
|
|
38
|
|
|
7
|
|
|
|
|
772
|
|
9
|
|
|
|
|
|
|
|
10
|
7
|
|
|
7
|
|
63
|
use Carp; |
|
7
|
|
|
|
|
16
|
|
|
7
|
|
|
|
|
714
|
|
11
|
7
|
|
|
7
|
|
5500
|
use Sort::Naturally; |
|
7
|
|
|
|
|
30346
|
|
|
7
|
|
|
|
|
599
|
|
12
|
7
|
|
|
7
|
|
71
|
use Try::Tiny; |
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
3561
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# private attributes |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Private hash attribute: keys are the representative ids and each value is
# an array of member ids (strings). It cannot be passed to the constructor
# (init_arg is undef) and is only settable through the private writer.
has '_members_for' => (
    is       => 'ro',
    isa      => 'HashRef[ArrayRef[Str]]',
    traits   => [ qw(Hash) ],
    init_arg => undef,
    writer   => '_set_members_for',
    handles  => {
        members_for         => 'get',     # array of member ids for one key
        all_representatives => 'keys',    # all keys, in hash order
    },
);
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Return the list of representative ids ordered by decreasing cluster size
# (number of ids returned by members_for); clusters of equal size are ordered
# by natural comparison (ncmp) of their representative ids.
sub all_representatives_by_cluster_size {
    my ($self) = @_;

    my @reprs = $self->all_representatives;

    # look up each cluster size only once, before sorting
    my %size_for;
    for my $repr (@reprs) {
        $size_for{$repr} = scalar @{ $self->members_for($repr) };
    }

    # largest clusters first, then natural order of the representative ids
    my @sorted_reprs = sort {
        $size_for{$b} <=> $size_for{$a} || ncmp($a, $b)
    } @reprs;

    return @sorted_reprs;
}
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Build a Bio::MUST::Core::IdMapper describing the clusters.
#
# Arguments: an optional separator string (defaults to '/').
#
# Each abbreviated id is a representative id (taken in the order given by
# all_representatives_by_cluster_size) and the matching long id is the
# naturally sorted list of its member ids plus the representative itself,
# joined with the separator.
#
# If Bio::MUST::Core cannot be loaded, a warning is issued through carp and
# nothing is returned.
sub clust_mapper {
    my ($self, $sep) = @_;
    $sep //= q{/};

    # Bio::MUST::Core is loaded at runtime only, so that it remains an
    # optional dependency of Bio::FastParsers
    my $bmc = try { require Bio::MUST::Core } catch { return };
    if (!$bmc) {
        carp 'Warning: Bio::MUST::Core not installed; returning nothing!';
        return;
    }

    # representatives are used as-is for the abbreviated ids
    my @abbr_ids = $self->all_representatives_by_cluster_size;

    my @long_ids;
    for my $repr (@abbr_ids) {
        my @cluster_ids = nsort( @{ $self->members_for($repr) }, $repr );
        push @long_ids, join $sep, @cluster_ids;
    }

    return Bio::MUST::Core::IdMapper->new(
        abbr_ids => \@abbr_ids,
        long_ids => \@long_ids,
    );
}
72
|
|
|
|
|
|
|
|
73
|
7
|
|
|
7
|
|
70
|
no Moose::Role; |
|
7
|
|
|
|
|
24
|
|
|
7
|
|
|
|
|
95
|
|
74
|
|
|
|
|
|
|
1; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
__END__ |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=pod |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 NAME |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Bio::FastParsers::Roles::Clusterable - Attributes and methods common to CD-HIT and UCLUST drivers |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head1 VERSION |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
version 0.213510 |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head1 DESCRIPTION |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
This role implements the attributes and methods that are common to CD-HIT and |
91
|
|
|
|
|
|
|
UCLUST parsers. Those are documented in their respective modules: |
92
|
|
|
|
|
|
|
L<Bio::FastParsers::CdHit> and L<Bio::FastParsers::Uclust>. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Available methods are: C<all_representatives>, |
95
|
|
|
|
|
|
|
C<all_representatives_by_cluster_size>, C<members_for> and C<clust_mapper>. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head1 AUTHOR |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 CONTRIBUTOR |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=for stopwords Amandine BERTRAND |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Amandine BERTRAND <amandine.bertrand@doct.uliege.be> |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
112
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |