File Coverage

blib/lib/Bio/DOOP/ClusterSubset.pm

Criterion	Covered	Total	%
statement	6	87	6.9
branch	0	14	0.0
condition	0	3	0.0
subroutine	2	14	14.2
pod	12	12	100.0
total	20	130	15.3

line	stmt	bran	cond	sub	pod	time	code
1							package Bio::DOOP::ClusterSubset;
2
3	1			1		7	use strict;
	1					2
	1					36
4	1			1		6	use warnings;
	1					2
	1					1005
5
6							=head1 NAME
7
8							Bio::DOOP::ClusterSubset - One subset of a cluster
9
10							=head1 VERSION
11
12							Version 0.13
13
14							=cut
15
16							our $VERSION = '0.13';
17
18							=head1 SYNOPSIS
19
20							@cluster_subsets = @{$cluster->get_all_subsets};
21
22
23							=head1 DESCRIPTION
24
25							This object represents one subset of a cluster. A subset is a set of homologous sequences,
26							hopefully monophyletic, grouped by evolutionary distance from the reference species (Arabidopsis
27							or human).
28
29							=head1 AUTHORS
30
31							Tibor Nagy, Godollo, Hungary and Endre Sebestyen, Martonvasar, Hungary
32
33							=head1 METHODS
34
35							=head2 new
36
37							Creates a new subset object from the subset primary id. You usually won't need this, as you will create
38							the subsets from a Bio::DOOP::Cluster object, based on the subset type.
39
40							Return type: Bio::DOOP::ClusterSubset object
41
42							$cluster_subset = Bio::DOOP::ClusterSubset->new($db,"123");
43
44							=cut
45
46							sub new {
47	0			0	1		my $self = {};
48	0						my $dummy = shift;
49	0						my $db = shift;
50	0						my $id = shift;
51
52	0						my $ret = $db->query("SELECT * FROM cluster_subset WHERE subset_primary_id = \"$id\";");
53
54	0	0					if ($#$ret == -1){
55	0						return(-1);
56							}
57
58	0						my @fields = @{$$ret[0]};
	0
59
60	0						$self->{DB} = $db;
61	0						$self->{PRIMARY} = $id;
62	0						$self->{TYPE} = $fields[1];
63	0						$self->{SEQNO} = $fields[2];
64	0						$self->{MOTIFNO} = $fields[3];
65	0						$self->{FEATNO} = $fields[4];
66	0						$self->{ORIG} = $fields[5];
67	0						$self->{CLUSTER} = Bio::DOOP::Cluster->new_by_id($db,$fields[6]);
68
69	0						$ret = $db->query("SELECT alignment_dialign,alignment_fasta FROM cluster_subset_data WHERE subset_primary_id = \"$id\";");
70
71	0	0					if ($#$ret == -1){
72	0						return(-1);
73							}
74
75	0						@fields = @{$$ret[0]};
	0
76
77	0						$self->{DIALIGN} = $fields[0];
78	0						$self->{FASTA} = $fields[1];
79
80	0						bless $self;
81	0						return($self);
82							}
83
84							=head2 get_id
85
86							Prints out the subset primary id. This is the internal ID from the MySQL database.
87
88							Return type: string
89
90							print $cluster_subset->get_id;
91
92							=cut
93
94							sub get_id {
95	0			0	1		my $self = shift;
96	0						return($self->{PRIMARY});
97							}
98
99							=head2 get_type
100
101							Prints out the subset type.
102
103							Return type: string
104
105							print $cluster_subset->get_type;
106
107							=cut
108
109							sub get_type {
110	0			0	1		my $self = shift;
111	0						return($self->{TYPE});
112							}
113
114							=head2 get_seqno
115
116							Returns the number of sequences in the subset.
117
118							Return type: string
119
120							for(i = 0; i < $cluster_subset->get_seqno; i++){
121							print $seq[$i];
122							}
123
124							=cut
125
126							sub get_seqno {
127	0			0	1		my $self = shift;
128	0						return($self->{SEQNO});
129							}
130
131							=head2 get_featno
132
133							Returns the total number of features (motifs, TSSs and other) in the subset.
134
135							Return type: string
136
137							if ($cluster_subset->get_featno > 4){
138							print "We have lots of features!!!\n";
139							}
140
141							=cut
142
143							sub get_featno {
144	0			0	1		my $self = shift;
145	0						return($self->{FEATNO});
146							}
147
148							=head2 get_motifno
149
150							Returns the number of motifs in the subset.
151
152							Return type: string
153
154							$motifs = $cluster_subset->get_motifno;
155
156							=cut
157
158							sub get_motifno {
159	0			0	1		my $self = shift;
160	0						return($self->{MOTIFNO});
161							}
162
163							=head2 get_orig
164
165							Returns 'y' if the subset is the same as the original cluster, 'n' if not.
166
167							Return type: string ('y' or 'n')
168
169							if ($cluster_subset->get_orig eq "y") {
170							print "This is the original cluster!\n";
171							}
172							elsif ($cluster_subset->get_orig eq "n"){
173							print "This is some smaller subset!\n";
174							}
175
176							=cut
177
178							sub get_orig {
179	0			0	1		my $self = shift;
180	0						return($self->{ORIG});
181							}
182
183							=head2 get_cluster
184
185							Returns the ID of the cluster, from which the subset originates.
186
187							Return type: string
188
189							$cluster_id = $cluster_subset->get_cluster;
190
191							=cut
192
193							sub get_cluster {
194	0			0	1		my $self = shift;
195	0						return($self->{CLUSTER});
196							}
197
198							=head2 get_dialign
199
200							Prints out the dialign format alignment of the subset.
201
202							Return type: string
203
204							print $cluster_subset->get_dialign;
205
206							=cut
207
208							sub get_dialign {
209	0			0	1		my $self = shift;
210	0						return($self->{DIALIGN});
211							}
212
213							=head2 get_fasta_align
214
215							Prints out the fasta format alignment of the subset.
216
217							Return type: string
218
219							print $cluster_subset->get_fasta_align;
220
221							=cut
222
223							sub get_fasta_align {
224	0			0	1		my $self = shift;
225	0						return($self->{FASTA});
226							}
227
228							=head2 get_all_motifs
229
230							Returns the arrayref of all motifs associated with the subset.
231
232							Return type: arrayref, the array containig Bio::DOOP::Motif objects
233
234							@motifs = @{$cluster_subset->get_all_motifs};
235
236							=cut
237
238							sub get_all_motifs {
239	0			0	1		my $self = shift;
240
241	0						my $id = $self->{PRIMARY};
242	0						my $i;
243							my @motifs;
244
245	0						my $ret = $self->{DB}->query("SELECT motif_feature_primary_id FROM motif_feature WHERE subset_primary_id = \"$id\";");
246
247	0	0					if ($#$ret == -1){
248	0						return(-1);
249							}
250
251	0						for($i = 0; $i < $#$ret + 1; $i++){
252	0						push @motifs,Bio::DOOP::Motif->new($self->{DB},$$ret[$i]->[0]);
253							}
254
255	0						return(\@motifs);
256							}
257
258							=head2 get_all_seqs
259
260							Returns a sorted arrayref of all sequences associated with the subset.
261
262							Sorting the sequences by the following criteria:
263							The first sequence is always the reference species (Arabidopsis/Human).
264							All other sequences are sorted first by the taxon_class (B E M V in the plants and
265							P R E H M N T F V C in the chordates ) and then by the alphabetical order.
266
267							Return type: arrayref, the array containig Bio::DOOP::Sequence objects
268
269							@seq = @{$cluster_subset->get_all_seqs};
270
271							=cut
272
273							sub get_all_seqs {
274	0			0	1		my $self = shift;
275
276	0						my $id = $self->{PRIMARY};
277	0						my @seqs;
278	0						my $ret = $self->{DB}->query("SELECT sequence_primary_id FROM subset_xref WHERE subset_primary_id = \"$id\";");
279
280	0	0					if ($#$ret == -1){
281	0						return(-1);
282							}
283
284	0						for(@$ret){
285	0						push @seqs,Bio::DOOP::Sequence->new($self->{DB},$_->[0]);
286							}
287
288	0						my $seq;
289							my $i;
290	0						my %groups;
291	0						my @sortseqs;
292
293	0						for($i = 0; $i < $#seqs+1; $i++){
294	0	0	0				if( ($seqs[$i]->get_taxid eq "3702") \|\|
295							($seqs[$i]->get_taxid eq "9606") ) {
296	0						$sortseqs[0] = $seqs[$i];
297	0						next;
298							}
299	0						push @{$groups{$seqs[$i]->get_taxon_class}}, $seqs[$i];
	0
300							}
301
302	0						for my $key ("Brassicaceae","eudicotyledons","Magnoliophyta","Viridiplantae"){
303	0	0					if ($groups{$key}){
304	0						push @sortseqs, sort {$a->get_taxon_name cmp $b->get_taxon_name} @{$groups{$key}};
	0
	0
305							}
306							}
307	0						for my $key ("Primates","Glires","Euarchontoglires","Cetartiodactyla","Carnivora","Laurasiatheria","Xenarthra","Afrotheria","Metatheria","Prototheria","Aves","Sauropsida","Amphibia","Teleostomi","Chondrichthyes","Vertebrata","Chordata"){
308	0	0					if ($groups{$key}) {
309	0						push @sortseqs, sort {$a->get_taxon_name cmp $b->get_taxon_name} @{$groups{$key}};
	0
	0
310							}
311							}
312	0						return(\@sortseqs);
313							}
314
315							1;