File Coverage

blib/lib/Ace/Sequence/Feature.pm
Criterion Covered Total %
statement 24 100 24.0
branch 0 28 0.0
condition 0 17 0.0
subroutine 8 29 27.5
pod 12 20 60.0
total 44 194 22.6


line stmt bran cond sub pod time code
1             package Ace::Sequence::Feature;
2 1     1   6 use strict;
  1         2  
  1         43  
3              
4 1     1   4 use Ace qw(:DEFAULT rearrange);
  1         1  
  1         112  
5 1     1   6 use Ace::Object;
  1         2  
  1         31  
6 1     1   653 use Ace::Sequence::Homol;
  1         3  
  1         26  
7 1     1   7 use Carp;
  1         2  
  1         62  
8 1     1   4 use AutoLoader 'AUTOLOAD';
  1         2  
  1         7  
9 1     1   35 use vars '@ISA','%REV';
  1         2  
  1         66  
10             @ISA = 'Ace::Sequence'; # for convenience sake only
11             %REV = ('+1' => '-1',
12             '-1' => '+1'); # war is peace, &c.
13              
14             use overload
15 1         6 '""' => 'asString',
16 1     1   5 ;
  1         2  
17              
18             # parse a line from a sequence list
19             sub new {
20 0     0 0   my $pack = shift;
21 0           my ($parent,$ref,$r_offset,$r_strand,$abs,$gff_line,$db) = @_;
22 0           my ($sourceseq,$method,$type,$start,$end,$score,$strand,$frame,$group) = split "\t",$gff_line;
23 0 0         if (defined($strand)) {
24 0 0         $strand = $strand eq '-' ? '-1' : '+1';
25             } else {
26 0           $strand = 0;
27             }
28              
29             # for efficiency/performance, we don't use superclass new() method, but modify directly
30             # handling coordinates. See SCRAPS below for what should be in here
31 0 0 0       $strand = '+1' if $strand < 0 && $r_strand < 0; # two wrongs do make a right
32 0 0         ($start,$end) = ($end,$start) if $strand < 0;
33 0           my $offset = $start - 1;
34 0 0         my $length = ($end > $start) ? $end - $offset : $end - $offset - 2;
35              
36             # handle negative strands
37 0   0       $offset ||= 0;
38 0 0 0       $offset *= -1 if $r_strand < 0 && $strand != $r_strand;
39              
40 0           my $self= bless {
41             obj => $ref,
42             offset => $offset,
43             length => $length,
44             parent => $parent,
45             p_offset => $r_offset,
46             refseq => [$ref,$r_offset,$r_strand],
47             strand => $r_strand,
48             fstrand => $strand,
49             absolute => $abs,
50             info => {
51             seqname=> $sourceseq,
52             method => $method,
53             type => $type,
54             score => $score,
55             frame => $frame,
56             group => $group,
57             db => $db,
58             }
59             },$pack;
60 0           return $self;
61             }
62              
63 0     0 0   sub smapped { 1; }
64              
65             # $_[0] is field name, $_[1] is self, $_[2] is optional replacement value
66             sub _field {
67 0     0     my $self = shift;
68 0           my $field = shift;
69 0           my $v = $self->{info}{$field};
70 0 0         $self->{info}{$field} = shift if @_;
71 0 0 0       return if defined $v && $v eq '.';
72 0           return $v;
73             }
74              
75 0     0 1   sub strand { return $_[0]->{fstrand} }
76              
77             sub seqname {
78 0     0 1   my $self = shift;
79 0           my $seq = $self->_field('seqname');
80 0           $self->db->fetch(Sequence=>$seq);
81             }
82              
83 0     0 1   sub method { shift->_field('method',@_) } # ... I prefer "method"
84 0     0 1   sub subtype { shift->_field('method',@_) } # ... or even "subtype"
85 0     0 1   sub type { shift->_field('type',@_) } # ... I prefer "type"
86 0     0 1   sub score { shift->_field('score',@_) } # float indicating some sort of score
87 0     0 1   sub frame { shift->_field('frame',@_) } # one of 1, 2, 3 or undef
88             sub info { # returns Ace::Object(s) with info about the feature
89 0     0 1   my $self = shift;
90 0 0         unless ($self->{group}) {
91 0   0       my $info = $self->{info}{group} || 'Method "'.$self->method.'"';
92 0           $info =~ s/(\"[^\"]*);([^\"]*\")/$1$;$2/g;
93 0           my @data = split(/\s*;\s*/,$info);
94 0           foreach (@data) { s/$;/;/g }
  0            
95 0           $self->{group} = [map {$self->toAce($_)} @data];
  0            
96             }
97 0 0         return wantarray ? @{$self->{group}} : $self->{group}->[0];
  0            
98             }
99              
100             # bioperl compatibility
101 0     0 0   sub primary_tag { shift->type(@_) }
102 0     0 0   sub source_tag { shift->subtype(@_) }
103              
104             sub db { # database identifier (from Ace::Sequence::Multi)
105 0     0 1   my $self = shift;
106 0           my $db = $self->_field('db',@_);
107 0   0       return $db || $self->SUPER::db;
108             }
109              
110 0     0 1   sub group { $_[0]->info; }
111 0     0 1   sub target { $_[0]->info; }
112              
113             sub asString {
114 0     0 1   my $self = shift;
115 0           my $name = $self->SUPER::asString;
116 0           my $type = $self->type;
117 0           return "$type:$name";
118             }
119              
120             # unique ID
121             sub id {
122 0     0 0   my $self = shift;
123 0           my $source = $self->source->name;
124 0           my $start = $self->start;
125 0           my $end = $self->end;
126 0           return "$source/$start,$end";
127             }
128              
129             # map info into a reasonable set of ace objects
130             sub toAce {
131 0     0 0   my $self = shift;
132 0           my $thing = shift;
133 0           my ($tag,@values) = $thing=~/(\"[^\"]+?\"|\S+)/g;
134 0           foreach (@values) { # strip the damn quotes
135 0           s/^\"(.*)\"$/$1/; # get rid of leading and trailing quotes
136             }
137 0           return $self->tag2ace($tag,@values);
138             }
139              
140             # synthesize an artificial Ace object based on the tag
141             sub tag2ace {
142 0     0 0   my $self = shift;
143 0           my ($tag,@data) = @_;
144              
145             # Special cases, hardcoded in Ace GFF code...
146 0           my $db = $self->db;;
147 0           my $class = $db->class;
148              
149             # for Notes we just return a text, no database associated
150 0 0         return $class->new(Text=>$data[0]) if $tag eq 'Note';
151              
152             # for homols, we create the indicated Protein or Sequence object
153             # then generate a bogus Homology object (for future compatability??)
154 0 0         if ($tag eq 'Target') {
155 0           my ($objname,$start,$end) = @data;
156 0           my ($classe,$name) = $objname =~ /^(\w+):(.+)/;
157 0           return Ace::Sequence::Homol->new_homol($classe,$name,$db,$start,$end);
158             }
159              
160             # General case:
161 0           my $obj = $class->new($tag=>$data[0],$self->db);
162              
163 0 0         return $obj if defined $obj;
164              
165             # Last resort, return a Text
166 0           return $class->new(Text=>$data[0]);
167             }
168              
169             sub sub_SeqFeature {
170 0 0   0 0   return wantarray ? () : 0;
171             }
172              
173             1;
174              
175             =head1 NAME
176              
177             Ace::Sequence::Feature - Examine Sequence Feature Tables
178              
179             =head1 SYNOPSIS
180              
181             # open database connection and get an Ace::Object sequence
182             use Ace::Sequence;
183              
184             # get a megabase from the middle of chromosome I
185             $seq = Ace::Sequence->new(-name => 'CHROMOSOME_I,
186             -db => $db,
187             -offset => 3_000_000,
188             -length => 1_000_000);
189              
190             # get all the homologies (a list of Ace::Sequence::Feature objs)
191             @homol = $seq->features('Similarity');
192              
193             # Get information about the first one
194             $feature = $homol[0];
195             $type = $feature->type;
196             $subtype = $feature->subtype;
197             $start = $feature->start;
198             $end = $feature->end;
199             $score = $feature->score;
200              
201             # Follow the target
202             $target = $feature->info;
203              
204             # print the target's start and end positions
205             print $target->start,'-',$target->end, "\n";
206              
207             =head1 DESCRIPTION
208              
209             I is a subclass of L
210             specialized for returning information about particular features in a
211             GFF format feature table.
212              
213             =head1 OBJECT CREATION
214              
215             You will not ordinarily create an I object
216             directly. Instead, objects will be created in response to a feature()
217             call to an I object. If you wish to create an
218             I object directly, please consult the source
219             code for the I method.
220              
221             =head1 OBJECT METHODS
222              
223             Most methods are inherited from I. The following
224             methods are also supported:
225              
226             =over 4
227              
228             =item seqname()
229              
230             $object = $feature->seqname;
231              
232             Return the ACeDB Sequence object that this feature is attached to.
233             The return value is an I of the Sequence class. This
234             corresponds to the first field of the GFF format and does not
235             necessarily correspond to the I object from which the
236             feature was obtained (use source_seq() for that).
237              
238             =item source()
239              
240             =item method()
241              
242             =item subtype()
243              
244             $source = $feature->source;
245              
246             These three methods are all synonyms for the same thing. They return
247             the second field of the GFF format, called "source" in the
248             documentation. This is usually the method or algorithm used to
249             predict the feature, such as "GeneFinder" or "tRNA" scan. To avoid
250             ambiguity and enhance readability, the method() and subtype() synonyms
251             are also recognized.
252              
253             =item feature()
254              
255             =item type()
256              
257             $type = $feature->type;
258              
259             These two methods are also synonyms. They return the type of the
260             feature, such as "exon", "similarity" or "Predicted_gene". In the GFF
261             documentation this is called the "feature" field. For readability,
262             you can also use type() to fetch the field.
263              
264             =item abs_start()
265              
266             $start = $feature->abs_start;
267              
268             This method returns the absolute start of the feature within the
269             sequence segment indicated by seqname(). As in the I
270             method, use start() to obtain the start of the feature relative to its
271             source.
272              
273             =item abs_start()
274              
275             $start = $feature->abs_start;
276              
277             This method returns the start of the feature relative to the sequence
278             segment indicated by seqname(). As in the I method,
279             you will more usually use the inherited start() method to obtain the
280             start of the feature relative to its source sequence (the
281             I from which it was originally derived).
282              
283             =item abs_end()
284              
285             $start = $feature->abs_end;
286              
287             This method returns the end of the feature relative to the sequence
288             segment indicated by seqname(). As in the I method,
289             you will more usually use the inherited end() method to obtain the end
290             of the feature relative to the I from which it was
291             derived.
292              
293             =item score()
294              
295             $score = $feature->score;
296              
297             For features that are associated with a numeric score, such as
298             similarities, this returns that value. For other features, this
299             method returns undef.
300              
301             =item strand()
302              
303             $strand = $feature->strand;
304              
305             Returns the strandedness of this feature, either "+1" or "-1". For
306             features that are not stranded, returns 0.
307              
308             =item reversed()
309              
310             $reversed = $feature->reversed;
311              
312             Returns true if the feature is reversed relative to its source
313             sequence.
314              
315             =item frame()
316              
317             $frame = $feature->frame;
318              
319             For features that have a frame, such as a predicted coding sequence,
320             returns the frame, either 0, 1 or 2. For other features, returns undef.
321              
322             =item group()
323              
324             =item info()
325              
326             =item target()
327              
328             $info = $feature->info;
329              
330             These methods (synonyms for one another) return an Ace::Object
331             containing other information about the feature derived from the 8th
332             field of the GFF format, the so-called "group" field. The type of the
333             Ace::Object is dependent on the nature of the feature. The
334             possibilities are shown in the table below:
335              
336             Feature Type Value of Group Field
337             ------------ --------------------
338            
339             note A Text object containing the note.
340            
341             similarity An Ace::Sequence::Homology object containing
342             the target and its start/stop positions.
343              
344             intron An Ace::Object containing the gene from
345             exon which the feature is derived.
346             misc_feature
347              
348             other A Text object containing the group data.
349              
350             =item asString()
351              
352             $label = $feature->asString;
353              
354             Returns a human-readable identifier describing the nature of the
355             feature. The format is:
356              
357             $type:$name/$start-$end
358              
359             for example:
360              
361             exon:ZK154.3/1-67
362              
363             This method is also called automatically when the object is treated in
364             a string context.
365              
366             =back
367              
368             =head1 SEE ALSO
369              
370             L, L, L,L,
371             L, L
372              
373             =head1 AUTHOR
374              
375             Lincoln Stein with extensive help from Jean
376             Thierry-Mieg
377              
378             Copyright (c) 1999, Lincoln D. Stein
379              
380             This library is free software; you can redistribute it and/or modify
381             it under the same terms as Perl itself. See DISCLAIMER.txt for
382             disclaimers of warranty.
383              
384             =cut
385              
386              
387             __END__