File Coverage

blib/lib/GO/AnnotationProvider.pm
Criterion Covered Total %
statement 12 39 30.7
branch n/a
condition 0 3 0.0
subroutine 4 23 17.3
pod 17 17 100.0
total 33 82 40.2


line stmt bran cond sub pod time code
1             package GO::AnnotationProvider;
2              
3             # File : AnnotationProvider.pm
4             # Author : Gavin Sherlock
5             # Date Begun : September 26th 2002
6              
7             # $Id: AnnotationProvider.pm,v 1.13 2006/07/27 23:59:48 sherlock Exp $
8              
9             # License information (the MIT license)
10              
11             # Copyright (c) 2003 Gavin Sherlock; Stanford University
12              
13             # Permission is hereby granted, free of charge, to any person
14             # obtaining a copy of this software and associated documentation files
15             # (the "Software"), to deal in the Software without restriction,
16             # including without limitation the rights to use, copy, modify, merge,
17             # publish, distribute, sublicense, and/or sell copies of the Software,
18             # and to permit persons to whom the Software is furnished to do so,
19             # subject to the following conditions:
20              
21             # The above copyright notice and this permission notice shall be
22             # included in all copies or substantial portions of the Software.
23              
24             # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25             # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26             # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27             # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28             # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29             # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30             # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31             # SOFTWARE.
32              
33 2     2   13 use strict;
  2         4  
  2         73  
34 2     2   11 use warnings;
  2         4  
  2         63  
35 2     2   10 use diagnostics;
  2         4  
  2         26  
36              
37 2     2   58 use vars qw ($VERSION);
  2         4  
  2         1558  
38              
39             $VERSION = 0.12;
40              
41             =pod
42              
43             =head1 NAME
44              
45             GO::AnnotationProvider - abstract base class defining interface for how Annotation information should be provided
46              
47             =head1 DESCRIPTION
48              
49             GO::AnnotationProvider is an interface that defines an API that
50             should be implemented by specific subclasses, which may read GO
51             annotation from databases, flatfiles, XML files etc.
52              
53             GO (Gene Ontology) is a project of the Gene Ontology Consortium
54             (http://www.geneontology.org). The GO project has 3 'aspects' :
55              
56             Biological Process
57             Molecular Function
58             Cellular Component
59              
60             When a method requires the client to refer to an aspect, it is simply
61             by a shorthand, namely P, F and C, respectively.
62              
63             In GO associations, annotated entities may be identified by many
64             different names. Firstly, they should have a database identifier,
65             which should be unique for an entity. Secondly, they should have a
66             standard name. Standard names should be unique among standard names,
67             but it is possible that a standard name of one entity may be used as
68             an alias of another. An entity may have many aliases, and an alias
69             may be used for many entities. Hence, a name (drawn from databaseIds,
70             standard names, and aliases) may be ambiguous in the entity to which
71             it refers. This is an important concept for clients of concrete
72             subclasses to take into consideration, so that unexpected results are
73             avoided.
74              
75             =head1 TODO
76              
77             Currently this interface dictates that clients can retrieve GOIDs that
78             have been used to annotate genes. In future, this interface is
79             likely to change, such that instead of GOIDs, GO::Annotation objects
80             are instead returned, which will be richer in terms of the information
81             they can give about a given annotation. Such objects would contain a
82             GO::AnnotatedGene object, one or more GO::Reference objects, and an
83             evidence code. The retrieval of annotations for a given database id
84             could then be extended to allow filtering by evidence codes, to either
85             include or exclude certain codes.
86              
87             This interface also currently only allows retrieval of GOIDs for
88             genes; in future, it will be extended such that the genes can be
89             retrieved by GOID.
90              
91             =head1 Constructor
92              
93             Because this is an abstract class, there is no constructor. A
94             constructor must be implemented by concrete subclasses.
95              
96             =head1 Public instance methods
97              
98             All of these public instance methods must be implemented by concrete subclasses.
99              
100             =head1 Some methods dealing with ambiguous names
101              
102             Because there are many names by which an annotated entity may be
103             referred to, that are non-unique, this interface defines a set of
104             methods for determining whether a name is ambiguous, and to what
105             database identifiers such ambiguous names may refer.
106              
107             Note, that the AnnotationProvider subclasses should now be case
108             insensitive, though there are some caveats. For instance, you can use
109             'cdc6' to retrieve data for CDC6. However, if one gene has been
110             referred to as abc1, and another referred to as ABC1, then these are
111             treated as different, and unambiguous. However, the text 'Abc1' would
112             be considered ambiguous, because it could refer to either. On the
113             other hand, if a single gene is referred to as XYZ1 and xyz1, and no
114             other genes have that name (in any casing), then Xyz1 would still be
115             considered unambiguous.
116              
117             =cut
118              
119             ##############################################################################
120             sub nameIsAmbiguous{
121             ##############################################################################
122             =pod
123              
124             =head2 nameIsAmbiguous
125              
126             NB: API change:
127              
128             nameIsAmbiguous is now case insensitive - that is, if there is a name
129             that is used twice using different casing, that will be treated as
130             ambiguous. Previous versions would have not treated these as
131             ambiguous. In the case that a name is provided in a certain casing,
132             which was encountered only once, then it will be treated as
133             unambiguous. This is the price of wanting a case insensitive
134             annotation provider...
135              
136             Usage:
137              
138             if ($annotationProvider->nameIsAmbiguous($name)){
139              
140             do something useful....or not....
141              
142             }
143              
144             =cut
145             ##############################################################################
146              
147 0     0 1   $_[0]->__complainStubMethod;
148              
149             }
150              
151             ############################################################################
152             sub databaseIdsForAmbiguousName{
153             ############################################################################
154             =pod
155              
156             =head2 databaseIdsForAmbiguousName
157              
158             This public method returns an array of database identifiers for an
159             ambiguous name. If the name is not ambiguous, an empty list will be
160             returned.
161              
162             NB: API change:
163              
164             databaseIdsForAmbiguousName is now case insensitive - that is, if
165             there is a name that is used twice using different casing, that will
166             be treated as ambiguous. Previous versions would have not treated
167             these as ambiguous. However, if the name provided is of the exact
168             casing as a name that appeared only once with that exact casing, then
169             it is treated as unambiguous. This is the price of wanting a case
170             insensitive annotation provider...
171              
172             Usage:
173              
174             my @databaseIds = $annotationProvider->databaseIdsForAmbiguousName($name);
175              
176             =cut
177             ##############################################################################
178              
179 0     0 1   $_[0]->__complainStubMethod;
180              
181             }
182              
183             ############################################################################
184             sub ambiguousNames{
185             ############################################################################
186             =pod
187              
188             =head2 ambiguousNames
189              
190             This method returns an array of names, which from the annotation source
191             have been deemed to be ambiguous.
192              
193             Note - even though this is now case insensitive, if something is
194             called both BLAH1 and blah1, we would not deem either of these to be
195             ambiguous. However, if it appeared as blah1 twice, referring to two
196             different genes, then blah1 would be ambiguous.
197              
198             Usage:
199              
200             my @ambiguousNames = $annotationProvider->ambiguousNames;
201              
202             =cut
203              
204 0     0 1   $_[0]->__complainStubMethod;
205              
206             }
207              
208             =pod
209              
210             =head1 Methods for retrieving GO annotations for entities
211              
212             =cut
213              
214             ############################################################################
215             sub goIdsByDatabaseId{
216             ############################################################################
217             =pod
218              
219             =head2 goIdsByDatabaseId
220              
221             This public method returns a reference to an array of GOIDs that are
222             associated with the supplied databaseId for a specific aspect. If no
223             annotations are associated with that databaseId in that aspect, then a
224             reference to an empty array will be returned. If the databaseId is
225             not recognized, then undef will be returned.
226              
227             Usage:
228              
229             my $goidsRef = $annotationProvider->goIdsByDatabaseId(databaseId=>$databaseId,
230             aspect=>$aspect);
231              
232             =cut
233             ##############################################################################
234              
235 0     0 1   $_[0]->__complainStubMethod;
236              
237             }
238              
239             ############################################################################
240             sub goIdsByStandardName{
241             ############################################################################
242             =pod
243              
244             =head2 goIdsByStandardName
245              
246             This public method returns a reference to an array of GOIDs that are
247             associated with the supplied standardName for a specific aspect. If
248             no annotations are associated with the entity with that standard name
249             in that aspect, then a reference to an empty list will be returned.
250             If the supplied name is not used as a standard name, then undef will
251             be returned.
252              
253             Usage:
254              
255             my $goidsRef = $annotationProvider->goIdsByStandardName(standardName=>$standardName,
256             aspect=>$aspect);
257              
258             =cut
259             ##############################################################################
260              
261 0     0 1   $_[0]->__complainStubMethod;
262              
263             }
264              
265             ############################################################################
266             sub goIdsByName{
267             ############################################################################
268             =pod
269              
270             =head2 goIdsByName
271              
272             This public method returns a reference to an array of GO IDs that are
273             associated with the supplied name for a specific aspect. If there are
274             no GO associations for the entity corresponding to the supplied name
275             in the provided aspect, then a reference to an empty list will be
276             returned. If the supplied name does not correspond to any entity,
277             then undef will be returned. Because the name can be any of the
278             databaseId, the standard name, or any of the aliases, it is possible
279             that the name might be ambiguous. Clients of this object should first
280             test whether the name they are using is ambiguous, using the
281             nameIsAmbiguous() method, and handle it accordingly. If an ambiguous
282             name is supplied, then it will die.
283              
284             NB: API change:
285              
286             goIdsByName is now case insensitive - that is, if there is a name that
287             is used twice using different casing, that will be treated as
288             ambiguous. Previous versions would have not treated these as
289             ambiguous. This is the price of wanting a case insensitive annotation
290             provider. In the event that a name is provided that is ambiguous
291             because of case, if it matches exactly the case of one of the possible
292             matches, it will be treated unambiguously.
293              
294             Usage:
295              
296             my $goidsRef = $annotationProvider->goIdsByName(name=>$name,
297             aspect=>$aspect);
298              
299             =cut
300             ##############################################################################
301              
302 0     0 1   $_[0]->__complainStubMethod;
303              
304             }
305              
306             =pod
307              
308             =head1 Methods for mapping different types of name to each other
309              
310             =cut
311              
312             ############################################################################
313             sub standardNameByDatabaseId{
314             ############################################################################
315             =pod
316              
317             =head2 standardNameByDatabaseId
318              
319             This method returns the standard name for a database id.
320              
321             NB: API change
322              
323             standardNameByDatabaseId is now case insensitive - that is, if there
324             is a databaseId that is used twice (or more) using different casing,
325             it will be treated as ambiguous. Previous versions would have not
326             treated these as ambiguous. This is the price of wanting a case
327             insensitive annotation provider. In the event that a name is provided
328             that is ambiguous because of case, if it matches exactly the case of
329             one of the possible matches, it will be treated unambiguously.
330              
331             Usage:
332              
333             my $standardName = $annotationProvider->standardNameByDatabaseId($databaseId);
334              
335             =cut
336             ##############################################################################
337              
338 0     0 1   $_[0]->__complainStubMethod;
339              
340             }
341              
342             ############################################################################
343             sub databaseIdByStandardName{
344             ############################################################################
345             =pod
346              
347             =head2 databaseIdByStandardName
348              
349             This method returns the database id for a standard name.
350              
351             NB: API change
352              
353             databaseIdByStandardName is now case insensitive - that is, if there
354             is a standard name that is used twice (or more) using different
355             casing, it will be treated as ambiguous. Previous versions would have
356             not treated these as ambiguous. This is the price of wanting a case
357             insensitive annotation provider. In the event that a name is provided
358             that is ambiguous because of case, if it matches exactly the case of
359             one of the possible matches, it will be treated unambiguously.
360              
361             Usage:
362              
363             my $databaseId = $annotationProvider->databaseIdByStandardName($standardName);
364              
365             =cut
366             ##############################################################################
367              
368 0     0 1   $_[0]->__complainStubMethod;
369              
370             }
371              
372             ############################################################################
373             sub databaseIdByName{
374             ############################################################################
375             =pod
376              
377             =head2 databaseIdByName
378              
379             This method returns the database id for any identifier for a gene (e.g.
380             by databaseId itself, by standard name, or by alias). If the used
381             name is ambiguous, then the program will die. Thus clients should
382             call the nameIsAmbiguous() method, prior to using this method. If the
383             name does not map to any databaseId, then undef will be returned.
384              
385             NB: API change
386              
387             databaseIdByName is now case insensitive - that is, if there is a name
388             that is used twice using different casing, that will be treated as
389             ambiguous. Previous versions would have not treated these as
390             ambiguous. This is the price of wanting a case insensitive annotation
391             provider. In the event that a name is provided that is ambiguous
392             because of case, if it matches exactly the case of one of the possible
393             matches, it will be treated unambiguously.
394              
395             Usage:
396              
397             my $databaseId = $annotationProvider->databaseIdByName($name);
398              
399             =cut
400             ##############################################################################
401              
402 0     0 1   $_[0]->__complainStubMethod;
403              
404             }
405              
406             ############################################################################
407             sub standardNameByName{
408             ############################################################################
409             =pod
410              
411             =head2 standardNameByName
412              
413             This public method returns the standard name for the gene
414             specified by the given name. Because a name may be ambiguous, the
415             nameIsAmbiguous() method should be called first. If an ambiguous name
416             is supplied, then it will die with an appropriate error message. If
417             the name does not map to a standard name, then undef will be returned.
418              
419             NB: API change
420              
421             standardNameByName is now case insensitive - that is, if there is a
422             name that is used twice using different casing, that will be treated
423             as ambiguous. Previous versions would have not treated these as
424             ambiguous. This is the price of wanting a case insensitive annotation
425             provider.
426              
427             Usage:
428              
429             my $standardName = $annotationProvider->standardNameByName($name);
430            
431             =cut
432             ##############################################################################
433              
434 0     0 1   $_[0]->__complainStubMethod;
435              
436             }
437              
438             =pod
439              
440             =head1 Other methods relating to names
441              
442             =cut
443              
444             ##############################################################################
445             sub nameIsStandardName{
446             ##############################################################################
447             =pod
448              
449             =head2 nameIsStandardName
450              
451             This method returns a boolean to indicate whether the supplied name is
452             used as a standard name.
453              
454             NB : API change.
455              
456             This is now case insensitive. If you provide abC1, and ABc1 is a
457             standard name, then it will return true.
458              
459             Usage :
460              
461             if ($annotationProvider->nameIsStandardName($name)){
462              
463             # do something
464              
465             }
466              
467             =cut
468             ##############################################################################
469              
470 0     0 1   $_[0]->__complainStubMethod;
471              
472             }
473              
474             ##############################################################################
475             sub nameIsDatabaseId{
476             ##############################################################################
477             =pod
478              
479             =head2 nameIsDatabaseId
480              
481             This method returns a boolean to indicate whether the supplied name is
482             used as a database id.
483              
484             NB : API change.
485              
486             This is now case insensitive. If you provide abC1, and ABc1 is a
487             database id, then it will return true.
488              
489             Usage :
490              
491             if ($annotationProvider->nameIsDatabaseId($name)){
492              
493             # do something
494              
495             }
496              
497             =cut
498             ##############################################################################
499              
500 0     0 1   $_[0]->__complainStubMethod;
501              
502             }
503              
504             ############################################################################
505             sub nameIsAnnotated{
506             ############################################################################
507             =pod
508              
509             =head2 nameIsAnnotated
510              
511             This method returns a boolean to indicate whether the supplied name has any
512             annotations, either when considered as a databaseId, a standardName, or
513             an alias. If an aspect is also supplied, then it indicates whether that
514             name has any annotations in that aspect only.
515              
516             NB: API change.
517              
518             This is now case insensitive. If you provide abC1, and ABc1 has
519             annotation, then it will return true.
520              
521             Usage :
522              
523             if ($annotationProvider->nameIsAnnotated(name => $name)){
524              
525             # blah
526              
527             }
528              
529             or:
530              
531             if ($annotationProvider->nameIsAnnotated(name => $name,
532             aspect => $aspect)){
533              
534             # blah
535              
536             }
537              
538              
539             =cut
540             ##############################################################################
541              
542 0     0 1   $_[0]->__complainStubMethod;
543              
544             }
545              
546              
547             =pod
548              
549             =head1 Other public methods
550              
551             =cut
552              
553             ############################################################################
554             sub databaseName{
555             ############################################################################
556             =pod
557              
558             =head2 databaseName
559              
560             This method returns the name of the annotating authority of the annotations.
561              
562             Usage :
563              
564             my $databaseName = $annotationProvider->databaseName;
565              
566             =cut
567             ##############################################################################
568              
569 0     0 1   $_[0]->__complainStubMethod;
570              
571             }
572              
573             ############################################################################
574             sub numAnnotatedGenes{
575             ############################################################################
576             =pod
577              
578             =head2 numAnnotatedGenes
579              
580             This method returns the number of entities in the annotation file that
581             have annotations in the supplied aspect. If no aspect is provided,
582             then it will return the number of genes with an annotation in at least
583             one aspect of GO.
584              
585             Usage:
586              
587             my $numAnnotatedGenes = $annotationProvider->numAnnotatedGenes;
588              
589             my $numAnnotatedGenes = $annotationProvider->numAnnotatedGenes($aspect);
590              
591             =cut
592             ##############################################################################
593              
594 0     0 1   $_[0]->__complainStubMethod;
595              
596             }
597              
598             ############################################################################
599             sub allDatabaseIds{
600             ############################################################################
601             =pod
602              
603             =head2 allDatabaseIds
604              
605             This public method returns an array of all the database identifiers.
606              
607             Usage:
608              
609             my @databaseIds = $annotationProvider->allDatabaseIds;
610              
611             =cut
612             ##############################################################################
613              
614 0     0 1   $_[0]->__complainStubMethod;
615              
616             }
617              
618             ############################################################################
619             sub allStandardNames{
620             ############################################################################
621             =pod
622              
623              
624             =head2 allStandardNames
625              
626             This public method returns an array of all standard names.
627              
628             Usage:
629              
630             my @standardNames = $annotationProvider->allStandardNames;
631              
632             =cut
633             ##############################################################################
634              
635 0     0 1   $_[0]->__complainStubMethod;
636              
637             }
638              
639             ############################################################################
640             #
641             # PROTECTED METHODS
642             #
643             ############################################################################
644              
645             =pod
646              
647             =head1 Protected Methods
648              
649             =cut
650              
651             ############################################################################
652             sub _handleMissingArgument{
653             ############################################################################
654             =pod
655              
656             =head2 _handleMissingArgument
657              
658             This protected method simply provides a simple way for concrete
659             subclasses to deal with missing arguments from method calls. It will
660             die with an appropriate error message.
661              
662             Usage:
663              
664             $self->_handleMissingArgument(argument=>'blah');
665              
666             =cut
667             ##############################################################################
668              
669 0     0     my ($self, %args) = @_;
670              
671 0   0       my $arg = $args{'argument'} || $self->_handleMissingArgument(argument=>'argument');
672              
673 0           my $receiver = (caller(1))[3];
674 0           my $caller = (caller(2))[3];
675              
676 0           die "The method $caller did not provide a value for the '$arg' argument for the $receiver method";
677              
678             }
679              
680             ############################################################################
681             #
682             # PRIVATE METHODS
683             #
684             ############################################################################
685              
686             ############################################################################
687             sub __complainStubMethod{
688             ############################################################################
689             # This method is called only if a stub method gets called, because a
690             # subclass failed to provide an implementation of one of the methods
691             # required by the interface. It will cause a fatal error.
692              
693 0     0     my $self = shift;
694              
695 0           my $subroutine = (caller(1))[3];
696              
697 0           $subroutine =~ s/.+:://;
698              
699 0           my $package = ref $self;
700              
701 0           die "The package $package has not implemented the required method $subroutine().\n";
702              
703             }
704              
705             1; # to keep Perl happy
706              
707             =pod
708              
709             =head1 AUTHOR
710              
711             Gavin Sherlock, sherlock@genome.stanford.edu
712              
713             =cut