File Coverage

blib/lib/Bio/MUST/Core/Types.pm
Criterion Covered Total %
statement 15 15 100.0
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 20 20 100.0


line stmt bran cond sub pod time code
1             package Bio::MUST::Core::Types;
2             # ABSTRACT: Distribution-wide Moose types for Bio::MUST::Core
3             $Bio::MUST::Core::Types::VERSION = '0.212650';
4 17     17   9956 use Moose::Util::TypeConstraints;
  17         5006728  
  17         179  
5              
6 17     17   38776 use autodie;
  17         41  
  17         175  
7 17     17   87915 use feature qw(say);
  17         42  
  17         1698  
8              
9 17     17   981 use Path::Class qw(dir file);
  17         59495  
  17         21990  
10              
11             # declare types without loading corresponding classes
12             class_type('Bio::MUST::Core::Ali');
13             class_type('Bio::MUST::Core::Ali::Stash');
14             class_type('Bio::MUST::Core::IdList');
15             class_type('Bio::MUST::Core::IdMapper');
16             class_type('Bio::MUST::Core::SeqId');
17              
18             # TODO: consider MooseX::Types
19              
20             # http://www.ebi.ac.uk/2can/tutorials/aa.html
21              
22             # A Ala Alanine
23             # R Arg Arginine
24             # N Asn Asparagine
25             # D Asp Aspartic acid
26             # C Cys Cysteine
27             # Q Gln Glutamine
28             # E Glu Glutamic acid
29             # G Gly Glycine
30             # H His Histidine
31             # J Xle Leucine or Isoleucine
32             # L Leu Leucine
33             # I Ile Isoleucine
34             # K Lys Lysine
35             # M Met Methionine
36             # F Phe Phenylalanine
37             # P Pro Proline
38             # O Pyl Pyrrolysine
39             # U Sec Selenocysteine
40             # S Ser Serine
41             # T Thr Threonine
42             # W Trp Tryptophan
43             # Y Tyr Tyrosine
44             # V Val Valine
45             # B Asx Aspartic acid or Asparagine
46             # Z Glx Glutamic acid or Glutamine
47             # X Xaa Any amino acid
48              
49             # IUB Meaning Complement
50             # A A T
51             # C C G
52             # G G C
53             # T/U T A
54             # M A/C K
55             # R A/G Y
56             # W A/T W
57             # S C/G S
58             # Y C/T R
59             # K G/T M
60             # V A/C/G B
61             # H A/C/T D
62             # D A/G/T H
63             # B C/G/T V
64             # X/N A/C/G/T X
65              
66              
67             # auto-build SeqId instance from bare string
68             # useful when the SeqId class is used as an attribute in another class
69             # TODO: check whether it is the best practice
70             coerce 'Bio::MUST::Core::SeqId'
71             => from 'Str'
72             => via { Bio::MUST::Core::SeqId->new(full_id => $_) }
73             ;
74              
75             # auto-build ArrayRef[full_id] from ArrayRef[SeqId] or ArrayRef[Seq]
76             # useful for IdList and IdMapper objects
77             subtype 'Bio::MUST::Core::Types::full_ids'
78             => as 'ArrayRef[Str]';
79              
80             coerce 'Bio::MUST::Core::Types::full_ids'
81             => from 'ArrayRef[Bio::MUST::Core::SeqId]'
82             => via { [ map { $_->full_id } @{$_} ] }
83              
84             => from 'ArrayRef[Bio::MUST::Core::Seq]'
85             => via { [ map { $_->full_id } @{$_} ] }
86             ;
87              
88             # quite tolerant subtype designed to preserve original casing
89             # however FASTA '-' symbols are converted to ALI '*' during coercion
90             # whereas spaces and '?' are left untouched
91             # Note: \A and \z are absolutely required for converting hard-wrapped seqs
92             subtype 'Bio::MUST::Core::Types::Seq'
93             => as 'Str'
94             => where { m{\A [\*\ A-Za-z\?]* \z}xms }
95             => message { 'Only IUPAC codes and gaps [*-<space>?] are allowed.' }
96             ;
97              
98             coerce 'Bio::MUST::Core::Types::Seq'
99             => from 'Str'
100             => via { tr/-\n/*/dr } # convert FASTA on the fly
101             ; # ('-' => '*' and delete newlines)
102              
103             # subtype for a stringified NCBI Taxonomy lineage
104             subtype 'Bio::MUST::Core::Types::Lineage'
105             => as 'Str'
106             => where { tr/;// || m/\A cellular \s organisms/xms || m/\A Viruses/xms
107             || m/\A other \s sequences/xms || m/\A unclassified \s sequences/xms }
108             ;
109              
110             class_type('Path::Class::Dir');
111             class_type('Path::Class::File');
112             class_type('File::Temp');
113              
114             # auto-build Ali/Stash from various source types...
115             # useful in Bio::MUST::Drivers modules
116              
117             coerce 'Bio::MUST::Core::Ali'
118             => from 'Bio::MUST::Core::Ali::Stash'
119             => via { Bio::MUST::Core::Ali->new( seqs => $_->seqs, guessing => 1 ) }
120              
121             => from 'ArrayRef[Bio::MUST::Core::Seq]'
122             => via { Bio::MUST::Core::Ali->new( seqs => $_, guessing => 1 ) }
123              
124             => from 'Path::Class::File'
125             => via { Bio::MUST::Core::Ali->load( $_->stringify ) }
126              
127             => from 'Str'
128             => via { Bio::MUST::Core::Ali->load( $_ ) }
129             ;
130              
131             coerce 'Bio::MUST::Core::Ali::Stash'
132             => from 'Path::Class::File'
133             => via { Bio::MUST::Core::Ali::Stash->load( $_->stringify ) }
134              
135             => from 'Str'
136             => via { Bio::MUST::Core::Ali::Stash->load( $_ ) }
137             ;
138              
139             coerce 'Bio::MUST::Core::IdList'
140             => from 'ArrayRef[Str]'
141             => via { Bio::MUST::Core::IdList->new( ids => $_ ) }
142              
143             => from 'ArrayRef[Bio::MUST::Core::SeqId]'
144             => via { Bio::MUST::Core::IdList->new(
145             ids => [ map { $_->full_id } @{$_} ]
146             ) }
147              
148             => from 'ArrayRef[Bio::MUST::Core::Seq]'
149             => via { Bio::MUST::Core::IdList->new(
150             ids => [ map { $_->full_id } @{$_} ]
151             ) }
152              
153             => from 'Path::Class::File'
154             => via { Bio::MUST::Core::IdList->load( $_->stringify ) }
155              
156             => from 'Str'
157             => via { Bio::MUST::Core::IdList->load( $_ ) }
158             ;
159              
160             # TODO: add coercion for IdMapper from HashRef[SeqId], HashRef[Seq]?
161              
162             coerce 'Bio::MUST::Core::IdMapper'
163             => from 'HashRef[Str]'
164             => via { Bio::MUST::Core::IdMapper->new(
165             long_ids => [ keys %{$_} ],
166             abbr_ids => [ values %{$_} ],
167             ) }
168              
169             => from 'Path::Class::File'
170             => via { Bio::MUST::Core::IdMapper->load( $_->stringify ) }
171              
172             => from 'Str'
173             => via { Bio::MUST::Core::IdMapper->load( $_ ) }
174             ;
175              
176             # TODO: add tests for these coercions? templatize code?
177              
178             # subtype for 'dir' attributes
179             subtype 'Bio::MUST::Core::Types::Dir'
180             => as 'Path::Class::Dir'
181             ;
182              
183             # avoid the need for 'isa' unions such as 'Str|Path::Class::Dir'...
184             # ... and allow fixing '~/' paths on the fly (through glob)
185             coerce 'Bio::MUST::Core::Types::Dir'
186             => from 'Str'
187             => via { dir( glob $_ ) }
188             ;
189              
190             # === in part borrowed from Bio::FastParsers to avoid dependency
191              
192             # subtype for 'file' attributes
193             subtype 'Bio::MUST::Core::Types::File'
194             => as 'Path::Class::File'
195             ;
196              
197             # avoid the need for 'isa' unions such as 'Str|Path::Class::File'...
198             # ... and allow fixing '~/' paths on the fly (through glob)
199             # ... and allow delegating to Path::Class::File methods (e.g., remove)
200             coerce 'Bio::MUST::Core::Types::File'
201             => from 'File::Temp' # useful for Drivers
202             => via { file( $_->filename ) }
203              
204             => from 'Str'
205             => via { file( glob $_ ) }
206             ;
207              
208             # ===
209              
210 17     17   156 no Moose::Util::TypeConstraints;
  17         58  
  17         166  
211             1;
212              
213             __END__
214              
215             =pod
216              
217             =head1 NAME
218              
219             Bio::MUST::Core::Types - Distribution-wide Moose types for Bio::MUST::Core
220              
221             =head1 VERSION
222              
223             version 0.212650
224              
225             =head1 SYNOPSIS
226              
227             # TODO
228              
229             =head1 DESCRIPTION
230              
231             # TODO
232              
233             =head1 AUTHOR
234              
235             Denis BAURAIN <denis.baurain@uliege.be>
236              
237             =head1 COPYRIGHT AND LICENSE
238              
239             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
240              
241             This is free software; you can redistribute it and/or modify it under
242             the same terms as the Perl 5 programming language system itself.
243              
244             =cut