line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MUST::Core::Constants; |
2
|
|
|
|
|
|
|
# ABSTRACT: Distribution-wide constants for Bio::MUST::Core |
3
|
|
|
|
|
|
|
$Bio::MUST::Core::Constants::VERSION = '0.212530'; |
4
|
17
|
|
|
17
|
|
193906
|
use strict; |
|
17
|
|
|
|
|
45
|
|
|
17
|
|
|
|
|
623
|
|
5
|
17
|
|
|
17
|
|
98
|
use warnings; |
|
17
|
|
|
|
|
40
|
|
|
17
|
|
|
|
|
460
|
|
6
|
|
|
|
|
|
|
|
7
|
17
|
|
|
17
|
|
472
|
use Const::Fast; |
|
17
|
|
|
|
|
2208
|
|
|
17
|
|
|
|
|
106
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Export interface, declared through Exporter::Easy.
# No EXPORT entry is given, so callers are expected to request symbols
# explicitly, typically through one of the tags below.
# NOTE(review): the :dirs tag is defined in TAGS but is not listed in OK --
# presumably Exporter::Easy makes every TAGS symbol exportable anyway; confirm.
use Exporter::Easy (
    OK   => [ qw(:seqtypes :gaps :ncbi :seqids :files) ],
    TAGS => [
        # single-character probes for the sequence type
        seqtypes => [ qw(
            $PROTLIKE $RNALIKE $NONPUREDNA
        ) ],

        # gap / missing-state symbols and the frameshift marker
        gaps => [ qw(
            $GAP $PROTMISS $DNAMISS
            $GAPPROTMISS $GAPDNAMISS $FRAMESHIFT
        ) ],

        # building blocks of NCBI identifiers
        ncbi => [ qw(
            $NCBIPART $NCBIACC $NCBIDBABBR
            $NCBIPKEY $PKEYONLY $NCBIGCA $GCAONLY
        ) ],

        # patterns applied to sequence ids
        seqids => [ qw(
            $NOID_CHARS $NEW_TAG $TAIL_42
            $DEF_ID $GI_ID $LCL_ID $GNL_ID $JGI_ID $PAC_ID
        ) ],

        # line-level patterns for the supported file formats
        files => [ qw(
            $EMPTY_LINE $COMMENT_LINE $DEF_LINE
            $DIM_LINE $PHY_LINE
            $STK_COMMENT $STK_SEQ $END_LINE
            $COUNT_LINE $ALI_SUFFIX
        ) ],

        # file-name suffix patterns used when scanning directories
        dirs => [ qw(
            %SUFFICES_FOR
        ) ],
    ],
);
|
17
|
|
|
|
|
26617
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Sequence-type probes (exported under :seqtypes).
# Each regex is a single bracketed class, i.e. it matches ONE character.
# NOTE(review): the letters in $PROTLIKE are presumably chosen because none
# of them is a nucleotide code -- confirm against the callers.

# any of E, F, I, L, P, Q (upper or lower case)
const our $PROTLIKE
    => qr{[EFILPQefilpq]}xms;

# U or u
const our $RNALIKE
    => qr{[Uu]}xms;

# any character other than A, C, G, T (upper or lower case)
const our $NONPUREDNA
    => qr{[^ACGTacgt]}xms;
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Gap and missing-state symbols (exported under :gaps).
# Bio::MUST::Core::Types holds further ("more") gap definitions.
#
# The classes are defined in two steps: the class contents are first stored
# as plain strings, then interpolated into one bracketed class per regex.
# The original author notes this is done for maximal regex speed.

# Step 1 -- class contents, lexical to this file (not exported)
const my $GAPCHCL       => q{\*\-\ };       # asterisk, dash, escaped blank
const my $PROTMISSCHCL  => q{\?Xx};
const my $DNAMISSCHCL   => q{\?XxNn};
const my $PROTAMBIGCHCL => q{BJOUZbjouz};   # O and U are not truly ambiguous
const my $DNAAMBIGCHCL  => q{BDHKMRSVWYbdhkmrsvwy};

# Step 2 -- single-character regexes assembled from the strings above

# gap symbols only
const our $GAP      => qr{[$GAPCHCL]}xms;

# missing + ambiguous symbols, per alphabet
const our $PROTMISS => qr{[$PROTMISSCHCL$PROTAMBIGCHCL]}xms;
const our $DNAMISS  => qr{[$DNAMISSCHCL$DNAAMBIGCHCL]}xms;

# gap + missing + ambiguous symbols, per alphabet
const our $GAPPROTMISS => qr{[$GAPCHCL$PROTMISSCHCL$PROTAMBIGCHCL]}xms;
const our $GAPDNAMISS  => qr{[$GAPCHCL$DNAMISSCHCL$DNAAMBIGCHCL]}xms;

# frameshift marker: a plain one-character string, not a compiled regex
const our $FRAMESHIFT => 'x';
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Building blocks of NCBI identifiers (exported under :ncbi).
# Except for the two *ONLY variants, these patterns are unanchored so that
# they can be embedded in larger regexes.

# one pipe-delimited field: a run of characters that are neither '|' nor blank
const our $NCBIPART   => qr{[^\|\s]+}xms;

# accession: upper-case letters, digits, dots and underscores
const our $NCBIACC    => qr{[A-Z0-9\.\_]+}xms;

# database abbreviation: two or more lower-case letters
const our $NCBIDBABBR => qr{[a-z]{2,}}xms;

# numeric primary key: digits without a leading zero
const our $NCBIPKEY   => qr{[1-9]\d*}xms;

# whole string must be a primary key
const our $PKEYONLY   => qr{\A $NCBIPKEY \z}xms;

# Assembly accessions, see http://www.ncbi.nlm.nih.gov/assembly/model/
# An assembly accession begins with a three-letter prefix (GCA for GenBank
# assemblies, GCF for RefSeq assemblies), followed by an underscore and
# 9 digits, to which a version is appended. For instance, the GenBank version
# of the current public human reference assembly ( GRCh37.p2 ) has the
# accession GCA_000001405.3.
const our $NCBIGCA    => qr{GC[AF]_\d{9} \. \d+}xms;

# whole string must be an assembly accession
const our $GCAONLY    => qr{\A $NCBIGCA \z}xms;
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Patterns applied to sequence ids (exported under :seqids).

# a single comma, semicolon or colon
const our $NOID_CHARS => qr{[,;:]}xms;

# the literal marker #NEW# ('#' is escaped because /x is in effect)
const our $NEW_TAG    => qr{\#NEW\#}xms;

# id tail: '.H<digits>.<digits>', '.E.bf' or '.E.lc', optionally followed by
# the #NEW# marker, anchored at the end of the string
const our $TAIL_42
    => qr{(?: \.H\d+\.\d+ | \.E\.bf | \.E\.lc) $NEW_TAG? \z}xms;

# The patterns below each have one capture group.

# default: leading run of non-blank characters
const our $DEF_ID => qr{\A (\S+) }xms;

# 'gi|<primary key>'
const our $GI_ID  => qr{\A gi \| ($NCBIPKEY) }xms;

# 'lcl|<non-blank run>'
const our $LCL_ID => qr{\A lcl \| (\S+) }xms;

# 'gnl|<field>|<field>' -- the second field is captured
const our $GNL_ID => qr{\A gnl \| $NCBIPART \| ($NCBIPART) }xms;

# 'jgi|<field>|<digits>' -- the digits are captured
const our $JGI_ID => qr{\A jgi \| $NCBIPART \| (\d+) }xms;

# '|PACid:<digits>' anywhere in the string (no \A anchor)
const our $PAC_ID => qr{\| PACid: (\d+) }xms;
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Line-level patterns used when parsing files (exported under :files).

# --- shared by several formats ---

# nothing but optional whitespace
const our $EMPTY_LINE   => qr{\A \s* \z}xms;

# leading '#': captures the '#' itself, then the text after optional blanks
const our $COMMENT_LINE => qr{\A (\#)\s*(.*)}xms;

# --- FASTA-like ---

# leading '>': captures everything that follows it
const our $DEF_LINE     => qr{\A >(.*)}xms;

# --- PHYLIP-related ---

# two whitespace-separated integers, both captured
const our $DIM_LINE     => qr{\A \s*(\d+)\s+(\d+)\s* \z}xms;

# optional leading non-blank token followed by one whitespace character
# (the token is captured), then optional blanks and the captured remainder
const our $PHY_LINE     => qr{\A (?:(\S+)\s)? \s* (.*) }xms;

# --- STOCKHOLM-related ---

# leading '#=GF': captures the '#=GF' itself, then the text after optional blanks
const our $STK_COMMENT  => qr{\A (\#=GF)\s*(.*)}xms;

# non-blank token, whitespace, remainder (token and remainder captured)
const our $STK_SEQ      => qr{\A (\S+)\s+(.*)}xms;

# leading '//'
const our $END_LINE     => qr{\A //}xms;

# --- MUST-related ---

# digits only, captured
const our $COUNT_LINE   => qr{\A (\d+) \z}xms;

# string ending in '.ali', case-insensitive
const our $ALI_SUFFIX   => qr{\.ali \z}xmsi;
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# File-name suffix patterns used when traversing directories (exported under
# :dirs). Each hash value is meant to be passed as the -name argument of a
# File::Find::Rule constructor.
const our %SUFFICES_FOR => (
    # names ending in .ali, .fasta, .fas, .fa, .faa or .fna (case-insensitive)
    Ali => qr{\. (?: ali|fasta|fas|fa|faa|fna ) \z}xmsi,
);
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
1; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
__END__ |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=pod |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 NAME |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Bio::MUST::Core::Constants - Distribution-wide constants for Bio::MUST::Core |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head1 VERSION |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
version 0.212530 |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head1 AUTHOR |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
132
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |