line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Abstract; |
2
|
33
|
|
|
33
|
|
211
|
use strict; |
|
33
|
|
|
|
|
60
|
|
|
33
|
|
|
|
|
760
|
|
3
|
33
|
|
|
33
|
|
137
|
use warnings; |
|
33
|
|
|
|
|
58
|
|
|
33
|
|
|
|
|
668
|
|
4
|
33
|
|
|
33
|
|
134
|
use base 'Bio::Phylo::IO'; |
|
33
|
|
|
|
|
54
|
|
|
33
|
|
|
|
|
2943
|
|
5
|
33
|
|
|
33
|
|
190
|
use IO::Handle; |
|
33
|
|
|
|
|
58
|
|
|
33
|
|
|
|
|
1259
|
|
6
|
33
|
|
|
33
|
|
179
|
use Bio::Phylo::Util::Exceptions 'throw'; |
|
33
|
|
|
|
|
58
|
|
|
33
|
|
|
|
|
1353
|
|
7
|
33
|
|
|
33
|
|
173
|
use Bio::Phylo::Util::CONSTANT '/looks_like/'; |
|
33
|
|
|
|
|
58
|
|
|
33
|
|
|
|
|
5208
|
|
8
|
33
|
|
|
33
|
|
203
|
use Bio::Phylo::Util::Logger ':simple'; |
|
33
|
|
|
|
|
59
|
|
|
33
|
|
|
|
|
3458
|
|
9
|
33
|
|
|
33
|
|
7463
|
use Bio::Phylo::Factory; |
|
33
|
|
|
|
|
70
|
|
|
33
|
|
|
|
|
182
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Abstract - Superclass for parsers used by Bio::Phylo::IO |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
This package is subclassed by all other packages within Bio::Phylo::Parsers::.*. |
18
|
|
|
|
|
|
|
There is no direct usage. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=cut |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $factory = Bio::Phylo::Factory->new; |
23
|
|
|
|
|
|
|
my $logger = Bio::Phylo::Util::Logger->new; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# argument is a file name, which we open |
26
|
|
|
|
|
|
|
sub _open_file { |
27
|
1
|
|
|
1
|
|
2
|
my $file_name = shift; |
28
|
1
|
|
50
|
|
|
7
|
my $encoding = shift || ''; |
29
|
1
|
50
|
|
|
|
67
|
open my $handle, "<${encoding}", $file_name or throw 'FileError' => $!; |
30
|
0
|
|
|
|
|
0
|
return $handle; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# argument is a string, which, at perl version >5.8, |
34
|
|
|
|
|
|
|
# we can treat as a handle by opening it by reference |
35
|
|
|
|
|
|
|
sub _open_string { |
36
|
108
|
|
|
108
|
|
228
|
my $string_value = shift; |
37
|
108
|
|
50
|
|
|
565
|
my $encoding = shift || ''; |
38
|
30
|
50
|
|
30
|
|
197
|
open my $handle, "<${encoding}", \$string_value or throw 'FileError' => $!; |
|
30
|
|
|
|
|
56
|
|
|
30
|
|
|
|
|
194
|
|
|
108
|
|
|
|
|
2627
|
|
39
|
108
|
|
|
|
|
15614
|
return $handle; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# argument is a url, |
43
|
|
|
|
|
|
|
sub _open_url { |
44
|
0
|
|
|
0
|
|
0
|
my $url = shift; |
45
|
0
|
|
0
|
|
|
0
|
my $encoding = shift || ''; |
46
|
0
|
|
|
|
|
0
|
my $handle; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# we need to use LWP::UserAgent to fetch the resource, but |
49
|
|
|
|
|
|
|
# we don't "use" it at the top of the module because that |
50
|
|
|
|
|
|
|
# would make it a required dependency |
51
|
0
|
|
|
|
|
0
|
eval { require LWP::UserAgent }; |
|
0
|
|
|
|
|
0
|
|
52
|
0
|
0
|
|
|
|
0
|
if ($@) { |
53
|
0
|
|
|
|
|
0
|
throw 'ExtensionError' => |
54
|
|
|
|
|
|
|
"Providing a -url argument requires\nsuccesful loading " |
55
|
|
|
|
|
|
|
. "of LWP::UserAgent.\nHowever, there was an error when " |
56
|
|
|
|
|
|
|
. "I\ntried that:\n" |
57
|
|
|
|
|
|
|
. $@; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# apparently it's installed, so let's instantiate a client |
61
|
0
|
|
|
|
|
0
|
my $ua = LWP::UserAgent->new; |
62
|
0
|
|
|
|
|
0
|
$ua->timeout(10); |
63
|
0
|
|
|
|
|
0
|
$ua->env_proxy; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# fetch the resource, get an HTTP::Response object |
66
|
0
|
|
|
|
|
0
|
my $response = $ua->get($url); |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# i.e. 200, or 304 (unchanged cache) |
69
|
0
|
0
|
0
|
|
|
0
|
if ( $response->is_success or $response->status_line =~ /304/ ) { |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# content is a string, so we create a handle in the same way |
72
|
|
|
|
|
|
|
# as when the argument was a string |
73
|
0
|
|
|
|
|
0
|
$handle = _open_string( $response->content, $encoding ); |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
else { |
76
|
0
|
|
|
|
|
0
|
throw 'NetworkError' => $response->status_line; |
77
|
|
|
|
|
|
|
} |
78
|
0
|
|
|
|
|
0
|
return $handle; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# deal with all possible data sources, return |
82
|
|
|
|
|
|
|
# a handle to whatever it is or throw an exception |
83
|
|
|
|
|
|
|
sub _open_handle { |
84
|
116
|
|
|
116
|
|
373
|
my %args = @_; |
85
|
116
|
|
|
|
|
213
|
my $handle; |
86
|
116
|
100
|
|
|
|
562
|
if ( $args{'-handle'} ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
87
|
7
|
|
|
|
|
36
|
binmode $args{'-handle'}, ":utf8"; |
88
|
7
|
|
|
|
|
13
|
$handle = $args{'-handle'}; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
elsif ( $args{'-file'} ) { |
91
|
1
|
|
|
|
|
6
|
$handle = _open_file( $args{'-file'}, $args{'-encoding'} ); |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
elsif ( $args{'-string'} ) { |
94
|
108
|
|
|
|
|
484
|
$handle = _open_string( $args{'-string'}, $args{'-encoding'} ); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
elsif ( $args{'-url'} ) { |
97
|
0
|
|
|
|
|
0
|
$handle = _open_url( $args{'-url'}, $args{'-encoding'} ); |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
else { |
100
|
0
|
|
|
|
|
0
|
throw 'BadArgs' => 'No data source provided!'; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# check to see if the data source contains anything |
104
|
|
|
|
|
|
|
#if ( eof $handle ) { |
105
|
|
|
|
|
|
|
# throw 'NoData' => "Source is empty!"; |
106
|
|
|
|
|
|
|
#} |
107
|
115
|
|
|
|
|
636
|
return $handle; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# open a Bio::Phylo::Project if asked (if the -as_project flag |
111
|
|
|
|
|
|
|
# was provided.) If the user has supplied one (the -project flag) |
112
|
|
|
|
|
|
|
# simply return that or undefined otherwise. |
113
|
|
|
|
|
|
|
sub _open_project { |
114
|
113
|
|
|
113
|
|
421
|
my ( $fac, %args ) = @_; |
115
|
113
|
50
|
|
|
|
501
|
if ( $args{'-project'} ) { |
|
|
100
|
|
|
|
|
|
116
|
0
|
|
|
|
|
0
|
return $args{'-project'}; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
elsif ( $args{'-as_project'} ) { |
119
|
24
|
|
|
|
|
188
|
return $fac->create_project; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
else { |
122
|
89
|
|
|
|
|
1149
|
return undef; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# this constructor is called by the Bio::Phylo::IO::parse |
127
|
|
|
|
|
|
|
# subroutine |
128
|
|
|
|
|
|
|
sub _new { |
129
|
116
|
|
|
116
|
|
262
|
my $class = shift; |
130
|
116
|
|
|
|
|
375
|
my %args = looks_like_hash @_; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# we need to guess the format |
133
|
116
|
100
|
|
|
|
422
|
if ( $class eq __PACKAGE__ ) { |
134
|
2
|
100
|
|
|
|
8
|
if ( my $format = _guess_format(_open_handle(%args)) ) { |
135
|
1
|
|
|
|
|
4
|
$class = 'Bio::Phylo::Parsers::' . ucfirst($format); |
136
|
1
|
|
|
|
|
3
|
return looks_like_class($class)->_new(%args); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
else { |
139
|
1
|
|
|
|
|
6
|
throw 'BadArgs' => "No format specified and unable to guess!"; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# factory is either user supplied or a private static |
144
|
114
|
|
33
|
|
|
812
|
my $fac = $args{'-factory'} || $factory; |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# values of these object fields will be accessed |
147
|
|
|
|
|
|
|
# by child classes through the appropriate protected |
148
|
|
|
|
|
|
|
# getters |
149
|
|
|
|
|
|
|
return bless { |
150
|
|
|
|
|
|
|
'_fac' => $fac, |
151
|
|
|
|
|
|
|
'_handle' => _open_handle(%args), |
152
|
|
|
|
|
|
|
'_proj' => _open_project( $fac, %args ), |
153
|
|
|
|
|
|
|
'_args' => \%args, # for child-specific arguments |
154
|
|
|
|
|
|
|
'_encoding' => $args{'-encoding'}, |
155
|
|
|
|
|
|
|
'_handlers' => $args{'-handlers'}, |
156
|
114
|
|
|
|
|
559
|
'_flush' => $args{'-flush'}, |
157
|
|
|
|
|
|
|
}, $class; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# child classes can override this to specify |
161
|
|
|
|
|
|
|
# that their return value is a single scalar |
162
|
|
|
|
|
|
|
# (e.g. a tree block, as is the case for newick), |
163
|
|
|
|
|
|
|
# instead of an array of blocks |
164
|
29
|
|
|
29
|
|
82
|
sub _return_is_scalar { 0 } |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# this is called by Bio::Phylo::IO::parse, and |
167
|
|
|
|
|
|
|
# in turn it calls the _parse method of whatever |
168
|
|
|
|
|
|
|
# the concrete child instance is. |
169
|
|
|
|
|
|
|
sub _process { |
170
|
113
|
|
|
113
|
|
269
|
my $self = shift; |
171
|
113
|
100
|
|
|
|
442
|
if ( $self->_return_is_scalar ) { |
172
|
84
|
|
|
|
|
288
|
my $result = $self->_parse; |
173
|
84
|
100
|
|
|
|
455
|
if ( my $p = $self->_project ) { |
174
|
10
|
50
|
|
|
|
43
|
if ( my $meta = $self->_project_meta ) { |
175
|
0
|
|
|
|
|
0
|
$p->add_meta($_) for @{ $meta }; |
|
0
|
|
|
|
|
0
|
|
176
|
|
|
|
|
|
|
} |
177
|
10
|
|
|
|
|
66
|
return $p->insert($result); |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
else { |
180
|
74
|
|
|
|
|
849
|
return $result; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
else { |
184
|
29
|
|
|
|
|
112
|
my @result = $self->_parse; |
185
|
27
|
100
|
|
|
|
192
|
if ( my $p = $self->_project ) { |
186
|
14
|
50
|
|
|
|
79
|
if ( my $meta = $self->_project_meta ) { |
187
|
0
|
|
|
|
|
0
|
$p->add_meta($_) for @{ $meta }; |
|
0
|
|
|
|
|
0
|
|
188
|
|
|
|
|
|
|
} |
189
|
14
|
|
|
|
|
77
|
return $p->insert(@result); |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
else { |
192
|
13
|
|
|
|
|
173
|
return [@result]; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# once this is called, the handle will have read to |
198
|
|
|
|
|
|
|
# the end of the stream, so it needs to be rewound |
199
|
|
|
|
|
|
|
# if we want to read from the top |
200
|
|
|
|
|
|
|
sub _string { |
201
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
202
|
0
|
|
|
|
|
0
|
my $handle = $self->_handle; |
203
|
0
|
|
|
|
|
0
|
my $string = do { local $/; <$handle> }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
204
|
0
|
|
|
|
|
0
|
return $string; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
24
|
|
|
sub _project_meta {}; |
207
|
159199
|
|
|
159199
|
|
520895
|
sub _logger { $logger } |
208
|
111
|
|
|
111
|
|
464
|
sub _project { shift->{'_proj'} } |
209
|
113
|
|
|
113
|
|
514
|
sub _handle { shift->{'_handle'} } |
210
|
5355
|
|
|
5355
|
|
15960
|
sub _factory { shift->{'_fac'} } |
211
|
18984
|
|
|
18984
|
|
37698
|
sub _args { shift->{'_args'} } |
212
|
0
|
|
|
0
|
|
0
|
sub _encoding { shift->{'_encoding'} } |
213
|
586
|
|
|
586
|
|
3127
|
sub _flush { shift->{'_flush'} } |
214
|
|
|
|
|
|
|
sub _handlers { |
215
|
584
|
|
|
584
|
|
1088
|
my ( $self, $type ) = @_; |
216
|
584
|
50
|
|
|
|
1699
|
if ( my $h = $self->{'_handlers'} ) { |
217
|
584
|
50
|
|
|
|
1798
|
return defined $type ? $h->{$type} : $h; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub _guess_format { |
222
|
2
|
|
|
2
|
|
4
|
my $handle = shift; |
223
|
2
|
|
|
|
|
62
|
my $line = $handle->getline; |
224
|
2
|
|
|
|
|
47
|
my $format; |
225
|
2
|
100
|
|
|
|
22
|
if ( $line =~ /^#nexus/i ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
226
|
1
|
|
|
|
|
3
|
$format = 'nexus'; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
elsif ( $line =~ /^<[^>]*nexml/ ) { |
229
|
0
|
|
|
|
|
0
|
$format = 'nexml'; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
elsif ( $line =~ /^<[^>]*phyloxml/ ) { |
232
|
0
|
|
|
|
|
0
|
$format = 'phyloxml'; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*\d+\s+\d+\s*$/ ) { |
235
|
0
|
|
|
|
|
0
|
$format = 'phylip'; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
elsif ( $line =~ /^>/ ) { |
238
|
0
|
|
|
|
|
0
|
$format = 'fasta'; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
elsif ( $line =~ /^\@/ ) { |
241
|
0
|
|
|
|
|
0
|
$format = 'fastq'; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*\(/ ) { |
244
|
0
|
|
|
|
|
0
|
$format = 'newick'; |
245
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /{/ ) { |
246
|
0
|
|
|
|
|
0
|
$format = 'figtree'; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
elsif ( $line =~ /<\? xml/ ) { |
250
|
0
|
|
|
|
|
0
|
$line = $handle; |
251
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /^<[^>]*nexml/ ) { |
|
|
0
|
|
|
|
|
|
252
|
0
|
|
|
|
|
0
|
$format = 'nexml'; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
elsif ( $line =~ /^<[^>]*phyloxml/ ) { |
255
|
0
|
|
|
|
|
0
|
$format = 'phyloxml'; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} |
258
|
2
|
|
|
|
|
6
|
seek( $handle, 0, 0 ); |
259
|
2
|
|
|
|
|
7
|
return $format; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# podinherit_insert_token |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=head1 SEE ALSO |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
267
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=over |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
The parsers are called by the L<Bio::Phylo::IO> object. |
274
|
|
|
|
|
|
|
Look there for examples. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>. |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=back |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=head1 CITATION |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
287
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
288
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
289
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=cut |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
1; |