line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Abstract; |
2
|
33
|
|
|
33
|
|
225
|
use strict; |
|
33
|
|
|
|
|
68
|
|
|
33
|
|
|
|
|
907
|
|
3
|
33
|
|
|
33
|
|
161
|
use base 'Bio::Phylo::IO'; |
|
33
|
|
|
|
|
63
|
|
|
33
|
|
|
|
|
3172
|
|
4
|
33
|
|
|
33
|
|
227
|
use IO::Handle; |
|
33
|
|
|
|
|
74
|
|
|
33
|
|
|
|
|
1310
|
|
5
|
33
|
|
|
33
|
|
183
|
use Bio::Phylo::Util::Exceptions 'throw'; |
|
33
|
|
|
|
|
74
|
|
|
33
|
|
|
|
|
1374
|
|
6
|
33
|
|
|
33
|
|
186
|
use Bio::Phylo::Util::CONSTANT '/looks_like/'; |
|
33
|
|
|
|
|
69
|
|
|
33
|
|
|
|
|
5480
|
|
7
|
33
|
|
|
33
|
|
211
|
use Bio::Phylo::Util::Logger ':simple'; |
|
33
|
|
|
|
|
59
|
|
|
33
|
|
|
|
|
3501
|
|
8
|
33
|
|
|
33
|
|
8115
|
use Bio::Phylo::Factory; |
|
33
|
|
|
|
|
78
|
|
|
33
|
|
|
|
|
188
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Abstract - Superclass for parsers used by Bio::Phylo::IO |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 DESCRIPTION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
This package is subclassed by all other packages within Bio::Phylo::Parsers::.*. |
17
|
|
|
|
|
|
|
There is no direct usage. |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $factory = Bio::Phylo::Factory->new; |
22
|
|
|
|
|
|
|
my $logger = Bio::Phylo::Util::Logger->new; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# argument is a file name, which we open |
25
|
|
|
|
|
|
|
sub _open_file { |
26
|
1
|
|
|
1
|
|
2
|
my $file_name = shift; |
27
|
1
|
|
50
|
|
|
8
|
my $encoding = shift || ''; |
28
|
1
|
50
|
|
|
|
73
|
open my $handle, "<${encoding}", $file_name or throw 'FileError' => $!; |
29
|
0
|
|
|
|
|
0
|
return $handle; |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# argument is a string, which, at perl version >5.8, |
33
|
|
|
|
|
|
|
# we can treat as a handle by opening it by reference |
34
|
|
|
|
|
|
|
sub _open_string { |
35
|
108
|
|
|
108
|
|
222
|
my $string_value = shift; |
36
|
108
|
|
50
|
|
|
552
|
my $encoding = shift || ''; |
37
|
30
|
50
|
|
30
|
|
191
|
open my $handle, "<${encoding}", \$string_value or throw 'FileError' => $!; |
|
30
|
|
|
|
|
57
|
|
|
30
|
|
|
|
|
189
|
|
|
108
|
|
|
|
|
2527
|
|
38
|
108
|
|
|
|
|
15736
|
return $handle; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# argument is a url, |
42
|
|
|
|
|
|
|
sub _open_url { |
43
|
0
|
|
|
0
|
|
0
|
my $url = shift; |
44
|
0
|
|
0
|
|
|
0
|
my $encoding = shift || ''; |
45
|
0
|
|
|
|
|
0
|
my $handle; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# we need to use LWP::UserAgent to fetch the resource, but |
48
|
|
|
|
|
|
|
# we don't "use" it at the top of the module because that |
49
|
|
|
|
|
|
|
# would make it a required dependency |
50
|
0
|
|
|
|
|
0
|
eval { require LWP::UserAgent }; |
|
0
|
|
|
|
|
0
|
|
51
|
0
|
0
|
|
|
|
0
|
if ($@) { |
52
|
0
|
|
|
|
|
0
|
throw 'ExtensionError' => |
53
|
|
|
|
|
|
|
"Providing a -url argument requires\nsuccesful loading " |
54
|
|
|
|
|
|
|
. "of LWP::UserAgent.\nHowever, there was an error when " |
55
|
|
|
|
|
|
|
. "I\ntried that:\n" |
56
|
|
|
|
|
|
|
. $@; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# apparently it's installed, so let's instantiate a client |
60
|
0
|
|
|
|
|
0
|
my $ua = LWP::UserAgent->new; |
61
|
0
|
|
|
|
|
0
|
$ua->timeout(10); |
62
|
0
|
|
|
|
|
0
|
$ua->env_proxy; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# fetch the resource, get an HTTP::Response object |
65
|
0
|
|
|
|
|
0
|
my $response = $ua->get($url); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# i.e. 200, or 304 (unchanged cache) |
68
|
0
|
0
|
0
|
|
|
0
|
if ( $response->is_success or $response->status_line =~ /304/ ) { |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# content is a string, so we create a handle in the same way |
71
|
|
|
|
|
|
|
# as when the argument was a string |
72
|
0
|
|
|
|
|
0
|
$handle = _open_string( $response->content, $encoding ); |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
else { |
75
|
0
|
|
|
|
|
0
|
throw 'NetworkError' => $response->status_line; |
76
|
|
|
|
|
|
|
} |
77
|
0
|
|
|
|
|
0
|
return $handle; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# deal with all possible data sources, return |
81
|
|
|
|
|
|
|
# a handle to whatever it is or throw an exception |
82
|
|
|
|
|
|
|
sub _open_handle { |
83
|
116
|
|
|
116
|
|
357
|
my %args = @_; |
84
|
116
|
|
|
|
|
209
|
my $handle; |
85
|
116
|
100
|
|
|
|
658
|
if ( $args{'-handle'} ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
86
|
7
|
|
|
|
|
36
|
binmode $args{'-handle'}, ":utf8"; |
87
|
7
|
|
|
|
|
14
|
$handle = $args{'-handle'}; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
elsif ( $args{'-file'} ) { |
90
|
1
|
|
|
|
|
6
|
$handle = _open_file( $args{'-file'}, $args{'-encoding'} ); |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
elsif ( $args{'-string'} ) { |
93
|
108
|
|
|
|
|
491
|
$handle = _open_string( $args{'-string'}, $args{'-encoding'} ); |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
elsif ( $args{'-url'} ) { |
96
|
0
|
|
|
|
|
0
|
$handle = _open_url( $args{'-url'}, $args{'-encoding'} ); |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
else { |
99
|
0
|
|
|
|
|
0
|
throw 'BadArgs' => 'No data source provided!'; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# check to see if the data source contains anything |
103
|
|
|
|
|
|
|
#if ( eof $handle ) { |
104
|
|
|
|
|
|
|
# throw 'NoData' => "Source is empty!"; |
105
|
|
|
|
|
|
|
#} |
106
|
115
|
|
|
|
|
554
|
return $handle; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# open a Bio::Phylo::Project if asked (if the -as_project flag |
110
|
|
|
|
|
|
|
# was provided.) If the user has supplied one (the -project flag) |
111
|
|
|
|
|
|
|
# simply return that or undefined otherwise. |
112
|
|
|
|
|
|
|
sub _open_project { |
113
|
113
|
|
|
113
|
|
483
|
my ( $fac, %args ) = @_; |
114
|
113
|
50
|
|
|
|
457
|
if ( $args{'-project'} ) { |
|
|
100
|
|
|
|
|
|
115
|
0
|
|
|
|
|
0
|
return $args{'-project'}; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
elsif ( $args{'-as_project'} ) { |
118
|
24
|
|
|
|
|
194
|
return $fac->create_project; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
else { |
121
|
89
|
|
|
|
|
1002
|
return undef; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# this constructor is called by the Bio::Phylo::IO::parse |
126
|
|
|
|
|
|
|
# subroutine |
127
|
|
|
|
|
|
|
sub _new { |
128
|
116
|
|
|
116
|
|
255
|
my $class = shift; |
129
|
116
|
|
|
|
|
342
|
my %args = looks_like_hash @_; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# we need to guess the format |
132
|
116
|
100
|
|
|
|
405
|
if ( $class eq __PACKAGE__ ) { |
133
|
2
|
100
|
|
|
|
7
|
if ( my $format = _guess_format(_open_handle(%args)) ) { |
134
|
1
|
|
|
|
|
3
|
$class = 'Bio::Phylo::Parsers::' . ucfirst($format); |
135
|
1
|
|
|
|
|
3
|
return looks_like_class($class)->_new(%args); |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
else { |
138
|
1
|
|
|
|
|
3
|
throw 'BadArgs' => "No format specified and unable to guess!"; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# factory is either user supplied or a private static |
143
|
114
|
|
33
|
|
|
813
|
my $fac = $args{'-factory'} || $factory; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# values of these object fields will be accessed |
146
|
|
|
|
|
|
|
# by child classes through the appropriate protected |
147
|
|
|
|
|
|
|
# getters |
148
|
|
|
|
|
|
|
return bless { |
149
|
|
|
|
|
|
|
'_fac' => $fac, |
150
|
|
|
|
|
|
|
'_handle' => _open_handle(%args), |
151
|
|
|
|
|
|
|
'_proj' => _open_project( $fac, %args ), |
152
|
|
|
|
|
|
|
'_args' => \%args, # for child-specific arguments |
153
|
|
|
|
|
|
|
'_encoding' => $args{'-encoding'}, |
154
|
|
|
|
|
|
|
'_handlers' => $args{'-handlers'}, |
155
|
114
|
|
|
|
|
542
|
'_flush' => $args{'-flush'}, |
156
|
|
|
|
|
|
|
}, $class; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# child classes can override this to specify |
160
|
|
|
|
|
|
|
# that their return value is a single scalar |
161
|
|
|
|
|
|
|
# (e.g. a tree block, as is the case for newick), |
162
|
|
|
|
|
|
|
# instead of an array of blocks |
163
|
29
|
|
|
29
|
|
79
|
sub _return_is_scalar { 0 } |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# this is called by Bio::Phylo::IO::parse, and |
166
|
|
|
|
|
|
|
# in turn it calls the _parse method of whatever |
167
|
|
|
|
|
|
|
# the concrete child instance is. |
168
|
|
|
|
|
|
|
sub _process { |
169
|
113
|
|
|
113
|
|
252
|
my $self = shift; |
170
|
113
|
100
|
|
|
|
409
|
if ( $self->_return_is_scalar ) { |
171
|
84
|
|
|
|
|
269
|
my $result = $self->_parse; |
172
|
84
|
100
|
|
|
|
452
|
if ( my $p = $self->_project ) { |
173
|
10
|
50
|
|
|
|
52
|
if ( my $meta = $self->_project_meta ) { |
174
|
0
|
|
|
|
|
0
|
$p->add_meta($_) for @{ $meta }; |
|
0
|
|
|
|
|
0
|
|
175
|
|
|
|
|
|
|
} |
176
|
10
|
|
|
|
|
48
|
return $p->insert($result); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
else { |
179
|
74
|
|
|
|
|
780
|
return $result; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
else { |
183
|
29
|
|
|
|
|
106
|
my @result = $self->_parse; |
184
|
27
|
100
|
|
|
|
192
|
if ( my $p = $self->_project ) { |
185
|
14
|
50
|
|
|
|
70
|
if ( my $meta = $self->_project_meta ) { |
186
|
0
|
|
|
|
|
0
|
$p->add_meta($_) for @{ $meta }; |
|
0
|
|
|
|
|
0
|
|
187
|
|
|
|
|
|
|
} |
188
|
14
|
|
|
|
|
68
|
return $p->insert(@result); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
else { |
191
|
13
|
|
|
|
|
142
|
return [@result]; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# once this is called, the handle will have read to |
197
|
|
|
|
|
|
|
# the end of the stream, so it needs to be rewound |
198
|
|
|
|
|
|
|
# if we want to read from the top |
199
|
|
|
|
|
|
|
sub _string { |
200
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
201
|
0
|
|
|
|
|
0
|
my $handle = $self->_handle; |
202
|
0
|
|
|
|
|
0
|
my $string = do { local $/; <$handle> }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
203
|
0
|
|
|
|
|
0
|
return $string; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
24
|
|
|
sub _project_meta {}; |
206
|
140686
|
|
|
140686
|
|
420861
|
sub _logger { $logger } |
207
|
111
|
|
|
111
|
|
418
|
sub _project { shift->{'_proj'} } |
208
|
113
|
|
|
113
|
|
465
|
sub _handle { shift->{'_handle'} } |
209
|
4795
|
|
|
4795
|
|
18291
|
sub _factory { shift->{'_fac'} } |
210
|
16464
|
|
|
16464
|
|
29876
|
sub _args { shift->{'_args'} } |
211
|
0
|
|
|
0
|
|
0
|
sub _encoding { shift->{'_encoding'} } |
212
|
586
|
|
|
586
|
|
5321
|
sub _flush { shift->{'_flush'} } |
213
|
|
|
|
|
|
|
sub _handlers { |
214
|
584
|
|
|
584
|
|
1967
|
my ( $self, $type ) = @_; |
215
|
584
|
50
|
|
|
|
3649
|
if ( my $h = $self->{'_handlers'} ) { |
216
|
584
|
50
|
|
|
|
2680
|
return defined $type ? $h->{$type} : $h; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
sub _guess_format { |
221
|
2
|
|
|
2
|
|
5
|
my $handle = shift; |
222
|
2
|
|
|
|
|
57
|
my $line = $handle->getline; |
223
|
2
|
|
|
|
|
46
|
my $format; |
224
|
2
|
100
|
|
|
|
23
|
if ( $line =~ /^#nexus/i ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
225
|
1
|
|
|
|
|
3
|
$format = 'nexus'; |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
elsif ( $line =~ /^<[^>]*nexml/ ) { |
228
|
0
|
|
|
|
|
0
|
$format = 'nexml'; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
elsif ( $line =~ /^<[^>]*phyloxml/ ) { |
231
|
0
|
|
|
|
|
0
|
$format = 'phyloxml'; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*\d+\s+\d+\s*$/ ) { |
234
|
0
|
|
|
|
|
0
|
$format = 'phylip'; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
elsif ( $line =~ /^>/ ) { |
237
|
0
|
|
|
|
|
0
|
$format = 'fasta'; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
elsif ( $line =~ /^\@/ ) { |
240
|
0
|
|
|
|
|
0
|
$format = 'fastq'; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*\(/ ) { |
243
|
0
|
|
|
|
|
0
|
$format = 'newick'; |
244
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /{/ ) { |
245
|
0
|
|
|
|
|
0
|
$format = 'figtree'; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
elsif ( $line =~ /<\? xml/ ) { |
249
|
0
|
|
|
|
|
0
|
$line = $handle; |
250
|
0
|
0
|
|
|
|
0
|
if ( $line =~ /^<[^>]*nexml/ ) { |
|
|
0
|
|
|
|
|
|
251
|
0
|
|
|
|
|
0
|
$format = 'nexml'; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
elsif ( $line =~ /^<[^>]*phyloxml/ ) { |
254
|
0
|
|
|
|
|
0
|
$format = 'phyloxml'; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} |
257
|
2
|
|
|
|
|
5
|
seek( $handle, 0, 0 ); |
258
|
2
|
|
|
|
|
7
|
return $format; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# podinherit_insert_token |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head1 SEE ALSO |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
266
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=over |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
The parsers are called by the L<Bio::Phylo::IO> object. |
273
|
|
|
|
|
|
|
Look there for examples. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=back |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 CITATION |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
286
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
287
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
288
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
1; |