line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# IO/File/RecordStream.pm |
2
|
|
|
|
|
|
|
package IO::File::RecordStream; |
3
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
4
|
|
|
|
|
|
|
|
5
|
4
|
|
|
4
|
|
2751
|
use 5.006; |
|
4
|
|
|
|
|
15
|
|
6
|
4
|
|
|
4
|
|
91
|
use strict; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
97
|
|
7
|
4
|
|
|
4
|
|
21
|
use warnings; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
176
|
|
8
|
|
|
|
|
|
|
|
9
|
4
|
|
|
4
|
|
35
|
use Moose; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
24
|
|
10
|
4
|
|
|
4
|
|
26956
|
use MooseX::StrictConstructor; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
28
|
|
11
|
4
|
|
|
4
|
|
12910
|
use namespace::autoclean; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
31
|
|
12
|
|
|
|
|
|
|
|
13
|
4
|
|
|
4
|
|
344
|
use autodie qw(:all); |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
47
|
|
14
|
4
|
|
|
4
|
|
22909
|
use Scalar::Util qw(reftype openhandle); |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
311
|
|
15
|
|
|
|
|
|
|
|
16
|
4
|
|
|
4
|
|
2129
|
use IO::Lines; |
|
4
|
|
|
|
|
49121
|
|
|
4
|
|
|
|
|
2646
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Name of the input file (a string). Optional: use the predicate
# has_file_name() to find out whether one was supplied.
has file_name => (
    isa       => 'Str',
    is        => 'ro',
    predicate => 'has_file_name',
);
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Handle the records are read from. When no handle is passed to the
# constructor, it is built lazily by _build_file_handle().
has file_handle => (
    isa     => 'FileHandle',
    is      => 'ro',
    lazy    => 1,
    builder => '_build_file_handle',
);
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Flag telling whether the input has been exhausted. It defaults to false,
# cannot be set through the constructor (init_arg is undef) and is only
# modified through the private writer _end_reached().
has end_reached => (
    is       => 'ro',
    init_arg => undef,
    default  => 0,
    writer   => '_end_reached',             # private writer
);
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Regular expression recognizing the separator line that delimits the
# individual records. Mandatory.
has match_separator => (
    isa      => 'RegexpRef',
    is       => 'ro',
    required => 1,
);
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Code ref that next() calls with an IO::Lines handle wrapping the lines of a
# single record; the value it returns is handed back to the caller as the
# record. Mandatory; passed to the constructor as 'record_factory'.
has _record_factory => (                    # keep the ref private
    isa      => 'CodeRef',
    is       => 'ro',
    required => 1,
    init_arg => 'record_factory',
);
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Allow various calling styles of the constructor:
#   new(%args), new(\%args):    standard Moose styles, passed through as-is
#   new($file_name):            plain (non-reference) scalar naming the file
#   new($file_handle):          glob reference to read the data from
#   new($file_name,   %args),
#   new($file_handle, %args):   as above, followed by further attributes
# The single-argument forms don't make much sense in this class because other
# attributes require initialization as well, but a sub-class may override
# those attributes to provide defaults, which makes calling with only a file
# name or file handle convenient.
around BUILDARGS => sub {
    my ($orig, $class, @args) = @_;

    # An even number of arguments is an ordinary key/value list.
    return $class->$orig(@args) unless @args % 2;

    # Odd count: the leading argument may be a file name or a file handle.
    my $input_type = reftype $args[0];
    my $input_key
        = !$input_type                    ? 'file_name'      # not a reference
        : $input_type eq reftype(\*STDIN) ? 'file_handle'    # glob reference
        :                                   undef;

    # Anything else (e.g. a single hash ref) is left to the original method.
    return $class->$orig(@args) unless defined $input_key;

    my ($input, @attributes) = @args;
    return $class->$orig($input_key => $input, @attributes);
};
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Post-construction check: make sure the file handle is usable and detect
# empty input right away. Confesses if file_handle is not an open handle.
sub BUILD {
    my ($self) = @_;

    # Reading the attribute also triggers its lazy builder if necessary.
    my $input_handle = $self->file_handle;
    if (not openhandle($input_handle)) {
        confess 'The value of file_handle does not seem to be an open handle';
    }

    # If the input file is empty, set end_reached immediately.
    if (eof($input_handle)) {
        $self->_end_reached(1);
    }

    return;
}
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Builder for file_handle: open the file named by file_name for reading and
# return the new handle. Confesses if no file name was specified. A failing
# open raises an exception because autodie is in effect in this file.
sub _build_file_handle {
    my ($self) = @_;

    if (not $self->has_file_name) {
        confess 'Cannot build file handle unless a file name was specified';
    }

    open my $new_handle, '<', $self->file_name;
    return $new_handle;
}
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Read the lines of the next record from the file handle.
#
# Lines are read until one matches match_separator or the input is exhausted.
# The separator line itself is dropped. end_reached is set when the input ends,
# be it right after a separator or without a trailing separator.
#
# Returns a reference to an array holding the record's chomped lines.
sub _read_next_record {
    my $self = shift;

    my $record_file_handle = $self->file_handle;
    my $match_separator    = $self->match_separator;    # loop-invariant
    my @record_lines;

    # Read into a lexical instead of the global $_: "while (<$fh>)" does not
    # localize $_ and would clobber the caller's value, or the element that
    # $_ is aliased to inside a foreach / map of the caller.
    while (defined(my $line = <$record_file_handle>)) {
        chomp $line;

        if ($line =~ $match_separator) {                # end of record
            # Drop separator line and return current collection of lines.
            # Also test if file ends in a separator.
            $self->_end_reached(1) if eof $record_file_handle;
            return \@record_lines;
        }

        # Store lines until end of record
        push @record_lines, $line;
    }

    $self->_end_reached(1);                 # file has been read
    return \@record_lines;
}
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Fetch the next record from the multi-record file. Returns nothing once the
# end of the input has been reached; otherwise returns the value the record
# factory produces for the record's lines.
sub next {
    my ($self) = @_;

    # Nothing left to read?
    if ($self->end_reached) {
        return;
    }

    # Collect the lines of the upcoming record and wrap them in an IO::Lines
    # handle, which is what gets passed to the record factory.
    my $lines_ref    = $self->_read_next_record;
    my $lines_handle = IO::Lines->new($lines_ref);

    # Let the factory construct the record object.
    my $factory    = $self->_record_factory;
    my $new_record = $factory->($lines_handle);

    return $new_record;
}
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
1; # End of IO::File::RecordStream |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
__END__ |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=pod |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=encoding UTF-8 |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 NAME |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
IO::File::RecordStream - Read multi-line records from a file. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 SYNOPSIS |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
use Bio::RNA::Treekin; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head1 DESCRIPTION |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Auxiliary class to read records consisting of multiple lines, separated by a |
168
|
|
|
|
|
|
|
separator matching a specified regular expression. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 METHODS |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 IO::File::RecordStream->new($file_name, @args) |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head2 IO::File::RecordStream->new($file_handle) |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Construct a new record stream from a file name or handle. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=over |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=item Mandatory arguments: |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=over |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item file_name | file_handle |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Name of, or handle to, the file to read data from. Pass either a file name or a file handle. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item match_separator |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Quoted regular expression matching the record separator. The input file is |
191
|
|
|
|
|
|
|
split at every match. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=item record_factory |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
A code ref that, when called with an L<IO::Lines> handle providing the lines of a |
196
|
|
|
|
|
|
|
single record, parses the data and constructs a record object. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=back |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=back |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Additionally, the constructor can be called with a single file name or file |
203
|
|
|
|
|
|
|
handle (without keyword). This requires overriding the C<match_separator> and |
204
|
|
|
|
|
|
|
C<record_factory> attributes to provide default values. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head2 $stream->next |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Returns the next record object. Internally, the input file is read until the |
209
|
|
|
|
|
|
|
next match of the C<match_separator>. The data is passed on to the |
210
|
|
|
|
|
|
|
C<record_factory> and the resulting record object is returned. Once the end of the input has been reached, nothing is returned. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head2 $stream->file_name |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Return the name of the file that this object reads data from. May be C<undef> |
215
|
|
|
|
|
|
|
if the data was read from a handle. Use predicate C<has_file_name> to query |
216
|
|
|
|
|
|
|
its presence. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 $stream->has_file_name |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Predicate to query whether a file name was specified for reading the data. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head2 $stream->file_handle |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Handle to the file the data is being read from. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head2 $stream->end_reached |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Query whether the input file has been read completely. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head2 $stream->match_separator |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Returns the matching expression used to identify the record separator. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 AUTHOR |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Felix Kuehnl, C<< <felix@bioinf.uni-leipzig.de> >> |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=head1 BUGS |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
Please report any bugs or feature requests by raising an issue at |
242
|
|
|
|
|
|
|
L<https://github.com/xileF1337/Bio-RNA-Treekin/issues>. |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
You can also do so by mailing to C<bug-bio-rna-treekin at rt.cpan.org>, |
245
|
|
|
|
|
|
|
or through the web interface at |
246
|
|
|
|
|
|
|
L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-Treekin>. I will be |
247
|
|
|
|
|
|
|
notified, and then you'll automatically be notified of progress on your bug as |
248
|
|
|
|
|
|
|
I make changes. |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head1 SUPPORT |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
perldoc Bio::RNA::Treekin |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
You can also look for information at: |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=over 4 |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=item * Github: the official repository |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
L<https://github.com/xileF1337/Bio-RNA-Treekin> |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Treekin> |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
L<http://annocpan.org/dist/Bio-RNA-Treekin> |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=item * CPAN Ratings |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
L<https://cpanratings.perl.org/d/Bio-RNA-Treekin> |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item * Search CPAN |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
L<https://metacpan.org/release/Bio-RNA-Treekin> |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=back |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Copyright 2019-2021 Felix Kühnl. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
290
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
291
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
292
|
|
|
|
|
|
|
(at your option) any later version. |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
295
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
296
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
297
|
|
|
|
|
|
|
GNU General Public License for more details. |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
300
|
|
|
|
|
|
|
along with this program. If not, see L<http://www.gnu.org/licenses/>. |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=cut |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# End of IO/File/RecordStream.pm |