| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# IO/File/RecordStream.pm |
|
2
|
|
|
|
|
|
|
package IO::File::RecordStream; |
|
3
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
|
4
|
|
|
|
|
|
|
|
|
5
|
4
|
|
|
4
|
|
2751
|
use 5.006; |
|
|
4
|
|
|
|
|
15
|
|
|
6
|
4
|
|
|
4
|
|
91
|
use strict; |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
97
|
|
|
7
|
4
|
|
|
4
|
|
21
|
use warnings; |
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
176
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
4
|
|
|
4
|
|
35
|
use Moose; |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
24
|
|
|
10
|
4
|
|
|
4
|
|
26956
|
use MooseX::StrictConstructor; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
28
|
|
|
11
|
4
|
|
|
4
|
|
12910
|
use namespace::autoclean; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
31
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
4
|
|
|
4
|
|
344
|
use autodie qw(:all); |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
47
|
|
|
14
|
4
|
|
|
4
|
|
22909
|
use Scalar::Util qw(reftype openhandle); |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
311
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
4
|
|
|
4
|
|
2129
|
use IO::Lines; |
|
|
4
|
|
|
|
|
49121
|
|
|
|
4
|
|
|
|
|
2646
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Name of the input file. Optional, because an already opened handle may be
# passed instead; has_file_name tells whether a name was supplied.
has file_name => (
    isa       => 'Str',
    is        => 'ro',
    predicate => 'has_file_name',
);

# Handle the records are read from. When it is not passed to the constructor,
# it is built lazily by _build_file_handle.
has file_handle => (
    isa     => 'FileHandle',
    is      => 'ro',
    lazy    => 1,
    builder => '_build_file_handle',
);

# Flag telling whether the input has been exhausted. It cannot be set via the
# constructor and is only changed through the private writer _end_reached.
has end_reached => (
    is       => 'ro',
    writer   => '_end_reached',          # private writer
    init_arg => undef,
    default  => 0,
);

# A regexp matching the separator line that delimits individual records.
has match_separator => (
    isa      => 'RegexpRef',
    is       => 'ro',
    required => 1,
);

# A code ref that next() calls with an IO::Lines handle wrapping the lines of
# one record; whatever it returns is handed back as the record. The attribute
# itself stays private, the constructor argument is named "record_factory".
has _record_factory => (
    isa      => 'CodeRef',
    is       => 'ro',
    required => 1,
    init_arg => 'record_factory',
);
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Support shorthand constructor calls in addition to the keyword form:
#   new($file_name):   a single non-reference argument is taken as the name
#                      of the file to read data from
#   new($file_handle): a single glob-like reference is taken as the handle
#                      to read data from
# In this class the shorthands are of little use because further attributes
# are required, but a sub-class may provide defaults for those, which makes
# calling with only a file name or file handle convenient.
around BUILDARGS => sub {
    my ($orig, $class, @args) = @_;

    # Anything but exactly one argument needs no special handling.
    return $class->$orig(@args) if @args != 1;

    # reftype is undefined for plain (non-reference) values.
    my $arg_reftype = reftype $args[0];

    # A plain scalar is interpreted as a file name.
    return $class->$orig(file_name => $args[0])
        if not $arg_reftype;

    # A reference of the same kind as \*STDIN is interpreted as a file handle.
    return $class->$orig(file_handle => $args[0])
        if $arg_reftype eq reftype \*STDIN;

    # Neither file name nor handle: let the default argument handling decide.
    return $class->$orig(@args);
};
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Post-construction check: make sure there is an open handle to read from
# and mark the stream as finished right away if the input holds no data.
# Fetching file_handle here also triggers its lazy builder if necessary.
sub BUILD {
    my ($self) = @_;

    my $input_handle = $self->file_handle;

    if (not openhandle($input_handle)) {
        confess 'The value of file_handle does not seem to be an open handle';
    }

    # An empty input file contains no records, so the end is reached at once.
    if (eof($input_handle)) {
        $self->_end_reached(1);
    }

    return;
}
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Lazy builder for file_handle: opens the file named by file_name for reading
# and returns the new handle. Dies if no file name was given. A failing open
# is not checked explicitly here; the file-wide "use autodie" makes it fatal.
sub _build_file_handle {
    my ($self) = @_;

    if (not $self->has_file_name) {
        confess 'Cannot build file handle unless a file name was specified';
    }

    open my $read_handle, '<', $self->file_name;

    return $read_handle;
}
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Read the lines of the next record from file_handle.
#
# Lines are chomped and collected until a line matching match_separator is
# found; the separator line itself is dropped. If the input ends right after
# that separator, or if the input runs out before any separator is seen,
# end_reached is set.
#
# Returns a reference to the array of chomped lines of the record (possibly
# empty).
sub _read_next_record {
    my $self = shift;

    my $record_file_handle = $self->file_handle;
    my $match_separator    = $self->match_separator;  # invariant in the loop
    my @record_lines;

    # Read into a lexical variable. A bare "while (<$fh>)" assigns to the
    # global $_ without localizing it, which would overwrite the caller's $_
    # (e.g. an aliased list element when called inside "for (@list)").
    while (defined(my $line = <$record_file_handle>)) {
        chomp $line;

        if ($line =~ $match_separator) {                # end of record
            # Drop separator line and return current collection of lines.
            # Also test if file ends in a separator.
            $self->_end_reached(1) if eof $record_file_handle;
            return \@record_lines;
        }

        # Store lines until end of record
        push @record_lines, $line;
    }

    # No (further) separator was found: the file has been read completely.
    $self->_end_reached(1);
    return \@record_lines;
}
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Fetch the next record of the multi-record file.
#
# Returns nothing once end_reached is set. Otherwise the lines of the next
# record are read, wrapped in an IO::Lines handle and passed to the record
# factory; the factory's (scalar) result is returned.
sub next {
    my ($self) = @_;

    # Nothing left to read?
    if ($self->end_reached) {
        return;
    }

    # Collect the lines belonging to the upcoming record.
    my $lines_ref = $self->_read_next_record;

    # Let the factory build the record object from a handle over the lines.
    my $lines_handle = IO::Lines->new($lines_ref);
    my $record_obj   = $self->_record_factory->($lines_handle);

    return $record_obj;
}
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
1; # End of IO::File::RecordStream |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
__END__ |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=pod |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=encoding UTF-8 |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 NAME |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
IO::File::RecordStream - Read multi-line records from a file. |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
use Bio::RNA::Treekin; |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Auxiliary class to read records consisting of multiple lines, separated by a |
|
168
|
|
|
|
|
|
|
separator matching a specified regular expression. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 METHODS |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 IO::File::RecordStream->new(%args)

=head2 IO::File::RecordStream->new($file_name)
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head2 IO::File::RecordStream->new($file_handle) |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Construct a new record stream from a file name or handle. |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=over |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=item Mandatory arguments: |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
=over |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=item file_name | file_handle |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Name of, or handle to, the file to read data from. Pass either C<file_name> or
C<file_handle>; if only a name is given, the file is opened for reading.
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=item match_separator |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Quoted regular expression (C<qr//>) matching the record separator line. Each
line of the input file is chomped and tested against it; a matching line ends
the current record and is itself discarded.
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=item record_factory |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
A code ref that, when called with an L<IO::Lines> handle providing the lines
of a single record, parses the data and constructs a record object.
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=back |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=back |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Additionally, the constructor can be called with a single file name or file
handle (without keyword). This requires overriding the C<match_separator> and
C<record_factory> attributes in a sub-class to provide default values.
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head2 $stream->next |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Returns the next record object, or nothing (an empty list, C<undef> in scalar
context) once the end of the input has been reached. Internally, the input
file is read up to the next line matching C<match_separator>. The lines read
are passed on to the C<record_factory>, and the object it constructs is
returned.
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head2 $stream->file_name |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Return the name of the file that this object reads data from. May be C<undef> |
|
215
|
|
|
|
|
|
|
if the data was read from a handle. Use predicate C<has_file_name> to query |
|
216
|
|
|
|
|
|
|
its presence. |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 $stream->has_file_name |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Predicate to query whether a file name was passed to the constructor.
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head2 $stream->file_handle |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Handle to the file the data is being read from. |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head2 $stream->end_reached |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Query whether the input file has been read completely.
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head2 $stream->match_separator |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Returns the matching expression used to identify the record separator. |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 AUTHOR |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Felix Kuehnl, C<< <felix@bioinf.uni-leipzig.de> >> |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=head1 BUGS |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
Please report any bugs or feature requests by raising an issue at |
|
242
|
|
|
|
|
|
|
L<https://github.com/xileF1337/Bio-RNA-Treekin/issues>. |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
You can also do so by mailing to C<bug-bio-rna-treekin at rt.cpan.org>, |
|
245
|
|
|
|
|
|
|
or through the web interface at |
|
246
|
|
|
|
|
|
|
L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-Treekin>. I will be |
|
247
|
|
|
|
|
|
|
notified, and then you'll automatically be notified of progress on your bug as |
|
248
|
|
|
|
|
|
|
I make changes. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head1 SUPPORT |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
perldoc Bio::RNA::Treekin |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
You can also look for information at: |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=over 4 |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=item * Github: the official repository |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
L<https://github.com/xileF1337/Bio-RNA-Treekin> |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Treekin> |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
L<http://annocpan.org/dist/Bio-RNA-Treekin> |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
L<https://cpanratings.perl.org/d/Bio-RNA-Treekin> |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item * Search CPAN |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
L<https://metacpan.org/release/Bio-RNA-Treekin> |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=back |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Copyright 2019-2021 Felix Kühnl. |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
|
290
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
|
291
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
|
292
|
|
|
|
|
|
|
(at your option) any later version. |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
|
295
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
296
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
297
|
|
|
|
|
|
|
GNU General Public License for more details. |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
|
300
|
|
|
|
|
|
|
along with this program. If not, see L<http://www.gnu.org/licenses/>. |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=cut |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# End of IO/File/RecordStream.pm |