File Coverage

blib/lib/IO/File/RecordStream.pm
Criterion Covered Total %
statement 50 52 96.1
branch 9 12 75.0
condition n/a
subroutine 13 13 100.0
pod 1 2 50.0
total 73 79 92.4


line stmt bran cond sub pod time code
1             # IO/File/RecordStream.pm
2             package IO::File::RecordStream;
3             our $VERSION = '0.03';
4              
5 4     4   2751 use 5.006;
  4         15  
6 4     4   91 use strict;
  4         8  
  4         97  
7 4     4   21 use warnings;
  4         7  
  4         176  
8              
9 4     4   35 use Moose;
  4         8  
  4         24  
10 4     4   26956 use MooseX::StrictConstructor;
  4         10  
  4         28  
11 4     4   12910 use namespace::autoclean;
  4         10  
  4         31  
12              
13 4     4   344 use autodie qw(:all);
  4         9  
  4         47  
14 4     4   22909 use Scalar::Util qw(reftype openhandle);
  4         8  
  4         311  
15              
16 4     4   2129 use IO::Lines;
  4         49121  
  4         2646  
17              
18             has 'file_name' => (
19             is => 'ro',
20             isa => 'Str',
21             predicate => 'has_file_name',
22             );
23              
24             has 'file_handle' => (
25             is => 'ro',
26             isa => 'FileHandle',
27             builder => '_build_file_handle',
28             lazy => 1,
29             );
30              
31             has 'end_reached' => (
32             is => 'ro',
33             default => 0,
34             init_arg => undef,
35             writer => '_end_reached', # private writer
36             );
37              
38             # A regexp matching the separator line used to separate individual records
39             has 'match_separator' => (
40             is => 'ro',
41             isa => 'RegexpRef',
42             required => 1,
43             );
44              
45             # A code ref that can be passed a ref to the array containing the read
46             # lines and that makes a new record object from it.
47             has '_record_factory' => ( # keep the ref private
48             is => 'ro',
49             isa => 'CodeRef',
50             init_arg => 'record_factory',
51             required => 1,
52             );
53              
54             # Allow various calling styles of the constructor:
55             # new(file_handle): pass file handle to read data from
56             # new(file_name): pass file name of file to read data from
57             # These don't make much sense in this class because other attributes require
58             # initialization as well, but in a sub-class these may be overwritten and
59             # calling with only a file name and file handle is convenient.
60             around BUILDARGS => sub {
61             my $orig = shift;
62             my $class = shift;
63              
64             return $class->$orig(@_) unless @_ == 1; # no special handling
65              
66             # Check if we got a file name or handle for multi-record input file.
67             if (not reftype $_[0]) { # file name given
68             my $input_file_name = shift;
69             return $class->$orig(file_name => $input_file_name);
70             }
71             elsif (reftype $_[0] eq reftype \*STDIN) { # file handle given
72             my $input_file_handle = shift;
73             return $class->$orig(file_handle => $input_file_handle);
74             }
75             else { # no file name / handle
76             return $class->$orig(@_);
77             }
78             };
79              
80             sub BUILD {
81 3     3 0 8 my $self = shift;
82              
83 3 50       106 confess 'The value of file_handle does not seem to be an open handle'
84             unless openhandle $self->file_handle;
85              
86             # If the input file is empty, set end_reached immediately.
87 3 50       94 $self->_end_reached(1) if eof $self->file_handle;
88              
89 3         112 return;
90             }
91              
92             # Open file handle from file name if no handle was passed. Die if we cant.
93             sub _build_file_handle {
94 1     1   3 my $self = shift;
95              
96 1 50       38 confess 'Cannot build file handle unless a file name was specified'
97             unless $self->has_file_name;
98              
99 1         76 open my $input_file_handle, '<', $self->file_name;
100 1         2029 return $input_file_handle;
101             }
102              
103             # Returns chomped lines.
104             sub _read_next_record {
105 4     4   8 my $self = shift;
106              
107 4         113 my $record_file_handle = $self->file_handle;
108 4         10 my @record_lines;
109 4         25 while (<$record_file_handle>) {
110 140         337 my $line = $_;
111 140         263 chomp $line;
112              
113 140 100       4387 if ($line =~ $self->match_separator) { # end of record
114             # Drop separator line and return current collection of lines.
115             # Also test if file ends in a separator.
116 4 100       57 $self->_end_reached(1) if eof $record_file_handle;
117             return \@record_lines
118 4         19 }
119             else {
120             # Store lines until end of record
121 136         694 push @record_lines, $line;
122             }
123             }
124              
125 0         0 $self->_end_reached(1); # file has been read
126 0         0 return \@record_lines;
127             }
128              
129             # Get the next record from the multi-record file.
130             sub next {
131 5     5 1 35 my $self = shift;
132              
133             # Are there any more entries?
134 5 100       167 return if $self->end_reached;
135              
136             # Read lines of next record.
137 4         16 my $record_lines_ref = $self->_read_next_record;
138              
139             # Construct record object using factory
140 4         41 my $record_array_handle = IO::Lines->new($record_lines_ref);
141 4         405 my $record = $self->_record_factory->($record_array_handle);
142              
143 4         27 return $record;
144             }
145              
146             __PACKAGE__->meta->make_immutable;
147              
148             1; # End of IO::File::RecordStream
149              
150             __END__
151              
152              
153             =pod
154              
155             =encoding UTF-8
156              
157             =head1 NAME
158              
159             IO::File::RecordStream - Read multi-line records from a file.
160              
161             =head1 SYNOPSIS
162              
163             use Bio::RNA::Treekin;
164              
165             =head1 DESCRIPTION
166              
167             Auxiliary class to read records consisting of multiple lines, separated by a
168             separator matching a specified regular expression.
169              
170             =head1 METHODS
171              
172             =head2 IO::File::RecordStream->new($file_name, @args)
173              
174             =head2 IO::File::RecordStream->new($file_handle)
175              
176             Construct a new record stream from a file name or handle.
177              
178             =over
179              
180             =item Mandatory arguments:
181              
182             =over
183              
184             =item file_name | file_handle
185              
186             Name of or handle to file to read data from. Pass either
187              
188             =item match_separator
189              
190             Quoted regular expression matching the record separator. The input file is
191             split at every match.
192              
193             =item record_factory
194              
195             A code ref that, when called with an array ref containing the lines of a
196             single record, parses the data and constructs a record object.
197              
198             =back
199              
200             =back
201              
202             Additionally, the constructor can be called with a single file name or file
203             handle (without keyword). This requires to override the C<match_separator> and
204             C<record_factory> attributes to provide default values.
205              
206             =head2 $stream->next
207              
208             Returns the next record object. Internally, the input file is read until the
209             next match of the C<match_separator>. The data is passed on to the
210             C<record_factory> and the returned record object is returned.
211              
212             =head2 $stream->file_name
213              
214             Return the name of the file that this object reads data from. May be C<undef>
215             if the data was read from a handle. Use predicate C<has_file_name> to query
216             its presence.
217              
218             =head2 $stream->has_file_name
219              
220             Predicate query whether a file name has been used to read the data.
221              
222             =head2 $stream->file_handle
223              
224             Handle to the file the data is being read from.
225              
226             =head2 $stream->end_reached
227              
228             Query whether the input file was read completely yet.
229              
230             =head2 $stream->match_separator
231              
232             Returns the matching expression used to identify the record separator.
233              
234             =head1 AUTHOR
235              
236             Felix Kuehnl, C<< <felix@bioinf.uni-leipzig.de> >>
237              
238              
239             =head1 BUGS
240              
241             Please report any bugs or feature requests by raising an issue at
242             L<https://github.com/xileF1337/Bio-RNA-Treekin/issues>.
243              
244             You can also do so by mailing to C<bug-bio-rna-treekin at rt.cpan.org>,
245             or through the web interface at
246             L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-Treekin>. I will be
247             notified, and then you'll automatically be notified of progress on your bug as
248             I make changes.
249              
250              
251             =head1 SUPPORT
252              
253             You can find documentation for this module with the perldoc command.
254              
255             perldoc Bio::RNA::Treekin
256              
257              
258             You can also look for information at:
259              
260             =over 4
261              
262             =item * Github: the official repository
263              
264             L<https://github.com/xileF1337/Bio-RNA-Treekin>
265              
266             =item * RT: CPAN's request tracker (report bugs here)
267              
268             L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Treekin>
269              
270             =item * AnnoCPAN: Annotated CPAN documentation
271              
272             L<http://annocpan.org/dist/Bio-RNA-Treekin>
273              
274             =item * CPAN Ratings
275              
276             L<https://cpanratings.perl.org/d/Bio-RNA-Treekin>
277              
278             =item * Search CPAN
279              
280             L<https://metacpan.org/release/Bio-RNA-Treekin>
281              
282             =back
283              
284              
285             =head1 LICENSE AND COPYRIGHT
286              
287             Copyright 2019-2021 Felix Kühnl.
288              
289             This program is free software: you can redistribute it and/or modify
290             it under the terms of the GNU General Public License as published by
291             the Free Software Foundation, either version 3 of the License, or
292             (at your option) any later version.
293              
294             This program is distributed in the hope that it will be useful,
295             but WITHOUT ANY WARRANTY; without even the implied warranty of
296             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
297             GNU General Public License for more details.
298              
299             You should have received a copy of the GNU General Public License
300             along with this program. If not, see L<http://www.gnu.org/licenses/>.
301              
302              
303             =cut
304              
305             # End of IO/File/RecordStream.pm