File Coverage

lib/BoutrosLab/TSVStream/IO/Role/Reader/Fixed.pm
Criterion Covered Total %
statement 124 129 96.1
branch 40 60 66.6
condition 11 15 73.3
subroutine 24 25 96.0
pod 1 7 14.2
total 200 236 84.7


line stmt bran cond sub pod time code
1             package BoutrosLab::TSVStream::IO::Role::Reader::Fixed;
2              
3             # safe Perl
4 8     8   507 use warnings;
  8         12  
  8         337  
5 8     8   28 use strict;
  8         8  
  8         124  
6 8     8   23 use Carp;
  8         8  
  8         2200  
7              
8             =head1 NAME
9              
10             BoutrosLab::TSVStream::IO::Role::Reader::Fixed
11              
12             =cut
13              
14 8     8   28 use Moose::Role;
  8         7  
  8         38  
15 8     8   26864 use Moose::Util::TypeConstraints;
  8         9  
  8         51  
16 8     8   10579 use namespace::autoclean;
  8         10  
  8         51  
17 8     8   4448 use List::MoreUtils qw(all);
  8         55065  
  8         45  
18              
19             enum 'ReadHeaderType', [qw(auto none check)];
20              
21             has header => (
22             is => 'ro',
23             lazy => 1,
24             isa => 'ReadHeaderType',
25             default => 'auto'
26             );
27              
28             has extra_class_params => (
29             is => 'ro',
30             isa => 'ArrayRef[Str]',
31             default => sub { [] }
32             );
33              
34             has pre_header_pattern => (
35             is => 'ro',
36             isa => 'Maybe[RegexpRef]',
37             default => undef
38             );
39              
40             has _is_pre_header => (
41             is => 'ro',
42             isa => 'CodeRef',
43             lazy => 1,
44             builder => '_init_is_pre_header'
45             );
46              
47             sub _init_is_pre_header {
48 13     13   15 my $self = shift;
49 13 50       388 if (my $pat = $self->pre_header_pattern) {
50 38     38   121 sub { $_[0] =~ /$pat/ }
51 13         414 }
52             else {
53 0         0 $self->_is_comment
54             }
55             }
56              
57             has pre_headers => (
58             is => 'ro',
59             isa => 'ArrayRef[Str]',
60             init_arg => undef,
61             default => sub { [] }
62             );
63              
64             has _comments => (
65             is => 'ro',
66             isa => 'ArrayRef[Str]',
67             init_arg => undef,
68             writer => '_set_comments',
69             default => sub { [] }
70             );
71              
72             around BUILDARGS => sub {
73             my $orig = shift;
74             my $class = shift;
75             my $arg = ref($_[0]) ? $_[0] : { @_ };
76              
77             my %valid_arg = (
78             file => 1,
79             handle => 1,
80             header => 1,
81             class => 1,
82             comment => 1,
83             pre_comment => 1,
84             pre_header => 1,
85             header_fix => 1,
86             extra_class_params => 1,
87              
88             pre_header_pattern => 1,
89             comment_pattern => 1
90             );
91             $arg->{_valid_arg} = \%valid_arg;
92             $arg->{_open_mode} = '<';
93             $class->$orig( $arg );
94             };
95              
96             sub _read_no_header {
97 32     32   39 my $self = shift;
98 32         1056 my $none = $self->header eq 'none';
99 32         77 ( $none, $none );
100             }
101              
102             sub _fill_dyn_fields {
103 32     32   31 return;
104             }
105              
106             sub _header {
107 95     95   84 my $self = shift;
108 95         78 my $stream_fields = shift;
109 95         3182 my $class_fields = $self->fields;
110             return $#$class_fields <= $#$stream_fields
111 95   100 204   681 && all { uc( $stream_fields->[$_] ) eq uc( $class_fields->[$_] ) } 0 .. $#$class_fields;
  204         367  
112             }
113              
114             sub BUILD {
115 119     119 0 117 my $self = shift;
116              
117 119         303 my ( $none, $ret ) = $self->_read_no_header;
118 119 50       184 return if $ret;
119              
120 119         88 my @pre;
121 119         110 my $stream_fields = [];
122 119         107 my $is_head = undef;
123 119 50       226 print "Starting pre-header checks\n" if $ENV{HEADER_PROCESS};
124 119 100       276 if (!$self->_peek) {
125 24         65 $self->_fill_dyn_fields( $none, 0, $stream_fields );
126             }
127             else {
128 95         225 while (my $line = $self->_read) {
129 121         91 my $is_pre;
130 121         132 my $lline = $line->{line};
131              
132             sub check1 {
133 0     0 0 0 my( $self, $test, $bool, $check, $line ) = @_;
134 0 0       0 if ($self->$bool) {
135 0 0       0 print " ",uc($test),($self->$check->($line) ? ":YES" : ":no "), "\n";
136             }
137             else {
138 0         0 print " ",lc($test), "\n";
139             }
140             }
141 121 50       209 print "Checking line: $lline\n" if $ENV{HEADER_PROCESS};
142 121 50       176 check1( $self, 'PH', pre_header => _is_pre_header => $lline ) if $ENV{HEADER_PROCESS};
143 121 50       168 check1( $self, 'PC', pre_comment => _is_comment => $lline ) if $ENV{HEADER_PROCESS};
144 121 50       183 check1( $self, 'CO', comment => _is_comment => $lline ) if $ENV{HEADER_PROCESS};
145              
146 121 100       3603 if ($self->pre_header) {
147 38         1120 $is_pre = $self->_is_pre_header->($lline);
148 38 50 0     1041 $is_pre ||= $self->_is_comment->($lline) if $self->pre_comment;
149             }
150             else {
151 83 100       2358 $is_pre = $self->_is_comment->($lline) if $self->comment;
152             }
153 121 100       181 if ($is_pre) {
154 26 50       47 print " -> pre\n" if $ENV{HEADER_PROCESS};
155 26         30 push @pre, $line;
156 26         66 next;
157             }
158 95         2772 $stream_fields = $self->header_fix->($line)->{fields};
159             # $stream_fields = $line->{fields};
160 95         213 $is_head = $self->_header($stream_fields);
161 95 50       349 print " -> NOT pre, none: $none, is_head: $is_head, header_proc: ",$self->header,"\n" if $ENV{HEADER_PROCESS};
162 95         218 $self->_fill_dyn_fields( $none, $is_head, $stream_fields );
163 95 100 100     655 if ($none or !$is_head && $self->header eq 'auto') {
      66        
164 24 50       46 print " *** put back\n" if $ENV{HEADER_PROCESS};
165 24         66 $self->_unread( @pre, $line );
166 24         644 return;
167             }
168 71         102 last;
169             }
170              
171 71 50       162 print " *** kept\n" if $ENV{HEADER_PROCESS};
172 71         2274 my $die = $self->_num_fields != scalar(@$stream_fields);
173              
174 71 100 100     265 if ($die || !$is_head) {
175 9         20 my $error = '';
176 9 100       31 $error = 'Headers do not match' if !$is_head;
177 9 100       31 $error .= ' and wrong number of fields' if $die;
178 9         25 $error =~ s/^ and w/W/;
179 9         38 $self->_croak( $error, $stream_fields );
180             }
181 62         54 push @{ $self->pre_headers }, ( map { $_->{line} } @pre );
  62         1896  
  18         311  
182             }
183             }
184              
185             sub read_comments {
186 6     6 0 3666 my $self = shift;
187 6         224 my $comments = $self->_comments;
188 6         222 $self->_set_comments( [] );
189 6         28 return $comments;
190             }
191              
192             sub _load_comments {
193 239     239   205 my $self = shift;
194 239 100       7992 return unless $self->comment;
195 6         188 my $comments = $self->_comments;
196 6         18 while (my $line = $self->_read) {
197 10 100       426 if (! $self->_is_comment->( $line->{line} )) {
198 4         12 $self->_unread($line);
199 4         6 return;
200             }
201 6         26 push @$comments, $line->{line};
202             }
203             }
204              
205             sub read {
206 239     239 1 77535 my $self = shift;
207 239         400 $self->_load_comments;
208 239 100       556 return unless my $values = $self->_read;
209 133         157 my $line = $values->{line};
210 133         151 $values = $values->{fields};
211 133         1271 my $error;
212             my $obj;
213 133 50       3889 $error = 'Wrong number of fields' if scalar(@$values) != $self->_num_fields;
214              
215 133 50       226 unless ($error) {
216 133         127 eval {
217             $obj = $self->class->new(
218             field_values => $values,
219 133         3692 @{ $self->extra_class_params },
  133         4010  
220             $self->_read_config
221             );
222             };
223 133 50       752 $error = $@ if $@;
224             }
225              
226 133 50       193 $self->_croak( $error, $values ) if $error;
227              
228 133         339 return $obj;
229             }
230              
231             sub filter {
232 2     2 0 4 my ( $self, $filtersub ) = @_;
233 2         27 return BoutrosLab::TSVStream::IO::Role::Reader::Filter->new(
234             reader => $self,
235             filtersub => $filtersub
236             );
237             }
238              
239             package BoutrosLab::TSVStream::IO::Role::Reader::Filter;
240              
241             # safe Perl
242 8     8   10982 use warnings;
  8         10  
  8         304  
243 8     8   30 use strict;
  8         9  
  8         132  
244 8     8   29 use Carp;
  8         7  
  8         433  
245              
246 8     8   33 use Moose;
  8         11  
  8         55  
247              
248             has reader => (
249             is => 'ro',
250             isa => 'Object',
251             required => 1
252             );
253              
254             has filtersub => (
255             is => 'ro',
256             isa => 'CodeRef',
257             required => 1
258             );
259              
260             sub read {
261 7     7 0 2720 my $self = shift;
262 7         230 while (my $record = $self->reader->read) {
263 7 100       208 return $record if $self->filtersub->($record);
264             }
265 3         26 return;
266             }
267              
268             sub filter {
269 1     1 0 922 my ( $self, $filtersub ) = @_;
270 1         4 return BoutrosLab::TSVStream::IO::Role::Reader::Filter->new(
271             reader => $self,
272             filtersub => $filtersub
273             );
274             }
275              
276             =head1 SYNOPSIS
277              
278             $class->reader( ... );
279              
280             # ($class will use the role BoutrosLab::TSVStream which will provide
281             # the reader method, that method will return a Reader object with:
282             # ...
283             # return BoutrosLab::TSVStream::IO::Role::Reader::Fixed->new(
284             # handle => $fd, # (required)
285             # class => $class, # (required) class
286             # file => $file, # (optional) used (as filename) in error messages
287             # header => $str, # (optional) one of: check none (default 'check')
288             # );
289              
290             while (my $record = $reader->read) {
291             # ... $record is a $class object
292             # ... use $record->field1, $record->field2, etc. - all of the methods of $class object
293             }
294              
295             =head1 DESCRIPTION
296              
297             This object provides an iterator to read through the lines
298             of a data stream (C<$fd>), converting each from a line with
299             tab separated fields into an object of a class (C<$classs>)
300             that has attributes for those fields.
301              
302             Usually, the data stream will start with a line that has the
303             fieldnames in a tab separated list, and the rest of the stream
304             has lines that contain the field values in a tab separated list.
305              
306             Any error diagnostics will refer to the stream using the
307             C<$file> filename if it is provided.
308              
309             The C<$class> class will have a class attribute named
310             C<_fields>. Usually, this will be a read-only method that
311             returns a list of fieldnames that will be used to validate
312             the first line in the data stream (which should contain the
313             field names as the column vlues).
314              
315             A class C<$class> object will be created for each line.
316             The object will be initialized with a list of names and values
317             matching the fields and the contents .of the line.
318              
319             If C<header> is provided, it can be 'check', or 'none'.
320             This controls what is done to the handle initially.
321              
322             If 'check' is specified, the first line of the stream is read
323             and it is checked to ensure that it matches the C<fields> both
324             in name and order. The fields list must be complete. However,
325             it is permitted for the field names to mismatch by having
326             different capitalization - the comparison is not case sensitive.
327              
328             If 'none' is specified, the stream is not checked for a header
329             line. (You would use this option either if the file does not
330             have a header line, or if you are scanning from the middle of
331             a file handle that is no longer at the start of the file.)
332              
333             =cut
334              
335             =head1 ATTRIBUTES
336              
337             =head2 handle - the filehandle to be read
338              
339             =head2 file - the name of the stream, usually a filename, for diagnostic purposes
340              
341             =head2 class - the class that records transformed into
342              
343             =head2 fields - list of field names, usually provided by class
344              
345             handle, file, class and fields are provided by the ...::Base role
346              
347             =head2 header - 'auto', 'check', or 'none' (default 'auto')
348              
349             The C<'check'> setting causes the first line of the stream to
350             be read and validated against the C<fields> list. The field
351             names are accepted if they match (but differences in upper/lower
352             case are ignored). If they do not match, an exception is thrown.
353              
354             If the C<'none'> setting is provided, the stream should already be
355             positioned at a data value (i.e. the stream was previously opened and
356             is no longer positioned at the start, or else the stream was originally
357             created without a leading header line).
358              
359             The default C<'auto'> setting causes the first line to be read and
360             validated as for the C<'check'> setting, but if the line does not
361             match the list of fields it is assumed to instead be the first data
362             line of a stream that has no headers, and processing continues as
363             if the C<'none'> setting were specified instead.
364              
365             =head1 BUILDARGS
366              
367             The BUILDARGS opens a handle if only a file name was provided.
368              
369             =head1 BUILD
370              
371             The BUILD method handles any requirements for reading and processing a
372             header line.
373              
374             =head1 METHODS
375              
376             =head2 read - read a line from the stream end turn it into a class element
377              
378             =cut
379              
380             =head1 AUTHOR
381              
382             John Macdonald - Boutros Lab
383              
384             =head1 ACKNOWLEDGEMENTS
385              
386             Paul Boutros, Phd, PI - Boutros Lab
387              
388             The Ontario Institute for Cancer Research
389              
390             =cut
391              
392             1;