File Coverage

blib/lib/TIGR/FASTA/Reader.pm
Criterion Covered Total %
statement 19 265 7.1
branch 0 120 0.0
condition 0 105 0.0
subroutine 7 23 30.4
pod 12 12 100.0
total 38 525 7.2


line stmt bran cond sub pod time code
1             # $Id: FASTAreader.pm,v 1.1 2004/04/28 15:03:43 aphillip Exp $
2              
3             package TIGR::FASTA::Reader;
4             {
5            
6             =head1 NAME
7              
8             TIGR::FASTA::Reader - TIGR::FASTA::Reader class for parsing and navigating
9             FASTA format files.
10              
11             =head1 SYNOPSIS
12              
13             use TIGR::FASTA::Reader;
14             my $obj_instance = new TIGR::FASTA::Reader ($foundation_obj_ref,
15             $error_array_ref, $fasta_file_name);
16              
17             =head1 DESCRIPTION
18              
19             This module iterates over a FASTA formatted database file. It provides
20             data extraction and simple analysis routines. This module utilizes
21             acceptance validation of FASTA formatted files via the TIGR::FASTA::Grammar
22             module.
23              
24             =cut
25              
26             BEGIN {
27 1     1   1149 require 5.006_00;
28             }
29              
30 1     1   7 use strict;
  1         3  
  1         36  
31 1     1   7 use IO::File;
  1         3  
  1         119  
32 1     1   6 use TIGR::Foundation;
  1         2  
  1         75  
33 1     1   6 use TIGR::FASTA::Grammar ':public';
  1         1  
  1         116  
34 1     1   6 use TIGR::FASTA::Grammar ':private';
  1         1  
  1         70  
35 1     1   5 use TIGR::FASTA::Record;
  1         2  
  1         2509  
36              
37              
38             ## internal variables and identifiers
39              
40             our $REVISION = (qw$Revision: 1.1 $)[-1];
41             our $VERSION = '1.21';
42             our $VERSION_STRING = "$VERSION (Build $REVISION)";
43             our @DEPEND = ();
44              
45             my $SYS_ERR = 0; # this flag specifies non-user related error
46             my $USR_ERR = 1; # this flag specifies user related error
47              
48             ## external variables
49              
50             my $UNBOUND_FASTA_SEPARATOR = $TIGR::FASTA::Grammar::UNBOUND_FASTA_SEPARATOR;
51            
52             # debugging scheme
53             #
54             # Debugging via the TIGR Foundation uses increasing log levels based on
55             # nesting. 'MAIN' starts at level 1. Every nest increments the level by
56             # 1.
57             # Subroutines always start nesting at level 2. As debugging levels
58             # increase, logging is more verbose. This makes sense as you log at
59             # greater depth (ie. deeper branching).
60             #
61             # The following definitions help emphasize the debugging in the program.
62             #
63             my $DEBUG_LEVEL_1 = 1;
64             my $DEBUG_LEVEL_2 = 2;
65             my $DEBUG_LEVEL_3 = 3;
66             my $DEBUG_LEVEL_4 = 4;
67             my $DEBUG_LEVEL_5 = 5;
68             my $DEBUG_LEVEL_6 = 6;
69             my $DEBUG_LEVEL_7 = 7;
70             my $DEBUG_LEVEL_8 = 8;
71             my $DEBUG_LEVEL_9 = 9;
72              
73             ## prototypes
74              
75             sub new(;$$$);
76             sub open($;$);
77             sub close();
78             sub index();
79             sub seekIndex($);
80             sub getRecordByIdentifier($);
81             sub seekIdentifer($);
82             sub get();
83             sub next();
84             sub hasNext();
85             sub count();
86             sub path();
87             sub _initialize();
88             sub _parseDBfile();
89             sub _nullRecordHandler($$);
90             sub _errorHandler($$$);
91              
92              
93             ## implementation
94              
95             =over
96              
97             =item $obj_instance = new TIGR::FASTA::Reader ($foundation_object,
98             $error_array_ref, $db_file);
99              
100             This method returns a new instance of a TIGR::FASTA::Reader object. It takes
101             three optional parameters: a TIGR::Foundation object (C<$foundation_object>),
102             a reference to an array for logging user error messages (C<$error_array_ref>),
103             and FASTA file (C<$db_file>). The new instance is returned on success. If
104             the file supplied cannot be opened or is invalid, this method returns
105             undefined. This method also returns undefined if the parameters supplied are
106             invalid. Errors in parsing are written to the array at C<$error_array_ref>
107             and the log file.
108              
109             =cut
110              
111             sub new(;$$$) {
112 0     0 1   my $pkg = shift;
113 0           my @method_args = @_;
114              
115 0           my $error_condition = 0;
116 0           my $self = {};
117 0           bless $self, $pkg;
118 0           $self->_initialize(); # set up internal variables;
119              
120 0 0 0       if ( ( scalar (@method_args) > 0 ) &&
121             ( ( ref ($method_args[0]) ) =~ /foundation/i ) ) {
122 0           $self->{foundation} = shift @method_args;
123 0           $self->_errorHandler("Got TIGR::Foundation in new()", $DEBUG_LEVEL_3,
124             $SYS_ERR);
125             }
126             else {
127 0           $self->{foundation} = undef;
128 0           $self->_errorHandler("No TIGR::Foundation in new()", $DEBUG_LEVEL_3,
129             $SYS_ERR);
130             }
131              
132 0 0 0       if ( ( scalar (@method_args) > 0 ) &&
133             ( ( ref ($method_args[0]) ) =~ /array/i ) ) {
134 0           $self->{error_ref} = shift @method_args;
135 0           $self->_errorHandler("Got Error ARRAY in new()", $DEBUG_LEVEL_3,
136             $SYS_ERR);
137             }
138             else {
139 0           $self->{error_ref} = undef;
140 0           $self->_errorHandler("No Error ARRAY in new()", $DEBUG_LEVEL_3,
141             $SYS_ERR);
142             }
143              
144 0 0 0       if ( ( scalar (@method_args) > 0 ) &&
145             ( ! ref ($method_args[0]) ) ) {
146 0           my $filename = shift @method_args;
147 0 0         if(defined($filename)) {
148 0           $self->{db_file_name} = $filename ;
149 0           $self->_errorHandler("Got file name in new()", $DEBUG_LEVEL_4,
150             $SYS_ERR);
151             }
152             else {
153 0           $self->_errorHandler("undef passed as filename", $DEBUG_LEVEL_4,
154             $USR_ERR);
155             }
156             }
157             else {
158 0           $self->{db_file_name} = undef;
159 0           $self->_errorHandler("No file name in new()", $DEBUG_LEVEL_3,
160             $SYS_ERR);
161             }
162              
163             # check for invocation errors
164 0 0 0       if ( ( scalar (@method_args) > 0 ) ) {
    0          
165 0           $error_condition = 1;
166 0           $self->_errorHandler("Too many parameters passed to new() method",
167             $DEBUG_LEVEL_3, $SYS_ERR);
168             }
169             elsif ( defined ( $self->{db_file_name} ) &&
170             ! defined ( $self->open($self->{db_file_name}, "r") ) ) {
171             # the error message is logged via the open() routine
172 0           $self = undef;
173             }
174 0 0         return ( $error_condition == 0 ) ? $self : undef;
175             }
176              
177              
178             =item $result = $obj_instance->open($file_name, $flag);
179              
180             This method opens a FASTA file for reading. This method also parses the file
181             for correctness. The file, C<$file_name>, is opened using the C flags
182             specified by C<$flag>. On success, this method returns 1. If the file cannot
183             be opened or parsing fails, this method returns undefined.
184              
185             =cut
186              
187             sub open($;$) {
188 0     0 1   my $self = shift;
189 0           my $db_file_name = shift;
190 0           my $open_flags = shift;
191              
192 0           my $error_condition = 0;
193              
194 0 0 0       if ( ( ! defined ($open_flags) ) ||
195             ( $open_flags !~ /^r$/i ) ) {
196 0           $open_flags = "r";
197             }
198 0           $self->_errorHandler("Open flags = \'$open_flags\' in open()",
199             $DEBUG_LEVEL_3, $SYS_ERR);
200              
201             # close a previously open file
202 0 0         if ( defined ($self->{db_handle}) ) {
203 0           $self->_errorHandler("Closing old handle in open()", $DEBUG_LEVEL_3,
204             $SYS_ERR);
205 0           $self->close();
206             }
207              
208 0 0 0       if (!(
    0 0        
      0        
209             ( defined ( $db_file_name ) ) &&
210             ( $self->{db_file_name} = $db_file_name ) &&
211             ( defined ( $self->{db_file_name} )) &&
212             ( defined ( $self->{db_handle} =
213             new IO::File $self->{db_file_name}, $open_flags ))
214             ) ) {
215 0           $error_condition = 1;
216 0           $self->_errorHandler(
217             "Cannot open file \'$self->{db_file_name}\'", $DEBUG_LEVEL_3,
218             $USR_ERR);
219             }
220             elsif ( ( defined ( $self->{db_file_name} ) ) &&
221             ( defined ( $self->{db_handle} ) ) &&
222             ( $self->_parseDBfile() == 0 ) ) {
223 0           $error_condition = 1;
224 0           $self->_errorHandler("Encountered errors in file " .
225             "\'$self->{db_file_name}\'.", $DEBUG_LEVEL_3, $USR_ERR);
226             }
227              
228 0 0         if ( $error_condition == 1 ) {
229 0           $self->_initialize(); # reset object state
230             }
231              
232 0 0         return ($error_condition == 1) ? undef : 1;
233             }
234              
235              
236             =item $result = $obj_instance->close();
237              
238             This method closes the object file stream and resets all internal data
239             structures. The result of the operation is returned. If the file stream
240             is closed successfully, this object returns true (1), otherwise false
241             (undefined).
242              
243             =cut
244              
245             sub close() {
246 0     0 1   my $self = shift;
247 0           my $return_val = undef;
248              
249 0 0         if ( defined ( $self->{db_handle} ) ) {
250 0           $return_val = $self->{db_handle}->close();
251 0 0         if (!$return_val) {
252 0           $return_val = undef;
253 0           $self->_errorHandler(
254             "Error closing FASTA file: $self->{db_file_name}",
255             $DEBUG_LEVEL_4, $USR_ERR);
256             }
257             }
258 0           $self->_initialize();
259 0           return $return_val;
260             }
261              
262              
263             =item $record_num = $obj_instance->index();
264              
265             This method returns the record number of the active record. If no record has
266             been selected (ie. made active), then this method returns undefined. If
267             the active record pointer is before the first record, this method returns
268             '-1'.
269              
270             =cut
271              
272             sub index() {
273 0     0 1   my $self = shift;
274 0           my $return_val = undef;
275              
276 0 0         if ( defined ($self->{active_record}) ) {
277 0           $return_val = $self->{active_record};
278             }
279             else {
280 0           $return_val = undef;
281             }
282 0           return $return_val;
283             }
284              
285              
286             =item $result = $obj_instance->seekIndex($num);
287              
288             This method selects a record by record order index. The C<$num> ordered
289             record is selected. If C<$num> is out of range for the database or not -1
290             (indicating to seek one record before the first record), this
291             function returns undefined and the active record pointer is not changed.
292             Otherwise, the requested record is made active and the method returns 1.
293              
294             =cut
295              
296             sub seekIndex($) {
297 0     0 1   my $self = shift;
298 0           my $active_num = shift;
299 0           my $return_val;
300              
301 0 0 0       if ( (defined ($active_num) ) &&
      0        
      0        
302             ( ($active_num =~ /^\d+$/) ||
303             ($active_num == -1 ) ) &&
304             ($active_num < $self->count()) ) {
305 0           $self->_errorHandler(
306             "Setting active record num to $active_num.",
307             $DEBUG_LEVEL_3, $SYS_ERR);
308 0           $self->{active_record} = $active_num;
309 0           $return_val = 1;
310             }
311             else {
312 0 0         if ( ! defined ($active_num) ) {
313 0           $active_num = "";
314             }
315             $self->_errorHandler(
316 0           "Cannot set active record num to $active_num, out of " .
317             $self->count(), $DEBUG_LEVEL_3, $SYS_ERR);
318 0           $return_val = undef;
319             }
320 0           return $return_val;
321             }
322            
323              
324             =item $result = $obj_instance->next();
325              
326             This method selects the next record in numerical order to be the active
327             record. It returns the record on success, undefined on failure. If the active
328             record is equal to -1, the first record is selected.
329              
330             =cut
331              
332             sub next() {
333 0     0 1   my $self = shift;
334 0           my $return_val = undef;
335              
336 0 0         if ( defined ( $self->hasNext() ) ) {
337 0           $self->{active_record}++;
338 0           $return_val = $self->get();
339             }
340             # set undefined if no more records left or no records at all
341             else {
342 0           $return_val = undef;
343             }
344              
345 0           return $return_val;
346             }
347              
348              
349             =item $result = $obj_instance->hasNext();
350              
351             This method returns true (1) if there are more elements beyond the
352             current element. If not, this method returns false (undefined).
353              
354             =cut
355              
356             sub hasNext() {
357 0     0 1   my $self = shift;
358 0           my $return_val = undef;
359              
360 0 0 0       if ( ( defined ($self->{active_record}) ) &&
      0        
361             ( $self->{active_record} >= -1 ) &&
362             ( $self->{active_record} < ( $self->count() - 1 ) )
363             ) {
364 0           $return_val = 1;
365             }
366             else {
367 0           $return_val = undef;
368             }
369 0           return $return_val;
370             }
371              
372              
373             =item $result =
374             $obj_instance->getRecordByIdentifier($identifier);
375              
376             This method selects a record by record minimal identifier.
377             If C<$identifier> does not exist in the set of records, this function
378             returns undefined and the previously active record remains active. Otherwise,
379             the requested record is made active and the method returns a
380             C object representation of the current(active) record.
381              
382             =cut
383              
384             sub getRecordByIdentifier($) {
385            
386 0     0 1   my $self = shift;
387 0           my $identifier = shift;
388 0           my $fasta_record = undef;
389 0           my $seek_result = undef;
390 0 0         if((defined $identifier)) {
391            
392 0           my $seek_result = $self->seekIdentifier($identifier);
393 0 0 0       if( (defined ($seek_result)) && ($seek_result == 1)) {
394 0           $fasta_record = $self->get();
395             }
396             }
397             else {
398 0           $self->_errorHandler("undefined identifier passed", $DEBUG_LEVEL_3,
399             $USR_ERR);
400             }
401 0           return $fasta_record;
402             }
403            
404              
405             =item $result = $obj_instance->seekIdentifier($identifier);
406              
407             This method selects a record by record minimal identifier.
408             If C<$identifier> does not exist in the set of records, this function
409             returns undefined and the previously active record remains active. Otherwise,
410             the requested record is made active and the method returns 1.
411              
412             =cut
413              
414             sub seekIdentifier($) {
415 0     0 1   my $self = shift;
416 0           my $identifier = shift;
417 0           my $number = undef;
418              
419 0 0 0       if ( (defined $identifier) &&
420             (exists $self->{identifier_to_number_hash}->{$identifier}) ) {
421 0           $number = $self->{identifier_to_number_hash}->{$identifier};
422 0           $self->_errorHandler(
423             "Got record number $number for $identifier.", $DEBUG_LEVEL_3,
424             $SYS_ERR);
425             }
426 0           return $self->seekIndex($number);
427             }
428            
429              
430             =item $record_contents = $obj_instance->get();
431              
432             This method returns a C object representation of the current
433             (active) record. If no defined record is active, this method returns
434             undefined.
435              
436             =cut
437              
438             sub get() {
439 0     0 1   my $self = shift;
440             my $db_handle = defined ($self->{db_handle}) ?
441 0 0         $self->{db_handle} : undef;
442 0           my $header = "";
443 0           my $data = "";
444 0           my $f_obj = undef;
445 0           my $pos_str = undef;
446 0           my @pos_arr = ();
447 0           my $record = undef;
448              
449             # open FASTA file for reading
450 0 0         if (! defined ($db_handle) ) {
451 0           $self->_errorHandler("db_handle not defined: " .
452             "cannot access FASTA file \'$self->{db_file_name}\'.",
453             $DEBUG_LEVEL_3);
454             }
455              
456             # search and extract the FASTA record
457 0 0 0       if (( defined ( $self->{active_record} )) &&
      0        
      0        
      0        
458             ( $self->{active_record} > -1 ) &&
459             ( $self->{active_record} < $self->count() ) &&
460             ( defined ( $db_handle )) &&
461             ( defined ( $pos_str =
462             $self->{number_to_fp_array}->{$self->{active_record}} ))
463             ) {
464 0           @pos_arr = split " ", $pos_str;
465              
466             # seek to the start position of the record in the file and read the
467             # record information
468 0 0 0       if((defined $pos_arr[0]) &&
      0        
      0        
469             (defined($db_handle->seek($pos_arr[0],SEEK_SET))) &&
470             (defined $pos_arr[1]) &&
471             (defined (read $db_handle, $record,
472             (($pos_arr[1]-$pos_arr[0])+1)))) {
473            
474 0           ($header) = $record =~ /(>.*)\n/;
475 0           $record =~ s/(>.*)\n//;
476 0           $record =~ s/[\s\n]+//g;
477 0           $f_obj = new TIGR::FASTA::Record $header, $record;
478             }
479             }
480 0           return $f_obj;
481             }
482            
483              
484             =item $db_name = $obj_instance->path();
485              
486             This method returns the path to the file used for processing.
487              
488             =cut
489              
490             sub path() {
491 0     0 1   my $self = shift;
492             # the existence of db_file_name is checked in new()
493 0           return $self->{db_file_name};
494             }
495              
496              
497             =item $cnt = $obj_instance->count();
498              
499             This method returns the number of records in the database file.
500              
501             =cut
502              
503             sub count() {
504 0     0 1   my $self = shift;
505 0           return $self->{num_records};
506             }
507              
508              
509             # $obj_instance->_initialize();
510              
511             #This method resets the object to its initial state. Internal data structures
512             #are reset. This method does not return.
513              
514             sub _initialize() {
515 0     0     my $self = shift;
516              
517 0           $self->{num_records} = 0; # number of recs
518              
519             # look up methods for records here
520             # the active record is stored as a sequence number
521 0           $self->{active_record} = undef; # current working record
522 0           $self->{number_to_fp_array} = (); # map seq# to file loc
523 0           $self->{number_to_identifier_array} = (); # map seq# to identifier
524 0           $self->{identifier_to_number_hash} = (); # map seq identifier to seq#
525 0           $self->{error_cnt} = 0; # parse error tabulator
526 0           $self->{db_file_name} = "";
527 0           $self->{db_handle} = undef;
528             }
529              
530              
531             # $obj_instance->_parseDBfile();
532              
533             #This method parses the FASTA database file passed via the C method.
534             #It defines all of the sequence look-ups and validates every record. This
535             #method finds the number of sequences and maximum sequence length. This
536             #method is called from the C method. The active record is un-selected
537             #by this method.
538              
539             sub _parseDBfile() {
540 0     0     my $self = shift;
541 0           my $last_line_length_lt_std_flag = 0;
542 0           my $line_number = 0;
543 0           my $record_identifier = "";
544 0           my $preceding_header_flag = 0;
545 0           my $first_data_line_length = undef;
546 0           my $empty_line_found = 0;
547             my $db_handle = defined ( $self->{db_handle} ) ?
548 0 0         $self->{db_handle} : undef;
549             #the file position where the record starts
550 0           my $pos1 = 0;
551             #the file position where the record ends
552 0           my $pos2 = undef;
553             #the start and end positions of a record separated with a space
554 0           my $string = "";
555             # variable to give the length of each line
556 0           my $line_len = undef;
557             # the sum of the length of all the lines in a file
558 0           my $sum_len = 0;
559             # loop through FASTA file
560 0   0       while ( ( defined ( $db_handle ) ) &&
      0        
561             ( defined ( my $line = <$db_handle> ) ) &&
562             ( ++$line_number ) ) {
563 0           chomp $line;
564 0           $line_len = length($line);
565 0           $sum_len += $line_len;
566 0           $sum_len++;
567            
568             # check FASTA data
569 0 0 0       if ( ( defined ( $record_identifier ) ) &&
    0 0        
    0          
570             ( $record_identifier !~ // ) &&
571             ( ( isValidFASTAdata($line) ) != 0 ) ) {
572            
573             # check if previous line was empty
574 0 0         if ( $empty_line_found == 1 ) {
575 0           $self->{error_cnt}++;
576 0           $self->_errorHandler("ERROR: Empty line found at line ".
577             ($line_number - 1). " - empty lines are ".
578             "allowed only at the end of a file",
579             $DEBUG_LEVEL_5, $USR_ERR);
580 0           $empty_line_found = 0;
581             }
582            
583 0 0         if($preceding_header_flag == 1) {
584 0           $first_data_line_length = setValidFASTAlineLength($line);
585             }
586            
587             # check $last_line_length_lt_std_flag for an error on previous line
588 0 0         if(defined ($first_data_line_length)) {
589 0 0         if ( $last_line_length_lt_std_flag == 1 ) {
590 0           $self->{error_cnt}++;
591 0           $self->_errorHandler("Expected: FASTA data definition " .
592             "lines should be $first_data_line_length bases " .
593             "(characters) across. Only the last line of a sequence ".
594             "data definition may be less than " .
595             "$first_data_line_length bases (characters) " .
596             "across, if applicable. See line " .
597             ($line_number - 1) . '.', $DEBUG_LEVEL_5, $USR_ERR);
598             }
599 0           $last_line_length_lt_std_flag = 0;
600            
601             # check current line for over-length problem
602 0 0         if ( $line_len > $first_data_line_length ) {
    0          
603 0           $self->{error_cnt}++;
604 0           $self->_errorHandler("Expected: FASTA data definition " .
605             "lines should be $first_data_line_length bases " .
606             "(characters) across. Only the last line of a sequence ".
607             "data definition may be less than " .
608             "$first_data_line_length bases (characters) ".
609             "across, if applicable. See line " . $line_number . '.',
610             $DEBUG_LEVEL_5,$USR_ERR);
611             }
612            
613             #check current line for under-length problem; report only if not
614             #the last line in the data definition
615             elsif ( $line_len < $first_data_line_length ) {
616 0           $last_line_length_lt_std_flag = 1;
617             }
618             }
619 0           $preceding_header_flag = 0;
620             }
621             # check for FASTA header
622             elsif ( ( isValidFASTAheader($line) ) != 0 ) {
623 0 0         if ( ! defined ( $self->{active_record} ) ) {
624 0           $self->{active_record} = -1;
625             }
626            
627 0           $self->{active_record}++;
628            
629 0 0 0       if( (defined $line_number) &&
630             ($line_number > 1) ) {
631 0           $pos2 = (($sum_len - $line_len)-2);
632 0           $pos1 = $pos2+1;
633              
634 0 0 0       if((defined $pos1) && ( defined $string)) {
635 0           $string .= "$pos2";
636             # store the start and end of a fasta record in a hash
637 0           $self->{number_to_fp_array}->{($self->{active_record})-1} =
638             $string;
639             }
640             }
641            
642 0 0         if(defined $pos1) {
643 0           $string = "$pos1 ";
644             }
645            
646             # check if previous line was a FASTA header
647 0 0         if ( $preceding_header_flag == 1 ) {
648 0           $self->_nullRecordHandler($self->{active_record} - 1,
649             $line_number);
650             }
651            
652             # check if previous line was empty
653 0 0         if ( $empty_line_found == 1 ) {
654 0           $self->{error_cnt}++;
655 0           $self->_errorHandler("ERROR: Empty line found at line ".
656             ($line_number - 1). " - empty lines are ".
657             "allowed only at the end of a file",
658             $DEBUG_LEVEL_5, $USR_ERR);
659 0           $empty_line_found = 0;
660             }
661             # if it's a valid FASTA header, then don't need to check again
662             # extract the record IDENTIFIER
663 0           $record_identifier = _headerToIdentifier($line);
664 0 0         if ( defined (
665             $self->{identifier_to_number_hash}->{$record_identifier} ) ) {
666 0           $self->{error_cnt}++;
667 0           $self->_errorHandler("Expected: unique FASTA " .
668             "identifier. \'$record_identifier\' is a duplicate at " .
669             "line $line_number.", $DEBUG_LEVEL_5, $USR_ERR);
670             }
671             else {
672             $self->{identifier_to_number_hash}->{$record_identifier} =
673 0           $self->{active_record};
674             $self->{number_to_identifier_array}->{$self->{active_record}} =
675 0           $record_identifier;
676             }
677              
678             # set up the variables for parsing a new record
679 0           $last_line_length_lt_std_flag = 0;
680 0           $preceding_header_flag = 1;
681 0           $self->{num_records}++;
682             }
683             # handle empty space
684             # empty space after the last record is allowed
685             elsif($line eq "") {
686 0           $empty_line_found = 1;
687 0           next;
688             }
689             # handle error data types
690             else {
691 0           $self->{error_cnt}++;
692             # check if previous line was empty
693 0 0         if ( $empty_line_found == 1 ) {
694 0           $self->{error_cnt}++;
695 0           $self->_errorHandler("ERROR: Empty line found at line ".
696             ($line_number - 1). " - empty lines are ".
697             "allowed only at the end of a file",
698             $DEBUG_LEVEL_5, $USR_ERR);
699 0           $empty_line_found = 0;
700             }
701            
702             # line has a separator token in it, so it may be header
703 0 0 0       if ( $line =~ /$UNBOUND_FASTA_SEPARATOR/ ) {
    0          
    0          
704 0           $self->_errorHandler("Expected: record header " .
705             "information in FASTA record header. Got: \'$line\' at " .
706             "line $line_number.", $DEBUG_LEVEL_6, $USR_ERR);
707 0           $last_line_length_lt_std_flag = 0;
708             }
709             # if last data line was small, expect this to be a header too
710             elsif ( $last_line_length_lt_std_flag == 1 ) {
711 0           $self->_errorHandler("Expected: FASTA record header " .
712             "beginning with \'>\'. Got: \'$line\' at line ".
713             "$line_number.",$DEBUG_LEVEL_6, $USR_ERR);
714 0           $last_line_length_lt_std_flag = 0;
715             }
716             elsif ( ( defined ( $record_identifier ) ) &&
717             ( $record_identifier !~ // ) ) {
718 0           $self->_errorHandler("Expected: valid FASTA data " .
719             "definition for record identifier \'$record_identifier\'. " .
720             "Check sequence content at line $line_number for invalid " .
721             "bases (data type: invalid data).", $DEBUG_LEVEL_6,
722             $USR_ERR);
723             }
724             else {
725 0           $self->_errorHandler("Expected: FASTA record header " .
726             "followed by definition of sequence. Invalid input at " .
727             "line $line_number.", $DEBUG_LEVEL_6, $USR_ERR);
728             }
729             }
730             } # end while
731            
732 0           $pos2 = $sum_len;
733 0 0 0       if((defined $pos1) && ( defined $string)) {
734 0           $string .= "$pos2";
735             # store the start and end of a fasta record in a hash
736 0           $self->{number_to_fp_array}->{$self->{active_record}} = $string;
737             }
738            
739             # check terminal case data definition
740 0 0         if ( $preceding_header_flag == 1 ) {
741 0           $self->_nullRecordHandler($self->{active_record}, $line_number);
742             }
743            
744 0           $self->{active_record} = -1; # set counter to the beginning
745            
746 0 0         return ( $self->{error_cnt} == 0 ) ? 1 : 0;
747             }
748              
749              
750             # $obj_instance->_nullRecordHandler($$);
751              
752             #This method handles the case of a null or equivalently empty record
753             #encountered during parsing. It logs the appropriate message to the
754             #TIGR Foundation object. The only arguments are the record number
755             #and the line number.
756              
757             sub _nullRecordHandler($$) {
758 0     0     my $self = shift;
759 0           my $active_num = shift;
760 0           my $line_number = shift;
761 0           my $preceding_rec_line_number = undef;
762 0           my $record_identifier = undef;
763              
764 0 0         if ( defined ( $self->{number_to_identifier_array}->
765             {$active_num} ) ) {
766             $record_identifier = $self->{number_to_identifier_array}->
767 0           {$active_num};
768             }
769             else {
770 0           $record_identifier = "";
771             }
772            
773 0 0         if(defined $line_number) {
774 0 0 0       if((defined $active_num) &&
    0 0        
775             ($active_num < ($self->{active_record}))) {
776 0           $preceding_rec_line_number = $line_number-1;
777             }
778             elsif((defined $active_num) &&
779             ($active_num == ($self->{active_record}))) {
780 0           $preceding_rec_line_number = $line_number;
781             }
782             }
783             else {
784 0           $preceding_rec_line_number = "";
785             }
786              
787 0           $self->{error_cnt}++;
788 0 0         if ( $self->{db_handle}->eof() == 1 ) {
789 0           $self->_errorHandler("Expected: FASTA record header " .
790             "followed by definition of sequence. Record identifier " .
791             "\'" . $record_identifier . "\' is undefined from line " .
792             $preceding_rec_line_number . ". Got end of file after line " .
793             $line_number . ".", $DEBUG_LEVEL_5, $USR_ERR);
794             }
795             else {
796 0           $self->_errorHandler("Expected: FASTA record header " .
797             "followed by definition of sequence. Record identifier " .
798             "\'" . $record_identifier . "\' is undefined from line " .
799             $preceding_rec_line_number . ". Got FASTA header at line " .
800             $line_number . ".", $DEBUG_LEVEL_5, $USR_ERR);
801             }
802             }
803              
804              
805             # $message = $obj_instance->_errorHandler($message, $tf_level,
806             # $internal_log_flag);
807              
808             #This method handles logging to the TIGR::Foundation module and
809             #internal error record reference array. The C<$message> argument is logged
810             #to the appropriate service. The C<$tf_level> parameter specifies the
811             #logging level for TIGR::Foundation, while the C<$internal_log_flag> parameter
812             #specifies if C<$message> should be written to the internal array reference
813             #specified in C. If a TIGR::Foundation instance does not exist,
814             #no logging to that facility occurs. This method returns C<$message>.
815              
816             sub _errorHandler($$$) {
817            
818 0     0     my $self = shift;
819              
820 0           my ( $message, $tf_level, $log_facility ) = @_;
821              
822 0 0 0       if ( defined ($message) &&
      0        
823             defined ($tf_level) &&
824             defined ($log_facility) ) {
825              
826 0 0         if ( defined ($self->{foundation}) ) {
827 0 0         if ( $log_facility != $USR_ERR ) { # all user errors go to .error
828 0           $self->{foundation}->logLocal($message, $tf_level);
829             }
830             else {
831 0           $self->{foundation}->logError($message);
832             }
833             }
834              
835 0 0 0       if ( ( defined ($self->{error_ref}) ) &&
836             ( $log_facility == $USR_ERR ) ) {
837 0           push @{$self->{error_ref}}, $message;
  0            
838             }
839             }
840 0           return $message;
841             }
842              
843             =head1 USAGE
844              
845             To use this module, load the C package via the
846             C function. Then, create a new instance of the object via the
847             C method, as shown below. There are several invocations possible
848             for this method since all parameters to C are optional.
849             To access records from the C instance, the
850             C package must be loaded via the C function.
851             An example script using this module follows. The C
852             module is included for completeness but does not have to be used.
853              
854             #!/usr/local/bin/perl -w
855              
856             # This script accepts FASTA files with the '-i' option
857             # on the command line and validates every one in turn.
858             # Parse errors are collected to the '@errors_list' array.
859             # This program concatenates all of the records together to
860             # one output file specified with the '-o' option.
861             # NOTE: The '-i' option must be specified before every input file.
862             # NOTE: The 'TIGR::FASTA::Writer' module is intended for writing
863             # FASTA records.
864              
865             use strict;
866             use TIGR::FASTA::Reader;
867             use TIGR::FASTA::Record;
868              
869             MAIN:
870             {
871             my $tf_object = new TIGR::Foundation;
872             my @errors_list = ();
873             my @input_files = ();
874             my $output_file = undef;
875              
876             # Capture the return code from the TIGR::Foundation method
877             my $result = $tf_object->TIGR_GetOptions('i=s' => \@input_files,
878             'o=s' => \$output_file);
879             if ( $result != 1 ) {
880             $tf_object->bail("Invalid command line options.");
881             }
882              
883             # Create a TIGR::FASTA::Reader instance using TIGR::Foundation and
884             # an error message list.
885             my $fasta_reader = new TIGR::FASTA::Reader $tf_object, \@errors_list;
886              
887             if ( !( defined ( $output_file ) &&
888             open OUTFILE, ">$output_file" ) ) {
889             $tf_object->bail("Cannot open output file for writing.");
890             }
891              
892             foreach my $in_file ( @input_files ) {
893             $fasta_reader->open($in_file) or
894             $tf_object->logLocal("Cannot open or read file $in_file", 2);
895              
896             if ( scalar(@errors_list) > 0 ) { # are there parse errors?
897             while ( @errors_list ) { # get the messages from the list
898             my $message = shift @errors_list;
899             print STDERR $message, "\n";
900             }
901             }
902              
903             while ( $fasta_reader->hasNext() ) {
904             # print each record to OUTFILE
905             print OUTFILE $fasta_reader->next()->toString();
906             }
907             }
908             }
909              
910             =cut
911              
912             }
913              
914             1;