File Coverage

blib/lib/Bio/ToolBox/Data/Stream.pm

Criterion	Covered	Total	%
statement	99	156	63.4
branch	39	88	44.3
condition	19	47	40.4
subroutine	11	13	84.6
pod	8	8	100.0
total	176	312	56.4

line	stmt	bran	cond	sub	pod	time	code
1							package Bio::ToolBox::Data::Stream;
2							our $VERSION = '1.66';
3
4							=head1 NAME
5
6							Bio::ToolBox::Data::Stream - Read, Write, and Manipulate Data File Line by Line
7
8							=head1 SYNOPSIS
9
10							use Bio::ToolBox::Data;
11
12							### Open a pre-existing file
13							my $Stream = Bio::ToolBox::Data->new(
14							in => 'regions.bed',
15							stream => 1,
16							);
17
18							# or directly
19							my $Stream = Bio::ToolBox::Data::Stream->new(
20							in => 'regions.bed',
21							);
22
23							### Open a new file for writing
24							my $Stream = Bio::ToolBox::Data::Stream->new(
25							out => 'output.txt',
26							columns => [qw(chromosome start stop name)],
27							);
28
29
30							### Working line by line
31							while (my $line = $Stream->next_line) {
32							# get the positional information from the file data
33							# assuming that the input file had these identifiable columns
34							# each line is Bio::ToolBox::Data::Feature item
35							my $seq_id = $line->seq_id;
36							my $start = $line->start;
37							my $stop = $line->end;
38
39							# change values
40							$line->value(1, 100); # index, new value
41							}
42
43
44							### Working with two file streams
45							my $inStream = Bio::ToolBox::Data::Stream->new(
46							file => 'regions.bed',
47							);
48							my $outStream = $inStream->duplicate('regions_ext100.bed');
49							my $sc = $inStream->start_column;
50							my $ec = $inStream->end_column;
51							while (my $line = $inStream->next_line) {
52							# adjust positions by 100 bp
53							my $s = $line->start;
54							my $e = $line->end;
55							$line->value($sc, $s - 100);
56							$line->value($ec, $e + 100);
57							$outStream->write_row($line);
58							}
59
60
61							### Finishing
62							# close your file handles when you are done
63							$Stream->close_fh;
64
65							=head1 DESCRIPTION
66
67							This module works similarly to the L<Bio::ToolBox::Data> object, except that
68							rows are read from a file handle rather than a memory structure. This
69							allows very large files to be read, manipulated, and even written without
70							slurping the entire contents into memory.
71
72							For an introduction to the L<Bio::ToolBox::Data> object and methods, refer to
73							its documentation and the L<Bio::ToolBox::Data::Feature> documentation.
74
75							Typically, manipulations are only performed on one row at a time, not on an
76							entire table. Therefore, large scale table manipulations, such as sorting, are
77							not possible.
78
79							A typical workflow consists of opening two Stream objects, one for reading and
80							one for writing. Rows are read, one at a time, from the read Stream, manipulated
81							as necessary, and then written to the write Stream. Each row is passed as a
82							L<Bio::ToolBox::Data::Feature> object. It can be manipulated as such, or the
83							corresponding values may be dumped as an array. Working with the row data
84							as an array is required when adding or deleting columns, since these manipulations
85							are not allowed with a Feature object. The write Stream can then be passed
86							either the Feature object or the array of values to be written.
87
88
89							=head1 METHODS
90
91							=head2 Initializing the structure
92
93							A new Bio::ToolBox::Data::Stream object may be generated directly, or indirectly
94							through the L<Bio::ToolBox::Data> module.
95
96							=over 4
97
98							=item new
99
100							my $Stream = Bio::ToolBox::Data::Stream->new(
101							in => $filename,
102							);
103							my $Stream = Bio::ToolBox::Data->new(
104							stream => 1,
105							in => $filename,
106							);
107
108							Options to the new function are listed below. Streams are inherently either
109							read or write mode, determined by the mode given through the options.
110
111							=over 4
112
113							=item in
114
115							Provide the path of the file to open for reading. File types are
116							recognized by the extension, and compressed files (.gz) are supported. File
117							types supported include all those listed in L.
118
119							=item out
120
121							Provide the path of the file to open for writing. No check is made
122							for pre-existing files; if it exists it will be overwritten! A new data
123							object is prepared, therefore column names must be provided.
124
125							=item noheader
126
127							Boolean option indicating that the input file does not have file headers,
128							in which case dummy headers are provided. This is not necessary for
129							defined file types that don't normally have file headers, such as
130							BED, GFF, or UCSC files. Ignored for output files.
131
132							=item columns
133
134							my $Stream = Bio::ToolBox::Data::Stream->new(
135							out => $filename,
136							columns => [qw(Column1 Column2 ...)],
137							);
138
139							When a new file is written, provide the names of the columns as an
140							anonymous array. If no columns are provided, then a completely empty
141							data structure is made. Columns must be added with the add_column()
142							method below.
143
144							=item gff
145
146							When writing a GFF file, provide a GFF version. When this is given, the
147							nine standard column names and metadata are automatically provided based
148							on the file format specification. Note that the column names are not
149							actually written in the file, but are maintained for internal use.
150							Acceptable versions include 1, 2, 2.5 (GTF), and 3 (GFF3).
151
152							=item bed
153
154							When writing a BED file, provide the number of bed columns that the file
155							will have. When this is given, the standard column names and metadata
156							will be automatically provided based on the standard file format
157							specification. Note that column names are not actually written to the file,
158							but are maintained for internal use. Acceptable values are integers from
159							3 to 12.
160
161							=item ucsc
162
163							When writing a UCSC-style file format, provide the number of bed columns
164							that the file will have. When this is given, the standard column names and
165							metadata will be automatically provided based on the file format specification.
166							Note that column names are not actually written to the file, but are maintained
167							for internal use. Acceptable values include 10 (refFlat without gene names),
168							11 (refFlat with gene names), 12 (knownGene gene prediction table), and 15
169							(an extended gene prediction or genePredExt table).
170
171							=item gz
172
173							Boolean value to change the compression status of the output file. If
174							overwriting an input file, the default is to maintain the compression status,
175							otherwise no compression. Pass a 0 for no compression, 1 for standard
176							gzip compression, or 2 for block gzip (bgzip) compression for tabix
177							compatibility.
178
179							=back
180
181							=item duplicate
182
183							my $Out_Stream = $Stream->duplicate($new_filename);
184
185							For an opened-to-read Stream object, you may duplicate the object as a new
186							opened-to-write Stream object that maintains the same columns and metadata.
187							A new different filename must be provided.
188
189							=back
190
191							=head2 General Metadata
192
193							There is a variety of general metadata regarding the Data structure that
194							is available.
195
196							The following methods may be used to access or set these
197							metadata properties. Note that metadata is only written at the beginning
198							of the file, and so must be set prior to iterating through the file.
199
200							=over 4
201
202							=item feature
203
204							Returns or sets the name of the features used to collect
205							the list of features. The actual feature types are listed
206							in the table, so this metadata is merely descriptive.
207
208							=item feature_type
209
210							Returns one of three specific values describing the contents
211							of the data table inferred by the presence of specific column
212							names. This provides a clue as to whether the table features
213							represent genomic regions (defined by coordinate positions) or
214							named database features. The return values include:
215
216							=over 4
217
218							=item coordinate: Table includes at least chromosome and start
219
220							=item named: Table includes name, type, and/or Primary_ID
221
222							=item unknown: unrecognized
223
224							=back
225
226							=item program
227
228							Returns or sets the name of the program generating the list.
229
230							=item database
231
232							Returns or sets the name or path of the database from which the
233							features were derived.
234
235							=item gff
236
237							Returns or sets the version of loaded GFF files. Supported versions
238							include 1, 2, 2.5 (GTF), and 3.
239
240							=item bed
241
242							Returns or sets the BED file version. Here, the BED version is simply
243							the number of columns.
244
245							=item ucsc
246
247							Returns or sets the UCSC file format version. Here, the version is
248							simply the number of columns. Supported versions include 10 (gene
249							prediction), 11 (refFlat, or gene prediction with gene name), 12
250							(knownGene table), 15 (extended gene prediction), or 16 (extended
251							gene prediction with bin).
252
253							=item vcf
254
255							Returns or sets the VCF file version number. VCF support is limited.
256
257							=back
258
259							=head2 File information
260
261							These methods provide information about the file from which the
262							data table was loaded. This does not include parsed annotation tables.
263
264							=over 4
265
266							=item filename
267
268							=item path
269
270							=item basename
271
272							=item extension
273
274							Returns the filename, full path, basename, and extension of
275							the filename. Concatenating the last three values will reconstitute
276							the first original filename.
277
278							=item add_file_metadata
279
280							$Data->add_file_metadata('/path/to/file.txt');
281
282							Add filename metadata. This will automatically parse the path,
283							basename, and recognized extension from the passed filename and
284							set the appropriate metadata attributes.
285
286							=back
287
288							=head2 Comments
289
290							Comments are the other commented lines from a text file (lines
291							beginning with a #) that were not parsed as metadata.
292
293							=over 4
294
295							=item comments
296
297							Returns a copy of the array containing commented lines.
298
299							=item add_comment
300
301							Appends the text string to the comment array.
302
303							=item delete_comment
304
305							Deletes a comment. Provide the array index of the comment to
306							delete. If an index is not provided, ALL comments will be deleted!
307
308							=item vcf_headers
309
310							For VCF files, this will partially parse the VCF headers into a
311							hash structure that can be queried or manipulated. Each header
312							line is parsed for the primary key, being the first word after the
313							## prefix, e.g. INFO, FORMAT, FILTER, contig, etc. For the simple
314							values, they are stored as the value. For complex entries, such as
315							with INFO and FORMAT, a second level hash is created with the ID
316							extracted and used as the second level key. The value is always the
317							remainder of the string.
318
319							For example, the following would be a simple parsed vcf header in
320							code representation.
321
322							$vcf_header = {
323							FORMAT => {
324							GT => q(ID=GT,Number=1,Type=String,Description="Genotype"),
325							AD => q(ID=AD,Number=.,Type=Integer,Description="ref,alt Allelic depths"),
326							},
327							fileDate => 20150715,
328							}
329
330							=item rewrite_vcf_headers
331
332							If you have altered the vcf headers exported by the vcf_headers()
333							method, then this method will rewrite the hash structure as new
334							comment lines. Do this prior to writing the new file stream
335							or else you will lose your changed VCF header metadata.
336
337							=back
338
339							=head2 Column Metadata
340
341							Information about the columns may be accessed. This includes the
342							names of the column and shortcuts to specific identifiable columns,
343							such as name and coordinates. In addition, each column may have
344							additional metadata. Each metadata is a series of key =E<gt>
345							value pairs. The minimum keys are 'index' (the 0-based index
346							of the column) and 'name' (the column header name). Additional
347							keys and values may be queried or set as appropriate. When the
348							file is written, these are stored as commented metadata lines at
349							the beginning of the file. Setting metadata is futile after
350							reading or writing has begun.
351
352							=over 4
353
354							=item list_columns
355
356							Returns an array or array reference of the column names
357							in ascending (left to right) order.
358
359							=item number_columns
360
361							Returns the number of columns in the Data table.
362
363							=item last_column
364
365							Returns the array index of the last (rightmost) column in the
366							Data table.
367
368							=item name
369
370							$Stream->name($index, $new_name);
371							my $name = $Stream->name($i);
372
373							Convenient method to return the name of the column given the
374							index number. A column may also be renamed by passing a new name.
375
376							=item metadata
377
378							$Stream->metadata($index, $key, $new_value);
379							my $value = $Stream->metadata($index, $key)
380
381							Returns or sets the metadata value for a specific $key for a
382							specific column $index.
383
384							This may also be used to add a new metadata key. Simply provide
385							the name of a new $key that is not present.
386
387							If no key is provided, then a hash or hash reference is returned
388							representing the entire metadata for that column.
389
390							=item copy_metadata
391
392							$Stream->copy_metadata($source, $target);
393
394							This method will copy the metadata (everything except name and
395							index) between the source column and target column. Returns 1 if
396							successful.
397
398							=item delete_metadata
399
400							$Stream->delete_metadata($index, $key);
401
402							Deletes a column-specific metadata $key and value for a specific
403							column $index. If a $key is not provided, then all metadata keys
404							for that index will be deleted.
405
406							=item find_column
407
408							my $i = $Stream->find_column('Gene');
409							my $i = $Stream->find_column('^Gene$')
410
411							Searches the column names for the specified column name. This
412							employs a case-insensitive grep search, so simple substitutions
413							may be made.
414
415							=item chromo_column
416
417							=item start_column
418
419							=item stop_column
420
421							=item strand_column
422
423							=item name_column
424
425							=item type_column
426
427							=item id_column
428
429							These methods will return the identified column best matching
430							the description. Returns C<undef> if that column is not present.
431							These use the L</find_column> method with a predefined list of
432							aliases.
433
434							=back
435
436							=head2 Modifying Columns
437
438							These methods allow modification to the number and order of the
439							columns in a Stream object. These methods can only be employed
440							prior to opening a file handle for writing, i.e. before the first
441							L</write_row> method is called. This enables one, for example, to
442							duplicate a read-only Stream object to create a write-only Stream,
443							add or delete columns, and then begin the row iteration.
444
445							=over 4
446
447							=item add_column
448
449							my $i = $Stream->add_column($name);
450
451							Appends a new column at the rightmost position (highest
452							index). It adds the column header name and creates a
453							new column metadata hash. Pass a text string representing
454							the new column name. It returns the new column index if
455							successful.
456
457							=item copy_column
458
459							my $j = $Stream->copy_column($i);
460
461							This will copy a column, appending the duplicate column at
462							the rightmost position (highest index). It will duplicate
463							column metadata as well. It will return the new index
464							position.
465
466							=item delete_column
467
468							Deletes one or more specified columns. Any remaining
469							columns rightwards will have their indices shifted
470							down appropriately. If you had identified one of the
471							shifted columns, you may need to re-find or calculate
472							its new index.
473
474							=item reorder_column
475
476							$Data->reorder_column($c,$b,$a,$a);
477
478							Reorders columns into the specified order. Provide the
479							new desired order of indices. Columns could be duplicated
480							or deleted using this method. The columns will adopt their
481							new index numbers.
482
483							=back
484
485							=head2 Row Data Access
486
487							Once a file Stream object has been opened, and metadata and/or
488							columns adjusted as necessary, then the file contents can be
489							iterated through, one row at a time. This is typically a one-way
490							direction. If you need to go back or start over, the easiest thing
491							to do is re-open the file as a new Stream object.
492
493							There are two main methods, L</next_row> for reading and L</write_row>
494							for writing. They cannot and should not be used on the same Stream
495							object.
496
497							=over 4
498
499							=item next_row
500
501							=item next_line
502
503							=item read_line
504
505							This method reads the next line in the file handle and returns a
506							L<Bio::ToolBox::Data::Feature> object. This object represents the
507							values in the current file row.
508
509							Note that strand values and 0-based start coordinates are automatically
510							converted to BioPerl conventions if required by the file type.
511
512							=item add_row
513
514							=item add_line
515
516							=item write_row
517
518							=item write_line
519
520							$Data->add_row(\@values);
521							$Data->add_row($Row); # Bio::ToolBox::Data::Feature object
522
523							This method writes a new row or line to a file handle. The first
524							time this method is called the file handle is automatically opened for
525							writing. Up to this point, columns may be manipulated. After this point,
526							columns cannot be adjusted (otherwise the file structure becomes
527							inconsistent).
528
529							This method may be implemented in one of three ways, based on the type
530							of data that is passed.
531
532							=over 4
533
534							=item * A Feature object
535
536							A Feature object representing a row from another L<Bio::ToolBox::Data>
537							data table or Stream. The values from this object will be automatically
538							obtained. Modified strand and 0-based coordinates may be adjusted back
539							as necessary.
540
541							=item * An array reference of values
542
543							Pass an array reference of values. The number of elements should match the
544							number of expected columns. The values will be automatically joined using tabs.
545							This implementation should be used if you are using values from another Stream
546							and the number of columns has been modified.
547
548							Manipulation of strand and 0-based starts may be performed if the
549							metadata indicates this should be done.
550
551							=item * A string
552
553							Pass a text string. This assumes the column values are already tab
554							concatenated. A new line character is appended if one is not included.
555							No data manipulation (strand or 0-based starts) or sanity checking of the
556							required number of columns is performed. Use with caution!
557
558							=back
559
560							=item iterate
561
562							$Stream->iterate( sub {
563							my $row = shift;
564							my $number = $row->value($index);
565							my $log_number = log($number);
566							$row->value($index, $log_number);
567							} );
568
569							A convenience method that will process a code reference for every line
570							in the file. Pass a subroutine or code reference. The subroutine will
571							receive the line as a L<Bio::ToolBox::Data::Feature> object, just as with
572							the L</next_row> method.
573
574							=back
575
576							=head2 File Handle methods
577
578							The below methods work with the file handle. When you are finished with
579							a Stream, you should be kind and close the file handle properly.
580
581							=over 4
582
583							=item mode
584
585							Returns the write mode of the Stream object. Read-only objects
586							return false (0) and write-only Stream objects return true (1).
587
588							=item close_fh
589
590							Closes the file handle.
591
592							=item fh
593
594							Returns the L<IO::Handle> compatible file handle object representing
595							the file handle. Use with caution.
596
597							=back
598
599							=head1 SEE ALSO
600
601							L<Bio::ToolBox::Data>, L<Bio::ToolBox::Data::Feature>
602
603							=cut
604
605	2			2		1813	use strict;
	2					5
	2					80
606	2			2		12	use Carp qw(carp cluck croak confess);
	2					5
	2					135
607	2			2		17	use base 'Bio::ToolBox::Data::core';
	2					3
	2					878
608	2			2		871	use Bio::ToolBox::Data::Feature;
	2					8
	2					3645
609
610							1;
611
612
613							#### Initialize ####
614
615							sub new {
616	28			28	1	854	my $class = shift;
617	28					82	my %args = @_;
618
619							# file arguments
620	28		50			89	$args{in} ||= $args{file} || undef;
			66
621	28		100			146	$args{out} ||= undef;
622	28	50	66			73	unless ($args{in} or $args{out}) {
623	0					0	cluck "a filename must be specified with 'in' or 'out' argument keys!\n";
624	0					0	return;
625							}
626	28	50	66			150	if (defined $args{in} and defined $args{out}) {
627	0					0	cluck "cannot define both 'in' and 'out' arguments!\n";
628	0					0	return;
629							}
630	28		50			119	$args{noheader} ||= 0;
631
632							# prepare object
633	28					122	my $self = $class->SUPER::new();
634
635							# open an existing file for reading
636	28	100				93	if ($args{in}) {
		50
637
638							# check and open file
639	26					113	my $filename = $self->check_file($args{in});
640	26	50				81	unless ($filename) {
641	0					0	carp sprintf "file '%s' does not exist!", $args{in};
642	0					0	return;
643							}
644	26					123	$self->add_file_metadata($filename);
645	26	50				97	$self->open_to_read_fh or return;
646	26					93	$self->{mode} = 0; # read mode
647
648							# parse column headers
649	26					143	$self->parse_headers($args{noheader});
650	26					62	$self->{line_count} = $self->{header_line_count};
651
652							# push a dummy row, this will get tossed when the first next_row() is called
653	26					73	$self->{data_table}->[1] = $self->{'column_names'};
654							}
655
656							# prepare to write to a new stream
657							elsif ($args{out}) {
658
659							# add file name information
660	2					9	$self->add_file_metadata($args{out});
661
662							# we will not open the file handle quite yet in case the user
663							# wants to modify metadata
664	2					5	$self->{mode} = 1; # set to write mode
665	2					4	$self->{fh} = undef;
666
667							# get names of columns user may have passed
668	2					4	my @columns;
669	2	100				9	if (exists $args{columns}) {
		50
670	1					2	@columns = @{ $args{columns} };
	1					4
671							}
672							elsif (exists $args{datasets}) {
673	0					0	@columns = @{ $args{datasets} };
	0					0
674							}
675
676							# add the column names
677	2	100	33			13	if (@columns) {
		50	33
		50	0
		0
678	1					3	foreach my $c (@columns) {
679	4					9	$self->add_column($c);
680							}
681							}
682							elsif (exists $args{gff} and $args{gff}) {
683							# use standard names for the number of columns indicated
684							# we trust that the user knows the subtle difference between gff versions
685	0					0	$self->add_gff_metadata($args{gff});
686	0	0				0	unless ($self->extension =~ /g[tf]f/) {
687							$self->{extension} = $args{gff} == 2.5 ? '.gtf' :
688	0	0				0	$args{gff} == 3 ? '.gff3' : '.gff';
		0
689							}
690							}
691							elsif (exists $args{bed} and $args{bed}) {
692							# use standard names for the number of columns indicated
693	1	50	33			10	unless ($args{bed} =~ /^\d{1,2}$/ and $args{bed} >= 3) {
694	0					0	carp "bed parameter must be an integer 3-12!";
695	0					0	return;
696							}
697	1					6	$self->add_bed_metadata($args{bed});
698	1	50				4	unless ($self->extension =~ /bed|peak/) {
699	0					0	$self->{extension} = '.bed';
700							}
701							}
702							elsif (exists $args{ucsc} and $args{ucsc}) {
703							# a ucsc format such as refFlat, genePred, or genePredExt
704	0					0	my $u = $self->add_ucsc_metadata($args{ucsc});
705	0	0				0	unless ($u) {
706	0					0	carp "unrecognized number of columns for ucsc format!";
707	0					0	return;
708							};
709	0	0				0	unless ($self->extension =~ /ucsc|ref+lat|genepred/) {
710	0					0	$self->{extension} = '.ucsc';
711							}
712							}
713							# else it will be an empty object with no columns
714
715							# append gz if necessary
716	2	0	33			6	if (exists $args{gz} and $args{gz} and $self->extension !~ /gz$/) {
			0
717	0					0	$self->{extension} .= '.gz';
718							}
719
720							# rebuild the filename after modifying the extension
721	2					10	$self->{filename} = $self->{path} . $self->{basename} . $self->{extension};
722
723							# add feature
724	2		50			47	$args{feature} ||= $args{features} || undef;
			33
725	2	100				8	$self->feature($args{feature}) unless $self->feature;
726							}
727
728	28					177	return $self;
729							}
730
731
732							sub duplicate {
733	1			1	1	416	my ($self, $filename) = @_;
734	1	50				4	unless ($filename) {
735	0					0	carp "a new filename must be provided!";
736	0					0	return;
737							}
738	1	50				5	if ($filename eq $self->filename) {
739	0					0	carp "provided filename is not unique from that in metadata!";
740	0					0	return;
741							}
742
743							# duplicate the data structure
744	1					4	my $columns = $self->list_columns;
745	1	50				4	my $Dup = $self->new(
746							'out' => $filename,
747							'columns' => $columns,
748							) or return;
749
750							# copy the metadata
751	1					4	for (my $i = 0; $i < $self->number_columns; $i++) {
752							# column metadata
753	4					12	my %md = $self->metadata($i);
754	4					15	$Dup->{$i} = \%md;
755							}
756	1					4	foreach (qw(feature program db bed gff vcf ucsc headers)) {
757							# various keys
758	8					15	$Dup->{$_} = $self->{$_};
759							}
760	1					7	my @comments = $self->comments;
761	1					2	push @{$Dup->{comments}}, @comments;
	1					4
762
763	1					4	return $Dup;
764							}
765
766
767
768							### Column manipulation
769
770							sub add_column {
771	4			4	1	9	my ($self, $name) = @_;
772	4	50				22	return unless $name;
773	4	50				11	unless ($self->mode) {
774	0					0	cluck "We have a read-only Stream object, cannot add columns";
775	0					0	return;
776							}
777	4	50				16	if (defined $self->{fh}) {
778							# Stream file handle is opened
779	0					0	cluck "Cannot modify columns when a Stream file handle is opened!";
780	0					0	return;
781							}
782
783	4					12	my $column = $self->number_columns;
784	4					14	$self->{$column} = {
785							'name' => $name,
786							'index' => $column,
787							};
788	4					8	$self->{data_table}->[0][$column] = $name;
789	4					15	$self->{number_columns}++;
790	4	50				10	delete $self->{column_indices} if exists $self->{column_indices};
791	4	50	33			10	if ($self->gff or $self->bed or $self->ucsc or $self->vcf) {
			33
			33
792							# check if we maintain integrity, at least insofar what we test
793	0					0	$self->verify(1); # silence so user doesn't get these messages
794							}
795	4					10	return $column;
796							}
797
798							sub copy_column {
799	0			0	1	0	my $self = shift;
800	0	0				0	unless ($self->mode) {
801	0					0	confess "We have a read-only Stream object, cannot add columns";
802							}
803	0	0				0	if (defined $self->{fh}) {
804							# Stream file handle is opened
805	0					0	confess "Cannot modify columns when a Stream file handle is opened!";
806							}
807	0					0	my $index = shift;
808	0	0				0	return unless defined $index;
809
810	0					0	my $new_index = $self->add_column( $self->name($index) );
811	0					0	$self->copy_metadata($index, $new_index);
812	0					0	return $new_index;
813							}
814
815
816
817							#### Row Access ####
818
819							*next_line = *read_line = \&next_row;
820
821							sub next_row {
822	201			201	1	319	my $self = shift;
823	201	50				402	if ($self->{mode}) {
824	0					0	confess "Stream object is write-only! cannot read";
825							}
826
827							# read and add the next line in the file
828	201	100				3958	my $line = $self->{fh}->getline or return;
829	196					4472	$self->{line_count}++;
830	196	50				504	if (substr($line,0,1) eq '#') {
831							# we shouldn't have internal comment lines, but just in case....
832							# could be a gff3 pragma
833	0					0	$self->add_comment($line);
834	0					0	return $self->next_row;
835							}
836
837							# add the current line to the data table as row 1
838	196					260	pop @{ $self->{data_table} }; # remove the old line
	196					325
839	196					657	$self->add_data_line($line);
840
841							# return the feature
842	196					515	return Bio::ToolBox::Data::Feature->new(
843							'data' => $self,
844							'index' => 1,
845							);
846							}
847
848
849							*add_row = *add_line = *write_line = \&write_row;
850
851							sub write_row {
852	78			78	1	239	my $self = shift;
853	78					110	my $data = shift;
854	78	50				153	unless ($self->{mode}) {
855	0					0	confess "Stream object is read-only! cannot write";
856							}
857
858							# open the file handle if it hasn't been opened yet
859	78	100				150	unless (defined $self->{fh}) {
860							# we first write a standard empty data file with metadata and headers
861	2					8	my $newfile = $self->write_file($self->filename);
862	2	50				7	unless ($newfile) {
863	0					0	die "unable to write file!";
864							}
865
866							# just in case the filename is changed when writing the file
867	2	50				8	if ($newfile ne $self->filename) {
868	0					0	$self->add_file_metadata($newfile);
869							}
870
871							# then we re-open the file for appending
872	2	50				8	my $fh = $self->open_to_write_fh($newfile, undef, 1) or
873							die "unable to append to file $newfile!";
874	2					7	$self->{fh} = $fh;
875							}
876
877							# identify what kind of data we are dealing with
878	78					127	my $data_ref = ref $data;
879	78	100				155	if ($data_ref eq 'Bio::ToolBox::Data::Feature') {
		50
880							# user passed a Feature object
881	39					92	$self->{fh}->print( join("\t", ($data->row_values)), "\n" );
882							}
883							elsif ($data_ref eq 'ARRAY') {
884							# user passed an array of values
885	39					129	$self->{fh}->print( join("\t", @$data), "\n");
886							}
887							else {
888							# assume the passed data is a string
889							# make sure it has a newline
890	0	0				0	unless ($data =~ /\n$/) {
891	0					0	$data .= "\n";
892							}
893	0					0	$self->{fh}->print($data);
894							}
895	78					601	return 1;
896							}
897
898							sub iterate {
899	0			0	1	0	my $self = shift;
900	0					0	my $code = shift;
901	0	0				0	unless (ref $code eq 'CODE') {
902	0					0	cluck "iterate_function() method requires a code reference!";
903	0					0	return;
904							}
905	0					0	while (my $row = $self->next_row) {
906	0					0	&$code($row);
907							}
908	0					0	return 1;
909							}
910
911
912
913
914							#### File handle ####
915
916							sub mode {
917	4			4	1	7	my $self = shift;
918	4					11	return $self->{mode};
919							}
920
921							sub DESTROY {
922	28			28		26989	my $self = shift;
923	28					116	$self->close_fh;
924							}
925
926
927							####################################################
928
929							__END__