File Coverage

lib/PDL/IO/Misc.pd

Criterion	Covered	Total	%
statement	321	472	68.0
branch	149	290	51.3
condition	47	97	48.4
subroutine	22	24	91.6
pod	3	7	42.8
total	542	890	60.9

line	stmt	bran	cond	sub	pod	time	code
1							# and endian-ness of machine
2							require PDL::Core::Dev;
3							my $isbigendian = PDL::Core::Dev::isbigendian();
4
5							use strict;
6							use warnings;
7							pp_addpm({At=>'Top'},<<'EOD');
8	22			22		209	use strict;
	22					49
	22					920
9	22			22		110	use warnings;
	22					48
	22					1469
10
11							=head1 NAME
12
13							PDL::IO::Misc - misc IO routines for PDL
14
15							=head1 DESCRIPTION
16
17							Some basic I/O functionality: tables, byte-swapping
18
19							=head1 SYNOPSIS
20
21							use PDL::IO::Misc;
22
23							=cut
24
25							EOD
26
27							pp_addpm({At=>'Bot'},<<'EOD');
28
29							=head1 AUTHOR
30
31							Copyright (C) Karl Glazebrook 1997, Craig DeForest 2001,
32							2003, and Chris Marshall 2010. All rights reserved. There is
33							no warranty. You are allowed to redistribute this software
34							/ documentation under certain conditions. For details, see
35							the file COPYING in the PDL distribution. If this file is
36							separated from the PDL distribution, the copyright notice
37							should be included in the file.
38
39							=cut
40
41							EOD
42
43							pp_add_exported('',"rcols wcols swcols rgrep");
44
45							############################## PM CODE ########################################
46
47							pp_addpm(<<'!NO!SUBS!');
48	22			22		146
	22					56
	22					152
49	22			22		180	use PDL::Primitive;
	22					42
	22					4288
50	22			22		152	use PDL::Types;
	22					40
	22					1586
51	22			22		130	use PDL::Options;
	22					40
	22					202
52	22			22		183	use PDL::Bad;
	22					70
	22					1704
53	22			22		8696	use Carp;
	22					24080
	22					1692
54	22			22		166	use Symbol qw/ gensym /;
	22					39
	22					17595
55							use List::Util;
56
57							!NO!SUBS!
58
59							sub defswap {
60							my ($bytes, $doc) = @_;
61							my $sized_type = "struct {char n[$bytes];}";
62							pp_def(
63							"bswap$bytes",
64							Pars => '[io] x()',
65							RedoDimsCode => <
66							if (sizeof(\$GENERIC(x)) < $bytes)
67							\$CROAK("Tried to bswap size $bytes but data type only %zd bytes", sizeof(\$GENERIC(x)));
68							EOF
69							Code => <
70							int i;
71							int n = sizeof(\$x()) / $bytes;
72							$sized_type aa = (void ) &\$x();
73							for (i=0;i
74							$sized_type bb; memcpy(&bb, aa+i, $bytes);
75							PDL_Byte a = (PDL_Byte) (void*) (aa+i);
76							PDL_Byte b = (PDL_Byte) &bb;
77							@{[map "a[$_] = b[".($bytes-$_-1)."];", 0..$bytes-1]}
78							}
79							EOF
80							Doc => $doc,
81							);
82							}
83							defswap(2, "Swaps pairs of bytes in argument x()");
84							defswap(4, "Swaps quads of bytes in argument x()");
85							defswap(8, "Swaps octets of bytes in argument x()");
86							defswap(12, "Swaps 12s of bytes in argument x()");
87							defswap(16, "Swaps 16s of bytes in argument x()");
88							defswap(24, "Swaps 24s of bytes in argument x()");
89							defswap(32, "Swaps 32s of bytes in argument x()");
90
91							pp_addpm(<<'!NO!SUBS!');
92
93							# Internal routine to extend PDL array by size $n along last dimension
94							# - Would be nice to have a proper extend function rather than hack
95							# - Is a NO-OP when handed a perl ARRAY ref rather than an ndarray arg
96	30			30		43	sub _ext_lastD { # Called by rcols and rgrep
97	30	50				59	my ($x,$n) = @_;
98	30					71	if (ref($_[0]) ne 'ARRAY') {
99	30					35	my @nold = $x->dims;
100	30					30	my @nnew = @nold;
101	30					55	$nnew[-1] += $n; # add $n to the last dimension
102	30					300	my $y = zeroes($x->type,@nnew); # New pdl
103	30					151	my $yy = $y->mv(-1,0)->slice("0:".($nold[-1]-1))->mv(0,-1);
104	30					146	$yy .= $x;
105							$_[0] = $y;
106	30					65	}
107							1;
108							}
109
110							# Implements PDL->at() for either 1D PDL or ARRAY arguments
111							# TODO: Need to add support for multidim ndarrays parallel to rcols
112	32			32		22	sub _at_1D ($$) { # Called by wcols and swcols
113	32					22	my $data = $_[0];
114	32	50				32	my $index = $_[1];
115	0					0	if (ref $data eq 'ARRAY') {
116							return $data->[$index];
117	32					53	} else {
118							return $data->at($index);
119							}
120							}
121
122							# squeezes "fluffy" perl list values into column data type
123	59			59		69	sub _burp_1D {
124	59					54	my $data = $_[0][0];
125	59					56	my $databox = $_[0][1];
126	59					51	my $index = $_[1];
	59					73
127	59	50				104	my $start = $index - @{$databox} + 1;
		100
128	0					0	if (ref $data eq 'ARRAY') {
	0					0
	0					0
129							push @{$data}, @{$databox};
130							} elsif ( ref($databox->[0]) eq "ARRAY" ) {
131							# could add POSIX::strtol for hex and octal support but
132	4					20	# can't break float conversions (how?)
133							$data->slice(":,$start:$index") .= pdl($data->type, $databox);
134							} else {
135							# could add POSIX::strtol for hex and octal support but
136	55					176	# can't break float conversions (how?)
137							$data->slice("$start:$index") .= pdl($data->type, $databox);
138	59					387	}
139							$_[0] = [ $data, [] ];
140							}
141
142							# taken outside of rcols() to avoid clutter
143	12			12		25	sub _handle_types ($$$) {
144	12					14	my $ncols = shift;
145	12					14	my $deftype = shift;
146							my $types = shift;
147	12	50				37
148							barf "Unknown PDL type given for DEFTYPE.\n"
149							unless ref($deftype) eq "PDL::Type";
150	12	50				35
151							my @cols = ref($types) eq "ARRAY" ? @$types : ();
152	12	100				27
153							if ( $#cols > -1 ) {
154	2	50				20	# truncate if required
155							$#cols = $ncols if $#cols > $ncols;
156
157	2					6	# check input values are sensible
158	2	50				7	for ( 0 .. $#cols ) {
159							barf "Unknown value '$cols[$_]' in TYPES array.\n"
160							unless ref($cols[$_]) eq "PDL::Type";
161							}
162							}
163
164	12					80	# fill in any missing columns
	20					25
165							for ( ($#cols+1) .. $ncols ) { push @cols, $deftype; }
166	12					32
167							return @cols;
168							} # sub: _handle_types
169
170	22			22		177
	22					42
	22					4368
171							# Whether an object is an IO handle
172	17			17		29	use Scalar::Util;
173							sub _is_io_handle {
174							my $h = shift;
175	17					42	# reftype catches almost every handle, except: *MYHANDLE
176	17		66			155	# fileno catches *MYHANDLE, but doesn't catch handles that aren't files
177							my $reftype = Scalar::Util::reftype($h);
178							return defined(fileno($h)) \|\| (defined($reftype) && $reftype eq 'GLOB');
179							}
180
181
182							=head2 rcols
183
184							=for ref
185
186							Read specified ASCII cols from a file into ndarrays and perl
187							arrays (also see L).
188
189							=for usage
190
191							Usage:
192							($x,$y,...) = rcols( *HANDLE\|"filename", { EXCLUDE => '/^!/' }, $col1, $col2, ... )
193							$x = rcols( *HANDLE\|"filename", { EXCLUDE => '/^!/' }, [] )
194							($x,$y,...) = rcols( *HANDLE\|"filename", $col1, $col2, ..., { EXCLUDE => '/^!/' } )
195							($x,$y,...) = rcols( *HANDLE\|"filename", "/foo/", $col1, $col2, ... )
196
197							For each column number specified, a 1D output PDL will be
198							generated. Anonymous arrays of column numbers generate
199							2D output ndarrays with dim0 for the column data and dim1
200							equal to the number of columns in the anonymous array(s).
201
202							An empty anonymous array as column specification will
203							produce a single output data ndarray with dim(1) equal
204							to the number of columns available.
205
206							There are two calling conventions - the old version, where a
207							pattern can be specified after the filename/handle, and the
208							new version where options are given as as hash reference.
209							This reference can be given as either the second or last
210							argument.
211
212							The default behaviour is to ignore lines beginning with
213							a # character and lines that only consist of whitespace.
214							Options exist to only read from lines that match, or do
215							not match, supplied patterns, and to set the types of the
216							created ndarrays.
217
218							Can take file name or *HANDLE, and if no explicit column
219							numbers are specified, all are assumed. For the allowed types,
220							see L.
221
222							Options (case insensitive):
223
224							EXCLUDE or IGNORE
225							- ignore lines matching this pattern (default B<'/^#/'>).
226
227							INCLUDE or KEEP
228							- only use lines which match this pattern (default B<''>).
229
230							LINES
231							- a string pattern specifying which line numbers to use.
232							Line numbers start at 0 and the syntax is 'a:b:c' to use
233							every c'th matching line between a and b (default B<''>).
234
235							DEFTYPE
236							- default data type for stored data (if not specified, use the type
237							stored in C<$PDL::IO::Misc::deftype>, which starts off as B).
238
239							TYPES
240							- reference to an array of data types, one element for each column
241							to be read in. Any missing columns use the DEFTYPE value (default B<[]>).
242
243							COLSEP
244							- splits on this string/pattern/qr{} between columns of data. Defaults to
245							$PDL::IO::Misc::defcolsep.
246
247							PERLCOLS
248							- an array of column numbers which are to be read into perl arrays
249							rather than ndarrays. Any columns not specified in the explicit list
250							of columns to read will be returned after the explicit columns.
251							(default B).
252
253							COLIDS
254							- if defined to an array reference, it will be assigned the column
255							ID values obtained by splitting the first line of the file in the
256							identical fashion to the column data.
257
258							CHUNKSIZE
259							- the number of input data elements to batch together before appending
260							to each output data ndarray (Default value is 100). If CHUNKSIZE is
261							greater than the number of lines of data to read, the entire file is
262							slurped in, lines split, and perl lists of column data are generated.
263							At the end, effectively pdl(@column_data) produces any result ndarrays.
264
265							VERBOSE
266							- be verbose about IO processing (default C<$PDL::vebose>)
267
268							=for example
269
270							For example:
271
272							$x = PDL->rcols 'file1'; # file1 has only one column of data
273							$x = PDL->rcols 'file2', []; # file2 can have multiple columns, still 1 ndarray output
274							# (empty array ref spec means all possible data fields)
275
276							($x,$y) = rcols 'table.csv', { COLSEP => ',' }; # read CSV data file
277							($x,$y) = rcols *STDOUT; # default separator for lines like '32 24'
278
279							# read in lines containing the string foo, where the first
280							# example also ignores lines that begin with a # character.
281							($x,$y,$z) = rcols 'file2', 0,4,5, { INCLUDE => '/foo/' };
282							($x,$y,$z) = rcols 'file2', 0,4,5, { INCLUDE => '/foo/', EXCLUDE => '' };
283
284							# ignore the first 27 lines of the file, reading in as ushort's
285							($x,$y) = rcols 'file3', { LINES => '27:-1', DEFTYPE => ushort };
286							($x,$y) = rcols 'file3', { LINES => '27:', TYPES => [ ushort, ushort ] };
287
288							# read in the first column as a perl array and the next two as ndarrays
289							# with the perl column returned after the ndarray outputs
290							($x,$y,$name) = rcols 'file4', 1, 2 , { PERLCOLS => [ 0 ] };
291							printf "Number of names read in = %d\n", 1 + $#$name;
292
293							# read in the first column as a perl array and the next two as ndarrays
294							# with PERLCOLS changing the type of the first returned value to perl list ref
295							($name,$x,$y) = rcols 'file4', 0, 1, 2, { PERLCOLS => [ 0 ] };
296
297							# read in the first column as a perl array returned first followed by the
298							# the next two data columns in the file as a single Nx2 ndarray
299							($name,$xy) = rcols 'file4', 0, [1, 2], { PERLCOLS => [ 0 ] };
300
301
302							NOTES:
303
304							1. Quotes are required on patterns or use the qr{} quote regexp syntax.
305
306							2. Columns are separated by whitespace by default, use the COLSEP option
307							separator to specify an alternate split pattern or string or specify an
308							alternate default separator by setting C<$PDL::IO::Misc::defcolsep> .
309
310							3. Legacy support is present to use C<$PDL::IO::Misc::colsep> to set the
311							column separator but C<$PDL::IO::Misc::colsep> is not defined by default.
312							If you set the variable to a defined value it will get picked up.
313
314							4. LINES => '-1:0:3' may not work as you expect, since lines are skipped
315							when read in, then the whole array reversed.
316
317							5. For consistency with wcols and rcols 1D usage, column data is loaded
318							into the rows of the pdls (i.e., dim(0) is the elements read per column
319	22			22		170	in the file and dim(1) is the number of columns of data read.
	22					54
	22					61027
320
321							=cut
322
323							use vars qw/ $colsep $defcolsep $deftype /;
324
325							$defcolsep = ' '; # Default column separator
326							$deftype = double; # Default type for ndarrays
327
328							my $defchunksize = 100; # Number of perl list items to append to ndarray
329							my $usecolsep; # This is the colsep value that is actually used
330
331							# NOTE: XXX
332							# need to look at the line-selection code. For instance, if want
333							# lines => '-1:0:3',
334							# read in all lines, reverse, then apply the step
335							# -> fix point 4 above
336	11			11	1	2567	#
337							# perhaps should just simplify the LINES option - ie remove
338							# support for reversed arrays?
339	12			12	0	27
340	12	50				53	sub rcols{ PDL->rcols(@_) }
341
342							sub PDL::rcols {
343	12					49	my $class = shift;
344	12	100				69	barf 'Usage ($x,$y,...) = rcols( *HANDLE\|"filename", ["/pattern/" or \%options], $col1, $col2, ..., [ \%options] )'
345	12	100	50			813	if $#_<0;
346	12					32
347							my $is_handle = _is_io_handle $_[0];
348							my $fh = $is_handle ? $_[0] : gensym;
349	12					255	open $fh, $_[0] or die "File $_[0] not found\n" unless $is_handle;
350							shift;
351
352							# set up default options
353							my $opt = PDL::Options->new( {
354							CHUNKSIZE => undef,
355							COLIDS => undef,
356							COLSEP => undef,
357							DEFTYPE => $deftype,
358							EXCLUDE => '/^#/',
359							INCLUDE => undef,
360							LINES => '',
361	12					76	PERLCOLS => undef,
362							TYPES => [],
363							VERBOSE=> $PDL::verbose,
364	12	100				74	} );
365							$opt->synonyms( { IGNORE => 'EXCLUDE', KEEP => 'INCLUDE' } );
366	10	100				78
	1	50				7
		100
367	0					0	# has the user supplied any options
368	2					12	if ( defined($_[0]) ) {
369							# ensure the old-style behaviour by setting the exclude pattern to undef
370							if ( $_[0] =~ m\|^/.*/$\| ) { $opt->options( { EXCLUDE => undef, INCLUDE => shift } ); }
371							elsif ( ref($_[0]) eq "Regexp" ) { $opt->options( { EXCLUDE => undef, INCLUDE => shift } ); }
372	12	100	100			71	elsif ( ref($_[0]) eq "HASH" ) { $opt->options( shift ); }
373							}
374
375	12					36	# maybe the last element is a hash array as well
376							$opt->options( pop ) if defined($_[-1]) and ref($_[-1]) eq "HASH";
377
378	12	50				35	# a reference to a hash array
379	12	100				61	my $options = $opt->current();
380
381							# handle legacy colsep variable
382	12					22	$usecolsep = (defined $colsep) ? qr{$colsep} : undef;
383	24	100	100			86	$usecolsep = qr{$options->{COLSEP}} if $options->{COLSEP};
384	12	50				63
385	12					72	# what are the patterns?
386	12					181	foreach my $pattern ( qw( INCLUDE EXCLUDE ) ) {
387							if ( $options->{$pattern} and ref($options->{$pattern}) ne "Regexp" ) {
388	0					0	if ( $options->{$pattern} =~ m\|^/.*/$\| ) {
389							$options->{$pattern} =~ s\|^/(.*)/$\|$1\|;
390							$options->{$pattern} = qr($options->{$pattern});
391							} else {
392							barf "rcols() - unable to process $pattern value.\n";
393							}
394	12		33			49	}
395	12					24	}
396
397							# CHUNKSIZE controls memory/time tradeoff of ndarray IO
398	12					22	my $chunksize = $options->{CHUNKSIZE} \|\| $defchunksize;
399							my $nextburpindex = -1;
400	12					18
401	12	50				27	# which columns are to be read into ndarrays and which into perl arrays?
	0					0
402							my @end_perl_cols = (); # unique perl cols to return at end
403	12					13
404	12					22	my @perl_cols = (); # perl cols index list from PERLCOLS option
	0					0
405							@perl_cols = @{ $$options{PERLCOLS} } if $$options{PERLCOLS};
406
407	12					22	my @is_perl_col; # true if index corresponds to a perl column
408							for (@perl_cols) { $is_perl_col[$_] = 1; };
409							# print STDERR "rcols: \@is_perl_col is @is_perl_col\n";
410
411							my ( @explicit_cols ) = @_; # call specified columns to read
412	12					15	# print STDERR "rcols: \@explicit_cols is @explicit_cols\n";
413	12	100				32
414	1	50				8	# work out which line numbers are required
		0
415	1					2	# - the regexp's are a bit over the top
	1					3
416							my ( $x, $y, $c );
417	0					0	if ( $$options{LINES} ne '' ) {
	0					0
	0					0
418							if ( $$options{LINES} =~ /^\s([+-]?\d)\s:\s([+-]?\d)\s$/ ) {
419	0					0	$x = $1; $y = $2;
420							} elsif ( $$options{LINES} =~ /^\s([+-]?\d)\s:\s([+-]?\d)\s:\s([+]?\d)\s*$/ ) {
421							$x = $1; $y = $2; $c = $3;
422							} else {
423							barf "rcols() - unable to parse LINES option.\n";
424	12					20	}
425	12	100	66			32	}
426	12	100	66			24
427	12		50			30	# Since we do not know how many lines there are in advance, things get a bit messy
428							my ( $index_start, $index_end ) = ( 0, -1 );
429							$index_start = $x if defined($x) and $x ne '';
430							$index_end = $y if defined($y) and $y ne '';
431							my $line_step = $c \|\| 1;
432	12					23
433	12	100	66			70	# $line_rev = 0/1 for normal order/reversed
		50	33
		0	0
		0	0
		0
434							# $line_start/_end refer to the first and last line numbers that we want
435	11					15	# (the values of which we may not know until we've read in all the file)
	11					14
436							my ( $line_start, $line_end, $line_rev );
437							if ( ($index_start >= 0 and $index_end < 0) ) {
438	1					1	# eg 0:-1
	1					2
439							$line_rev = 0; $line_start = $index_start;
440							} elsif ( $index_end >= 0 and $index_start < 0 ) {
441	0					0	# eg -1:0
	0					0
	0					0
442							$line_rev = 1; $line_start = $index_end;
443							} elsif ( $index_end >= $index_start and $index_start >= 0 ) {
444	0					0	# eg 0:10
	0					0
	0					0
445							$line_rev = 0; $line_start = $index_start; $line_end = $index_end;
446							} elsif ( $index_start > $index_end and $index_end >= 0 ) {
447	0					0	# eg 10:0
448							$line_rev = 1; $line_start = $index_end; $line_end = $index_start;
449							} elsif ( $index_start <= $index_end ) {
450	0					0	# eg -5:-1
451							$line_rev = 0;
452							} else {
453	12					9	# eg -1:-5
454							$line_rev = 1;
455	12					14	}
456
457	12					13	my @ret;
458	12					14
459	12					12	my ($k,$fhline);
460	12					14
461	12					19	my $line_num = -1;
462							my $line_ctr = $line_step - 1; # ensure first line is always included
463	12					14	my $index = -1;
464							my $pdlsize = 0;
465							my $extend = 10000;
466
467	12	50				11	my $line_store; # line numbers of saved data
	12					21
468	0	0				0
469	0					0	RCOLS_IO: {
470	0	0				0
471	0					0	if ($options->{COLIDS}) {
472	0	0				0	print STDERR "rcols: processing COLIDS option\n" if $options->{VERBOSE};
473	0					0	undef $!;
	0					0
474							if (defined($fhline = <$fh>) ) { # grab first line's fields for column IDs
475	0	0				0	$fhline =~ s/\r?\n$//; # handle DOS on unix files better
476	0					0	my @v = defined($usecolsep) ? split($usecolsep,$fhline) : split(' ',$fhline);
477							@{$options->{COLIDS}} = @v;
478							} else {
479							die "rcols: reading COLIDS info, $!" if $!;
480	12					208	last RCOLS_IO;
481							}
482							}
483	55					294
484							while( defined($fhline = <$fh>) ) {
485	55					62
486							# chomp $fhline;
487							$fhline =~ s/\r?\n$//; # handle DOS on unix files better
488
489							$line_num++;
490
491							# the order of these checks is important, particularly whether we
492							# check for line_ctr before or after the pattern matching
493							# Prior to PDL 2.003 the line checks were done BEFORE the
494	55	50	33			69	# pattern matching
495	55	50	33			132	#
496	55	100	100			297	# need this first check, even with it almost repeated at end of loop,
497	47	100	100			132	# incase the pattern matching excludes $line_num == $line_end, say
498	41	50				66	last if defined($line_end) and $line_num > $line_end;
499	41					34	next if defined($line_start) and $line_num < $line_start;
500							next if $options->{EXCLUDE} and $fhline =~ /$options->{EXCLUDE}/;
501	41					31	next if $options->{INCLUDE} and not $fhline =~ /$options->{INCLUDE}/;
502	41	100				105	next unless ++$line_ctr == $line_step;
503							$line_ctr = 0;
504
505	41	100				62	$index++;
	90					185
506							my @v = defined($usecolsep) ? split($usecolsep,$fhline) : split(' ',$fhline);
507
508							# map empty fields '' to undef value
509	41	100				61	@v = map { $_ eq '' ? undef : $_ } @v;
510
511							# if the first line, set up the output ndarrays using all the columns
512	11	100				30	# if the user doesn't specify anything
513	3					8	if ( $index == 0 ) {
514
515	11	100	66			42	# Handle implicit multicolumns in command line
516	2	100				236	if ($#explicit_cols < 0) { # implicit single col data
	2					8
517	1					5	@explicit_cols = ( 0 .. $#v );
518							}
519							if (scalar(@explicit_cols)==1 and ref($explicit_cols[0]) eq "ARRAY") {
520	11					22	if ( !scalar(@{$explicit_cols[0]}) ) { # implicit multi-col data
521	11					19	@explicit_cols = ( [ 0 .. $#v ] );
522	11					19	}
523	20	100				28	}
524	2	50				7	my $implicit_pdls = 0;
525							my $is_explicit = {};
526	18					45	foreach my $col (@explicit_cols) {
527							if (ref($col) eq "ARRAY") {
528							$implicit_pdls++ if !scalar(@$col);
529	11	50				29	} else {
530	0					0	$is_explicit->{$col} = 1;
531							}
532	11					17	}
533	20	50	66			48	if ($implicit_pdls > 1) {
534	0					0	die "rcols: only one implicit multicolumn ndarray spec allowed, found $implicit_pdls!\n";
	0					0
535							}
536							foreach my $col (@explicit_cols) {
537							if (ref($col) eq "ARRAY" and !scalar(@$col)) {
538							@$col = grep { !$is_explicit->{$_} } ( 0 .. $#v );
539	11					12	}
540	11					17	}
541	11					19
542							# remove declared perl columns from pdl data list
543	20	100				46	$k = 0;
		50
544	2					4	my @pdl_cols = ();
	5					11
	2					3
545	2					2	foreach my $col (@explicit_cols) {
	2					5
546							# strip out declared perl cols so they won't be read into ndarrays
547	18					28	if ( ref($col) eq "ARRAY" ) {
548							@$col = grep { !$is_perl_col[$_] } @{$col};
549							push @pdl_cols, [ @{$col} ];
550							} elsif (!$is_perl_col[$col]) {
551	11					16	push @pdl_cols, $col;
552	11					20	}
553	20	50	66			54	}
554	0					0	# strip out perl cols in explicit col list for return at end
	0					0
555							@end_perl_cols = @perl_cols;
556							foreach my $col (@explicit_cols) {
557							if ( ref($col) ne "ARRAY" and defined($is_perl_col[$col]) ) {
558							@end_perl_cols = grep { $_ != $col } @end_perl_cols;
559	11					59	}
560	11	50				28	};
561	0					0
562	0					0	# sort out the types of the ndarrays
563	0					0	my @types = _handle_types( $#pdl_cols, $$options{DEFTYPE}, $$options{TYPES} );
564							if ( $options->{VERBOSE} ) { # dbg aid
565	0					0	print "Reading data into ndarrays of type: [ ";
566							foreach my $t ( @types ) {
567							print $t->shortctype() . " ";
568	11					15	}
569	11					118	print "]\n";
570							}
571
572	20	100				35	$k = 0;
573							for (@explicit_cols) {
574	2					5	# Using mixed list+ndarray data structure for performance tradeoff
	2					120
575							# between memory usage (perl list) and speed of IO (PDL operations)
576	18	50				242	if (ref($_) eq "ARRAY") {
577	18	50				42	# use multicolumn ndarray here
578							push @ret, [ $class->zeroes($types[$k++],scalar(@{$_}),1), [] ];
579							} else {
580	11					20	push @ret, ($is_perl_col[$_] ? [ [], [] ] : [ $class->zeroes($types[$k],1), [] ]);
	0					0
581							$k++ unless $is_perl_col[$_];
582	11					69	}
583							}
584							for (@end_perl_cols) { push @ret, [ [], [] ]; }
585
586	41					43	$line_store = [ $class->zeroes(long,1), [] ]; # only need to store integers
587	41	100				56	}
588	10					25
	28					42
589	10					18	# if necessary, extend PDL in buffered manner
590							$k = 0;
591							if ( $pdlsize < $index ) {
592							for (@ret, $line_store) { _ext_lastD( $_->[0], $extend ); }
593	41					60	$pdlsize += $extend;
594	41					46	}
595	73	100				88
596	9					7	# - stick perl arrays onto end of $ret
	9					47
597							$k = 0;
598	64					50	for (@explicit_cols, @end_perl_cols) {
	64					153
599							if (ref($_) eq "ARRAY") {
600							push @{ $ret[$k++]->[1] }, [ @v[ @$_ ] ];
601							} else {
602							push @{ $ret[$k++]->[1] }, $v[$_];
603	41					37	}
	41					51
604							}
605
606	41	100				56	# store the line number
607	11					17	push @{$line_store->[1]}, $line_num;
	31					51
608	11					16
609							# need to burp out list if needed
610							if ( $index >= $nextburpindex ) {
611							for (@ret, $line_store) { _burp_1D($_,$index); }
612	41	50	33			259	$nextburpindex = $index + $chunksize;
613							}
614
615							# Thanks to Frank Samuelson for this
616							last if defined($line_end) and $line_num == $line_end;
617	12	100				137	}
618
619							}
620
621	12					21	close($fh) unless $is_handle;
622	32	100	100			50
	31					82
623	32					83	# burp one final time if needed and
624							# clean out additional ARRAY ref level for @ret
625							for (@ret, $line_store) {
626							_burp_1D($_,$index) if defined $_ and scalar @{$_->[1]};
627	12	100				93	$_ = $_->[0];
628	1	50				3	}
629	1	50				3
630	0					0	# have we read anything in? if not, return empty ndarrays
631	0	0				0	if ( $index == -1 ) {
632	0					0	print "Warning: rcols() did not read in any data.\n" if $options->{VERBOSE};
633							if ( wantarray ) {
634	0					0	foreach ( 0 .. $#explicit_cols ) {
635							if ( $is_perl_col[$_] ) {
636							$ret[$_] = PDL->null;
637	0					0	} else {
	0					0
638	0					0	$ret[$_] = [];
639							}
640	1					8	}
641							for ( @end_perl_cols ) { push @ret, []; }
642							return ( @ret );
643							} else {
644							return PDL->null;
645							}
646							}
647	11	50	66			53
			66
648	10					18	# if the user has asked for lines => 0:-1 or 0:10 or 1:10 or 1:-1,
649							# - ie not reversed and the last line number is known -
650	18	50				160	# then we can skip the following nastiness
651							if ( $line_rev == 0 and $index_start >= 0 and $index_end >= -1 ) {
652	10	50				30	for (@ret) {
653	0	0				0	## $_ = $_->mv(-1,0)->slice("0:${index}")->mv(0,-1) unless ref($_) eq 'ARRAY';
654	0					0	$_ = $_->mv(-1,0)->slice("0:${index}") unless ref($_) eq 'ARRAY'; # cols are dim(0)
	0					0
655							};
656	0					0	if ( $options->{VERBOSE} ) {
657							if ( ref($ret[0]) eq 'ARRAY' ) {
658							print "Read in ", scalar( @{ $ret[0] } ), " elements.\n";
659	10	100				248	} else {
660							print "Read in ", $ret[0]->nelem, " elements.\n";
661							}
662							}
663							wantarray ? return(@ret) : return $ret[0];
664	1					5	}
665
666							# Work out which line numbers we want. First we clean up the ndarray
667	1	50				2	# containing the line numbers that have been read in
668	1	50	33			8	$line_store = $line_store->slice("0:${index}");
		50
669	0					0
670	0					0	# work out the min/max line numbers required
671	0					0	if ( $line_rev ) {
672							if ( defined($line_start) and defined($line_end) ) {
673	1					1	my $dummy = $line_start;
674							$line_start = $line_end;
675	0					0	$line_end = $dummy;
676							} elsif ( defined($line_start) ) {
677							$line_end = $line_start;
678	1	50				4	} else {
679	1	50				3	$line_start = $line_end;
680							}
681	1					2	}
682							$line_start = $line_num + 1 + $index_start if $index_start < 0;
683	22			22		420	$line_end = $line_num + 1 + $index_end if $index_end < 0;
	22					55
	22					80037
	1					1
684	1	50				3
685	1					7	my $indices;
686
687	0					0	{ no warnings 'precedence';
688							if ( $line_rev ) {
689							$indices = which( $line_store >= $line_end & $line_store <= $line_start )->slice('-1:0');
690							} else {
691							$indices = which( $line_store >= $line_start & $line_store <= $line_end );
692	1					5	}
693	2	50				5	}
694	0					0
695	0					0	# truncate the ndarrays
696							for my $col ( @explicit_cols ) {
697							if ( ref($col) eq "ARRAY" ) {
698	2	50				16	for ( @$col ) {
699							$ret[$_] = $ret[$_]->index($indices);
700							}
701							} else {
702	1					6	$ret[$col] = $ret[$col]->index($indices) unless $is_perl_col[$col] };
703	1					2	}
704	2	50				5
705	0					0	# truncate/reverse/etc the perl arrays
	0					0
706	0					0	my @indices_array = list $indices;
707	0					0	foreach ( @explicit_cols, @end_perl_cols ) {
	0					0
	0					0
708							if ( $is_perl_col[$_] ) {
709							my @temp = @{ $ret[$_] };
710							$ret[$_] = [];
711							foreach my $i ( @indices_array ) { push @{ $ret[$_] }, $temp[$i] };
712	1	50				4	}
713	0					0	}
714	0					0
715	0	0				0	# print some diagnostics
716	0	0				0	if ( $options->{VERBOSE} ) {
717	0					0	my $done = 0;
718	0					0	foreach my $col (@explicit_cols) {
719							last if $done;
720	0					0	next if $is_perl_col[$col];
721	0	0				0	print "Read in ", $ret[$col]->nelem, " elements.\n";
722	0					0	$done = 1;
723	0					0	}
724							foreach my $col (@explicit_cols, @end_perl_cols) {
725							last if $done;
726							print "Read in ", $ret[$col]->nelem, " elements.\n";
727							$done = 1;
728	1					2	}
729	2	50				5	}
730	2	50				11
731							# fix 2D pdls to match what wcols generates
732							foreach my $col (@ret) {
733	1	50				27	next if ref($col) eq "ARRAY";
734							$col = $col->transpose if $col->ndims == 2;
735							}
736
737							wantarray ? return(@ret) : return $ret[0];
738							}
739
740
741							=head2 wcols
742
743							=for ref
744
745							Write ASCII columns into file from 1D or 2D ndarrays and/or 1D listrefs efficiently.
746
747							Can take file name or *HANDLE, and if no file/filehandle is given defaults to STDOUT.
748
749							Options (case insensitive):
750
751							HEADER - prints this string before the data. If the string
752							is not terminated by a newline, one is added. (default B<''>).
753
754							COLSEP - prints this string between columns of data. Defaults to
755							$PDL::IO::Misc::defcolsep.
756
757							FORMAT - A printf-style format string that is cycled through
758							column output for user controlled formatting.
759
760							=for usage
761
762							Usage: wcols $data1, $data2, $data3,..., *HANDLE\|"outfile", [\%options]; # or
763							wcols $format_string, $data1, $data2, $data3,..., *HANDLE\|"outfile", [\%options];
764
765							where the $dataN args are either 1D ndarrays, 1D perl array refs,
766							or 2D ndarrays (as might be returned from rcols() with the [] column
767							syntax and/or using the PERLCOLS option). dim(0) of all ndarrays
768							written must be the same size. The printf-style $format_string,
769							if given, overrides any FORMAT key settings in the option hash.
770
771							e.g.,
772
773							=for example
774
775							$x = random(4); $y = ones(4);
776							wcols $x, $y+2, 'foo.dat';
777							wcols $x, $y+2, *STDERR;
778							wcols $x, $y+2, '\|wc';
779
780							$x = sequence(3); $y = zeros(3); $c = random(3);
781							wcols $x,$y,$c; # Orthogonal version of 'print $x,$y,$c' :-)
782
783							wcols "%10.3f", $x,$y; # Formatted
784							wcols "%10.3f %10.5g", $x,$y; # Individual column formatting
785
786							$x = sequence(3); $y = zeros(3); $units = [ 'm/sec', 'kg', 'MPH' ];
787							wcols $x,$y, { HEADER => "# x y" };
788							wcols $x,$y, { Header => "# x y", Colsep => ', ' }; # case insensitive option names!
789							wcols " %4.1f %4.1f %s",$x,$y,$units, { header => "# Day Time Units" };
790
791							$a52 = sequence(5,2); $y = ones(5); $c = [ 1, 2, 4 ];
792							wcols $a52; # now can write out 2D pdls (2 columns data in output)
793							wcols $y, $a52, $c # ...and mix and match with 1D listrefs as well
794
795							NOTES:
796
797							1. Columns are separated by whitespace by default, use
798							C<$PDL::IO::Misc::defcolsep> to modify the default value or
799							the COLSEP option
800
801							2. Support for the C<$PDL::IO::Misc::colsep> global value
802							of PDL-2.4.6 and earlier is maintained but the initial value
803							of the global is undef until you set it. The value will be
804							then be picked up and used as if defcolsep were specified.
805
806							3. Dim 0 corresponds to the column data dimension for both
807							rcols and wcols. This makes wcols the reverse operation
808							of rcols.
809
810	4	50		4	0	4700	=cut
811
812							*wcols = \&PDL::wcols;
813	4	50				15
814							sub PDL::wcols {
815							barf 'Usage: wcols($optional_format_string, 1_or_2D_pdls, *HANDLE\|"filename", [\%options])' if @_<1;
816	4					10
817	4					0	# handle legacy colsep variable
818	4	100				15	$usecolsep = (defined $colsep) ? $colsep : $defcolsep;
819	2					4
820	2					9	# if last argument is a reference to a hash, parse the options
821	2	50				19	my ($format_string, $step, $fh);
	0	50				0
		50
822	0					0	my $header;
823	2					6	if ( ref( $_[-1] ) eq "HASH" ) {
824							my $opt = pop;
825	0					0	foreach my $key ( sort keys %$opt ) {
826							if ( $key =~ /^H/i ) { $header = $opt->{$key}; } # option: HEADER
827							elsif ( $key =~ /^COLSEP/i ) { $usecolsep = $opt->{$key}; } # option: COLSEP
828							elsif ( $key =~ /^FORMAT/i ) { $format_string = $opt->{$key}; } # option: FORMAT
829	4	100	100			20	else {
830	3	100				11	print "Warning: wcols does not understand option <$key>.\n";
831							}
832	3					6	}
833	3					25	}
834	3					4	if (ref(\$_[0]) eq "SCALAR" \|\| $format_string) {
835							$format_string = shift if (ref(\$_[0]) eq "SCALAR");
836	4					7	# 1st arg not ndarray, explicit format string overrides option hash FORMAT
837	4					4	$step = $format_string;
838	4		33			30	$step =~ s/(%%\|[^%])//g; # use step to count number of format items
839							$step = length ($step);
840							}
841	4	50				8	my $file = $_[-1];
842	4					6	my $file_opened;
	4					7
843							my $is_handle = !UNIVERSAL::isa($file,'PDL') &&
844							!UNIVERSAL::isa($file,'ARRAY') &&
845	0	0				0	_is_io_handle $file;
846	0					0	if ($is_handle) { # file handle passed directly
847	0	0				0	$fh = $file; pop;
848	0	0	0			0	}
849	0	0				0	else{
850							if (ref(\$file) eq "SCALAR") { # Must be a file name
851	0					0	$fh = gensym;
852	0					0	if (!$is_handle) {
853							$file = ">$file" unless $file =~ /^\\|/ or $file =~ /^\>/;
854							open $fh, $file or barf "File $file can not be opened for writing\n";
855							}
856	0					0	pop;
857							$file_opened = 1;
858							}
859							else{ # Not a filehandle or filename, assume something else
860	4					7	# (probably ndarray) and send to STDOUT
861	4	50				35	$fh = *STDOUT;
	0					0
862	4					6	}
863	4					8	}
864	8	50				14
865	0	0				0	my @p = @_;
	0					0
866	0					0	my $n = (ref $p[0] eq 'ARRAY') ? $#{$p[0]}+1 : $p[0]->dim(0);
867							my @dogp = (); # need to break 2D pdls into a their 1D pdl components
868	8	50	33			34	for (@p) {
869	8	50				14	if ( ref $_ eq 'ARRAY' ) {
870	0					0	barf "wcols: 1D args must have same number of elements\n" if scalar(@{$_}) != $n;
871							push @dogp, $_;
872	8					9	} else {
873							barf "wcols: 1D args must have same number of elements\n" if $_->dim(0) != $n or $_->getndims > 2;
874							if ( $_->getndims == 2 ) {
875							push @dogp, $_->dog;
876	4	50				6	} else {
877	0	0				0	push @dogp, $_;
878	0					0	}
879							}
880	4					3	}
881	4					5	if ( defined $header ) {
882	4					6	$header .= "\n" unless $header =~ m/\n$/;
883	16	100				16	print $fh $header;
884	12					12	}
885	12					12	my $i;
886	12					13	my $pcnt = scalar @dogp;
887	24					21	for ($i=0; $i<$n; $i++) {
	24					20
888	24	100				32	if ($format_string) {
889	12					34	my @d;
890	12	50				17	my $pdone = 0;
891	12					24	for (@dogp) {
892							push @d,_at_1D($_,$i); $pdone++;
893							if (@d == $step) {
894	12	50	33			37	printf $fh $format_string,@d;
895	0					0	printf $fh $usecolsep unless $pdone==$pcnt;
896	0	0				0	$#d = -1;
897	0					0	}
898							}
899	0					0	if (@d && !$i) {
900							my $str;
901	0					0	if ($#dogp>0) {
902							$str = ($#dogp+1).' columns don\'t';
903	0					0	} else {
904							$str = '1 column doesn\'t';
905							}
906							$str .= " fit in $step column format ".
907							'(even repeated) -- discarding surplus';
908	4					4	carp $str;
909	4					4	# printf $fh $format_string,@d;
910	8					5	# printf $fh $usecolsep;
911	8	100				13	}
912							} else {
913							my $pdone = 0;
914	16					54	for (@dogp) {
915							$pdone++;
916	4	50				8	print $fh _at_1D($_,$i) . ( ($pdone==$pcnt) ? '' : $usecolsep );
917	4					14	}
918							}
919							print $fh "\n";
920							}
921							close($fh) if $file_opened;
922							return 1;
923							}
924
925							=head2 swcols
926
927							=for ref
928
929							generate string list from C format specifier and a list of ndarrays
930
931							C takes an (optional) format specifier of the printf
932							sort and a list of 1D ndarrays as input. It returns a perl
933							array (or array reference if called in scalar context)
934							where each element of the array is the string generated by
935							printing the corresponding element of the ndarray(s) using
936							the format specified. If no format is specified it uses the
937							default print format.
938
939							=for usage
940
941							Usage: @str = swcols format, pdl1,pdl2,pdl3,...;
942							or $str = swcols format, pdl1,pdl2,pdl3,...;
943
944	0			0	0	0	=cut
945
946	0					0	*swcols = \&PDL::swcols;
947
948	0	0				0	sub PDL::swcols{
949	0					0	my ($format_string,$step);
950	0					0
951	0					0	my @outlist;
952
953							if (ref(\$_[0]) eq "SCALAR") {
954	0					0	$step = $format_string = shift; # 1st arg not ndarray
955	0	0				0	$step =~ s/(%%\|[^%])//g; # use step to count number of format items
	0					0
956	0					0	$step = length ($step);
957	0	0				0	}
958	0	0				0
	0					0
959							my @p = @_;
960	0	0	0			0	my $n = (ref $p[0] eq 'ARRAY') ? $#{$p[0]}+1 : $p[0]->nelem;
961							for (@p) {
962							if ( ref $_ eq 'ARRAY' ) {
963							barf "swcols: 1D args must have same number of elements\n" if scalar(@{$_}) != $n;
964	0					0	} else {
965	0					0	barf "swcols: 1D args must have same number of elements\n" if $_->nelem != $n or $_->getndims!=1;
966	0	0				0	}
967	0					0	}
968	0					0
969	0					0	my $i;
970	0	0				0	for ($i=0; $i<$n; $i++) {
971	0					0	if ($format_string) {
972	0					0	my @d;
973							for (@p) {
974							push @d,_at_1D($_,$i);
975	0	0	0			0	if (@d == $step) {
976	0					0	push @outlist,sprintf $format_string,@d;
977	0	0				0	$#d = -1;
978	0					0	}
979							}
980	0					0	if (@d && !$i) {
981							my $str;
982	0					0	if ($#p>0) {
983							$str = ($#p+1).' columns don\'t';
984	0					0	} else {
985							$str = '1 column doesn\'t';
986							}
987							$str .= " fit in $step column format ".
988							'(even repeated) -- discarding surplus';
989	0					0	carp $str;
990	0					0	# printf $fh $format_string,@d;
991							# printf $fh $usecolsep;
992							}
993							} else {
994	0	0				0	for (@p) {
995							push @outlist,sprintf _at_1D($_,$i),$usecolsep;
996							}
997							}
998							}
999							wantarray ? return @outlist: return \@outlist;
1000							}
1001
1002
1003							=head2 rgrep
1004
1005							=for ref
1006
1007							Read columns into ndarrays using full regexp pattern matching.
1008
1009							Options:
1010
1011							=for options
1012
1013							UNDEFINED: This option determines what will be done for undefined
1014							values. For instance when reading a comma-separated file of the type
1015							C<1,2,,4> where the C<,,> indicates a missing value.
1016							The default value is to assign C<$PDL::undefval> to undefined values,
1017							but if C is set this is used instead. This would normally
1018							be set to a number, but if it is set to C and PDL is compiled
1019							with Badvalue support (see L) then undefined values are set to
1020							the appropriate badvalue and the column is marked as bad.
1021							DEFTYPE: Sets the default type of the columns - see the documentation for
1022							L
1023							TYPES: A reference to a Perl array with types for each column - see
1024							the documentation for L
1025							BUFFERSIZE: The number of lines to extend the ndarray by. It might speed
1026							up the reading a little bit by setting this to the number of lines in the
1027							file, but in general L is a better choice
1028
1029							Usage
1030
1031							=for usage
1032
1033							($x,$y,...) = rgrep(sub, *HANDLE\|"filename")
1034
1035							e.g.
1036
1037							=for example
1038
1039							($x,$y) = rgrep {/Foo (.) Bar (.) Mumble/} $file;
1040
1041	1	50		1	1	1474	i.e. the vectors C<$x> and C<$y> get the progressive values
1042							of C<$1>, C<$2> etc.
1043
1044	1					3	=cut
	1					2
1045	1					2
1046							sub rgrep (&@) {
1047	1					5	barf 'Usage ($x,$y,...) = rgrep(sub, *HANDLE\|"filename", [{OPTIONS}])'
1048	1	50				12	if $#_ > 2;
1049	1	50	0			3
1050							my (@ret,@v,$nret); my ($m,$n)=(-1,0); # Count/PDL size
1051	1	50				4	my $pattern = shift;
1052	0					0
1053							my $is_handle = _is_io_handle $_[0];
1054							my $fh = $is_handle ? $_[0] : gensym;
1055							open $fh, $_[0] or die "File $_[0] not found\n" unless $is_handle;
1056	1					12
1057							if (ref($pattern) ne "CODE") {
1058							die "Got a ".ref($pattern)." for rgrep?!";
1059							}
1060
1061
1062							# set up default options
1063	1		50			6	my $opt = PDL::Options->new( {
1064	1					7	DEFTYPE => $deftype,
1065							TYPES => [],
1066	1					3	UNDEFINED => $PDL::undefval,
1067							BUFFERSIZE => 10000
1068							} );
1069							# Check if the user specified options
1070	1					3	my $u_opt = $_[1] \|\| {};
1071							$opt->options( $u_opt);
1072	1	50				6
1073							my $options = $opt->current();
1074	1					20
1075	5	50				10	# If UNDEFINED is set to .bad. then undefined are set to
1076							# bad - unless we have a Perl that is not compiled with Bad support
1077	5					64	my $undef_is_bad = ($$options{UNDEFINED} =~ /bad/i);
1078	5	100				8	barf "Unknown PDL type given for DEFTYPE.\n"
1079	1					2	unless ref($$options{DEFTYPE}) eq "PDL::Type";
1080
1081							while(<$fh>) {
1082	1					26	next unless @v = &$pattern;
1083	1					4
1084							$m++; # Count got
1085	2					29	if ($m==0) {
1086							$nret = $#v; # Last index of values to return
1087
1088	4	50				10	# Handle various columns as in rcols - added 18/04/05
1089							my @types = _handle_types( $nret, $$options{DEFTYPE}, $$options{TYPES} );
1090	5	100				8	for (0..$nret) {
1091	1					2	# Modified 18/04/05 to use specified precision.
1092	2					11	$ret[$_] = [ PDL->zeroes($types[$_], 1), [] ];
1093							}
1094	1					4	} else { # perhaps should only carp once...
1095							carp "Non-rectangular rgrep" if $nret != $#v;
1096	5					8	}
1097							if ($n<$m) {
1098							for (0..$nret) {
1099							_ext_lastD( $ret[$_]->[0], $$options{BUFFERSIZE} ); # Extend PDL in buffered manner
1100	10	50				14	}
1101							$n += $$options{BUFFERSIZE};
1102	0	0				0	}
1103	0					0	for(0..$nret) {
1104							# Set values - '1*' is to ensure numeric
1105	0					0	# We now (JB - 18/04/05) also check for defined values or not
1106							# Ideally this should include Badvalue support..
1107	0					0	if ($v[$_] eq '') {
1108							# Missing value - let us treat this specially
1109							if ($undef_is_bad) {
1110	10					26	set $ret[$_]->[0], $m, $$options{DEFTYPE}->badvalue();
1111							# And set bad flag on $ref[$_]!
1112							$ret[$_]->[0]->badflag(1);
1113							} else {
1114							set $ret[$_]->[0], $m, $$options{UNDEFINED};
1115	1	50				4	}
1116	1					2	} else {
	2					9
1117	1	50				18	set $ret[$_]->[0], $m, 1*$v[$_];
1118							}
1119							}
1120							}
1121
1122							close($fh) unless $is_handle;
1123							for (@ret) { $_ = $_->[0]->slice("0:$m")->copy; }; # Truncate
1124							wantarray ? return(@ret) : return $ret[0];
1125							}
1126
1127							=head2 isbigendian
1128
1129							=for ref
1130
1131							Determine endianness of machine - returns 0 or 1 accordingly
1132
1133							=cut
1134
1135							!NO!SUBS!
1136
1137							# $isbigendian is set up at top of file
1138	7946			7946	0	35689	pp_addpm( "sub PDL::isbigendian { return $isbigendian; };\n*isbigendian = \\&PDL::isbigendian;\n");
1139							pp_add_exported("", "isbigendian");
1140
1141							################################ XS CODE ######################################
1142
1143							pp_add_exported('', 'rcube');
1144							pp_addpm(<<'EOPM');
1145							=head2 rcube
1146
1147							=for ref
1148
1149							Read list of files directly into a large data cube (for efficiency)
1150
1151							=for usage
1152
1153							$cube = rcube \&reader_function, @files;
1154
1155							=for example
1156
1157							$cube = rcube \&rfits, glob("*.fits");
1158
1159							This IO function allows direct reading of files into a large data cube,
1160							Obviously one could use cat() but this is more memory efficient.
1161
1162							The reading function (e.g. rfits, readfraw) (passed as a reference)
1163							and files are the arguments.
1164
1165							The cube is created as the same X,Y dims and datatype as the first
1166							image specified. The Z dim is simply the number of images.
1167
1168							=cut
1169
1170							sub rcube {
1171
1172	0			0	1		my $reader = shift;
1173
1174	0	0					barf "Usage: blah" unless ref($reader) eq "CODE";
1175
1176	0						my $k=0;
1177	0						my ($im,$cube,$nx,$ny);
1178	0						my $nz = scalar(@_);
1179
1180	0						for my $file (@_) {
1181	0	0					print "Slice ($k) - reading file $file...\n" if $PDL::verbose;
1182	0						$im = &$reader($file);
1183	0						($nx, $ny) = dims $im;
1184	0	0					if ($k == 0) {
1185	0	0					print "Creating $nx x $ny x $nz cube...\n" if $PDL::verbose;
1186	0						$cube = $im->zeroes($im->type,$nx,$ny,$nz);
1187							}
1188							else {
1189	0	0	0				barf "Dimensions do not match for file $file!\n" if
1190							$im->getdim(0) != $nx or $im->getdim(1) != $ny ;
1191
1192							}
1193	0						$cube->slice(":,:,($k)") .= $im;
1194	0						$k++;
1195							}
1196
1197	0						return $cube;
1198							}
1199
1200
1201							EOPM
1202
1203							pp_def('rasc',
1204							Pars => '[o] nums(n); int [o] ierr(n)',
1205							OtherPars => 'PerlIO *fp; IV num => n',
1206							PMFunc => '',
1207							PMCode => <<'EOF',
1208							sub rasc {PDL->rasc(@_)}
1209							sub PDL::rasc {
1210							my ($pdl, $file, $num) = @_;
1211							$num = -1 unless defined $num;
1212							my $is_openhandle = defined fileno $file;
1213							my $fi;
1214							if ($is_openhandle) {
1215							$fi = $file;
1216							} else {
1217							barf 'usage: rasc $pdl, "filename"\|FILEHANDLE, [$num_to_read]'
1218							if !defined $file \|\| ref $file;
1219							open $fi, "<", $file or barf "Can't open $file";
1220							}
1221							$pdl->_rasc_int(my $ierr=null,$fi,$num);
1222							close $fi unless $is_openhandle;
1223							return all $ierr > 0;
1224							}
1225							EOF
1226							GenericTypes => ['F','D'],
1227							CHeader => <<'EOH', ###### Read ASCII Function ##########
1228							#define SWALLOWLINE(fp) while ((s = PerlIO_getc(fp)) != '\n' && s != EOF)
1229							#define TRAILING_WHITESPACE_CHECK(s, label) \
1230							if (s!=' ' && s!='\t' && s!='\r' && s!='\n' && s!=',') { nread = -1; goto label; }
1231							EOH
1232							Code => <<'EOF',
1233							int ns, i, j;
1234							ns = $SIZE(n);
1235							broadcastloop %{
1236							for (i=0;i
1237							$GENERIC(nums) f = 0;
1238							int nread = 0;
1239							int s = PerlIO_getc($COMP(fp));
1240							int afterp = 0, aftere=0;
1241							int expo = 0;
1242							$GENERIC(nums) sig = 1.0, esig = 1.0;
1243							$GENERIC(nums) div = 1.0;
1244							if (s == EOF) { nread = 0; goto endread_$GENERIC(nums); }
1245							while (1) {
1246							if (s == EOF) { nread = 0; goto endread_$GENERIC(nums); } /* signal end of line */
1247							if (s == '#')
1248							SWALLOWLINE($COMP(fp));
1249							if ((s >='0' && s <='9') \|\| s =='.' \|\| s == 'e' \|\| s == 'E'
1250							\|\| s == '+' \|\| s == '-') break;
1251							if (s!=' ' && s!='\t' && s!='\r' && s!='\n' && s!=',')
1252							{ nread = -1; goto endread_$GENERIC(nums); } /* garbage */
1253							s = PerlIO_getc($COMP(fp)); /* else skip whitespace */
1254							}
1255							/* parse number */
1256							while (1) {
1257							switch (s) {
1258							case '0': case '1': case '2': case '3': case '4':
1259							case '5': case '6': case '7': case '8': case '9':
1260							if (aftere)
1261							expo = (expo*10) + (s - '0');
1262							else if (afterp) {
1263							div /= 10.0;
1264							f += div*(s - '0');
1265							} else
1266							f = (f*10) + (s - '0');
1267							break;
1268							case '+':
1269							/* ignore */
1270							break;
1271							case '-':
1272							if (aftere)
1273							esig = -1;
1274							else
1275							sig = -1;
1276							break;
1277							case 'e': case 'E':
1278							if (aftere) { nread = -1; goto endread_$GENERIC(nums); }
1279							aftere = 1;
1280							break;
1281							case '.':
1282							if (afterp \|\| aftere) { nread = -1; goto endread_$GENERIC(nums); }
1283							afterp = 1;
1284							break;
1285							default:
1286							goto endread_$GENERIC(nums);
1287							break;
1288							}
1289							nread++;
1290							s = PerlIO_getc($COMP(fp));
1291							}
1292							endread_$GENERIC(nums):
1293							if (($ierr(n=>i) = nread) <= 0)
1294							break;
1295							f *= sig;
1296							int i2;
1297							for (i2=0;i2
1298							f *= (esig > 0 ? 10.0 : 0.1);
1299							$nums(n=>i) = f;
1300							TRAILING_WHITESPACE_CHECK(s, endread_$GENERIC(nums));
1301							}
1302							for (j=i+1; j
1303							$ierr(n=>j) = $ierr(n=>i); /* inherit error flags */
1304							%}
1305							EOF
1306							Doc => <<'EOF',
1307							=for ref
1308
1309							Simple function to slurp in ASCII numbers quite quickly,
1310							although error handling is marginal (to nonexistent).
1311
1312							=for usage
1313
1314							$pdl->rasc("filename"\|FILEHANDLE [,$noElements]);
1315
1316							Where:
1317							filename is the name of the ASCII file to read or open file handle
1318							$noElements is the optional number of elements in the file to read.
1319							(If not present, all of the file will be read to fill up $pdl).
1320							$pdl can be of type float or double (for more precision).
1321
1322							=for example
1323
1324							# (test.num is an ascii file with 20 numbers. One number per line.)
1325							$in = PDL->null;
1326							$num = 20;
1327							$in->rasc('test.num',20);
1328							$imm = zeroes(float,20,2);
1329							$imm->rasc('test.num');
1330							EOF
1331							);
1332
1333							pp_done();