File Coverage

lib/PDL/IO/Misc.pd
Criterion Covered Total %
statement 321 472 68.0
branch 149 290 51.3
condition 47 97 48.4
subroutine 22 24 91.6
pod 3 7 42.8
total 542 890 60.9


line stmt bran cond sub pod time code
1             # and endian-ness of machine
2             require PDL::Core::Dev;
3             my $isbigendian = PDL::Core::Dev::isbigendian();
4              
5             use strict;
6             use warnings;
7             pp_addpm({At=>'Top'},<<'EOD');
8 22     22   209 use strict;
  22         49  
  22         920  
9 22     22   110 use warnings;
  22         48  
  22         1469  
10              
11             =head1 NAME
12              
13             PDL::IO::Misc - misc IO routines for PDL
14              
15             =head1 DESCRIPTION
16              
17             Some basic I/O functionality: tables, byte-swapping
18              
19             =head1 SYNOPSIS
20              
21             use PDL::IO::Misc;
22              
23             =cut
24              
25             EOD
26              
27             pp_addpm({At=>'Bot'},<<'EOD');
28              
29             =head1 AUTHOR
30              
31             Copyright (C) Karl Glazebrook 1997, Craig DeForest 2001,
32             2003, and Chris Marshall 2010. All rights reserved. There is
33             no warranty. You are allowed to redistribute this software
34             / documentation under certain conditions. For details, see
35             the file COPYING in the PDL distribution. If this file is
36             separated from the PDL distribution, the copyright notice
37             should be included in the file.
38              
39             =cut
40              
41             EOD
42              
43             pp_add_exported('',"rcols wcols swcols rgrep");
44              
45             ############################## PM CODE ########################################
46              
47             pp_addpm(<<'!NO!SUBS!');
48 22     22   146  
  22         56  
  22         152  
49 22     22   180 use PDL::Primitive;
  22         42  
  22         4288  
50 22     22   152 use PDL::Types;
  22         40  
  22         1586  
51 22     22   130 use PDL::Options;
  22         40  
  22         202  
52 22     22   183 use PDL::Bad;
  22         70  
  22         1704  
53 22     22   8696 use Carp;
  22         24080  
  22         1692  
54 22     22   166 use Symbol qw/ gensym /;
  22         39  
  22         17595  
55             use List::Util;
56              
57             !NO!SUBS!
58              
59             sub defswap {
60             my ($bytes, $doc) = @_;
61             my $sized_type = "struct {char n[$bytes];}";
62             pp_def(
63             "bswap$bytes",
64             Pars => '[io] x()',
65             RedoDimsCode => <
66             if (sizeof(\$GENERIC(x)) < $bytes)
67             \$CROAK("Tried to bswap size $bytes but data type only %zd bytes", sizeof(\$GENERIC(x)));
68             EOF
69             Code => <
70             int i;
71             int n = sizeof(\$x()) / $bytes;
72             $sized_type *aa = (void *) &\$x();
73             for (i=0;i
74             $sized_type bb; memcpy(&bb, aa+i, $bytes);
75             PDL_Byte *a = (PDL_Byte*) (void*) (aa+i);
76             PDL_Byte *b = (PDL_Byte*) &bb;
77             @{[map "a[$_] = b[".($bytes-$_-1)."];", 0..$bytes-1]}
78             }
79             EOF
80             Doc => $doc,
81             );
82             }
83             defswap(2, "Swaps pairs of bytes in argument x()");
84             defswap(4, "Swaps quads of bytes in argument x()");
85             defswap(8, "Swaps octets of bytes in argument x()");
86             defswap(12, "Swaps 12s of bytes in argument x()");
87             defswap(16, "Swaps 16s of bytes in argument x()");
88             defswap(24, "Swaps 24s of bytes in argument x()");
89             defswap(32, "Swaps 32s of bytes in argument x()");
90              
91             pp_addpm(<<'!NO!SUBS!');
92              
93             # Internal routine to extend PDL array by size $n along last dimension
94             # - Would be nice to have a proper extend function rather than hack
95             # - Is a NO-OP when handed a perl ARRAY ref rather than an ndarray arg
96 30     30   43 sub _ext_lastD { # Called by rcols and rgrep
97 30 50       59 my ($x,$n) = @_;
98 30         71 if (ref($_[0]) ne 'ARRAY') {
99 30         35 my @nold = $x->dims;
100 30         30 my @nnew = @nold;
101 30         55 $nnew[-1] += $n; # add $n to the last dimension
102 30         300 my $y = zeroes($x->type,@nnew); # New pdl
103 30         151 my $yy = $y->mv(-1,0)->slice("0:".($nold[-1]-1))->mv(0,-1);
104 30         146 $yy .= $x;
105             $_[0] = $y;
106 30         65 }
107             1;
108             }
109              
110             # Implements PDL->at() for either 1D PDL or ARRAY arguments
111             # TODO: Need to add support for multidim ndarrays parallel to rcols
112 32     32   22 sub _at_1D ($$) { # Called by wcols and swcols
113 32         22 my $data = $_[0];
114 32 50       32 my $index = $_[1];
115 0         0 if (ref $data eq 'ARRAY') {
116             return $data->[$index];
117 32         53 } else {
118             return $data->at($index);
119             }
120             }
121              
122             # squeezes "fluffy" perl list values into column data type
123 59     59   69 sub _burp_1D {
124 59         54 my $data = $_[0][0];
125 59         56 my $databox = $_[0][1];
126 59         51 my $index = $_[1];
  59         73  
127 59 50       104 my $start = $index - @{$databox} + 1;
    100          
128 0         0 if (ref $data eq 'ARRAY') {
  0         0  
  0         0  
129             push @{$data}, @{$databox};
130             } elsif ( ref($databox->[0]) eq "ARRAY" ) {
131             # could add POSIX::strtol for hex and octal support but
132 4         20 # can't break float conversions (how?)
133             $data->slice(":,$start:$index") .= pdl($data->type, $databox);
134             } else {
135             # could add POSIX::strtol for hex and octal support but
136 55         176 # can't break float conversions (how?)
137             $data->slice("$start:$index") .= pdl($data->type, $databox);
138 59         387 }
139             $_[0] = [ $data, [] ];
140             }
141              
142             # taken outside of rcols() to avoid clutter
143 12     12   25 sub _handle_types ($$$) {
144 12         14 my $ncols = shift;
145 12         14 my $deftype = shift;
146             my $types = shift;
147 12 50       37  
148             barf "Unknown PDL type given for DEFTYPE.\n"
149             unless ref($deftype) eq "PDL::Type";
150 12 50       35  
151             my @cols = ref($types) eq "ARRAY" ? @$types : ();
152 12 100       27  
153             if ( $#cols > -1 ) {
154 2 50       20 # truncate if required
155             $#cols = $ncols if $#cols > $ncols;
156              
157 2         6 # check input values are sensible
158 2 50       7 for ( 0 .. $#cols ) {
159             barf "Unknown value '$cols[$_]' in TYPES array.\n"
160             unless ref($cols[$_]) eq "PDL::Type";
161             }
162             }
163              
164 12         80 # fill in any missing columns
  20         25  
165             for ( ($#cols+1) .. $ncols ) { push @cols, $deftype; }
166 12         32  
167             return @cols;
168             } # sub: _handle_types
169              
170 22     22   177  
  22         42  
  22         4368  
171             # Whether an object is an IO handle
172 17     17   29 use Scalar::Util;
173             sub _is_io_handle {
174             my $h = shift;
175 17         42 # reftype catches almost every handle, except: *MYHANDLE
176 17   66     155 # fileno catches *MYHANDLE, but doesn't catch handles that aren't files
177             my $reftype = Scalar::Util::reftype($h);
178             return defined(fileno($h)) || (defined($reftype) && $reftype eq 'GLOB');
179             }
180              
181              
182             =head2 rcols
183              
184             =for ref
185              
186             Read specified ASCII cols from a file into ndarrays and perl
187             arrays (also see L).
188              
189             =for usage
190              
191             Usage:
192             ($x,$y,...) = rcols( *HANDLE|"filename", { EXCLUDE => '/^!/' }, $col1, $col2, ... )
193             $x = rcols( *HANDLE|"filename", { EXCLUDE => '/^!/' }, [] )
194             ($x,$y,...) = rcols( *HANDLE|"filename", $col1, $col2, ..., { EXCLUDE => '/^!/' } )
195             ($x,$y,...) = rcols( *HANDLE|"filename", "/foo/", $col1, $col2, ... )
196              
197             For each column number specified, a 1D output PDL will be
198             generated. Anonymous arrays of column numbers generate
199             2D output ndarrays with dim0 for the column data and dim1
200             equal to the number of columns in the anonymous array(s).
201              
202             An empty anonymous array as column specification will
203             produce a single output data ndarray with dim(1) equal
204             to the number of columns available.
205              
206             There are two calling conventions - the old version, where a
207             pattern can be specified after the filename/handle, and the
208             new version where options are given as as hash reference.
209             This reference can be given as either the second or last
210             argument.
211              
212             The default behaviour is to ignore lines beginning with
213             a # character and lines that only consist of whitespace.
214             Options exist to only read from lines that match, or do
215             not match, supplied patterns, and to set the types of the
216             created ndarrays.
217              
218             Can take file name or *HANDLE, and if no explicit column
219             numbers are specified, all are assumed. For the allowed types,
220             see L.
221              
222             Options (case insensitive):
223              
224             EXCLUDE or IGNORE
225             - ignore lines matching this pattern (default B<'/^#/'>).
226              
227             INCLUDE or KEEP
228             - only use lines which match this pattern (default B<''>).
229              
230             LINES
231             - a string pattern specifying which line numbers to use.
232             Line numbers start at 0 and the syntax is 'a:b:c' to use
233             every c'th matching line between a and b (default B<''>).
234              
235             DEFTYPE
236             - default data type for stored data (if not specified, use the type
237             stored in C<$PDL::IO::Misc::deftype>, which starts off as B).
238              
239             TYPES
240             - reference to an array of data types, one element for each column
241             to be read in. Any missing columns use the DEFTYPE value (default B<[]>).
242              
243             COLSEP
244             - splits on this string/pattern/qr{} between columns of data. Defaults to
245             $PDL::IO::Misc::defcolsep.
246              
247             PERLCOLS
248             - an array of column numbers which are to be read into perl arrays
249             rather than ndarrays. Any columns not specified in the explicit list
250             of columns to read will be returned after the explicit columns.
251             (default B).
252              
253             COLIDS
254             - if defined to an array reference, it will be assigned the column
255             ID values obtained by splitting the first line of the file in the
256             identical fashion to the column data.
257              
258             CHUNKSIZE
259             - the number of input data elements to batch together before appending
260             to each output data ndarray (Default value is 100). If CHUNKSIZE is
261             greater than the number of lines of data to read, the entire file is
262             slurped in, lines split, and perl lists of column data are generated.
263             At the end, effectively pdl(@column_data) produces any result ndarrays.
264              
265             VERBOSE
266             - be verbose about IO processing (default C<$PDL::vebose>)
267              
268             =for example
269              
270             For example:
271              
272             $x = PDL->rcols 'file1'; # file1 has only one column of data
273             $x = PDL->rcols 'file2', []; # file2 can have multiple columns, still 1 ndarray output
274             # (empty array ref spec means all possible data fields)
275              
276             ($x,$y) = rcols 'table.csv', { COLSEP => ',' }; # read CSV data file
277             ($x,$y) = rcols *STDOUT; # default separator for lines like '32 24'
278              
279             # read in lines containing the string foo, where the first
280             # example also ignores lines that begin with a # character.
281             ($x,$y,$z) = rcols 'file2', 0,4,5, { INCLUDE => '/foo/' };
282             ($x,$y,$z) = rcols 'file2', 0,4,5, { INCLUDE => '/foo/', EXCLUDE => '' };
283              
284             # ignore the first 27 lines of the file, reading in as ushort's
285             ($x,$y) = rcols 'file3', { LINES => '27:-1', DEFTYPE => ushort };
286             ($x,$y) = rcols 'file3', { LINES => '27:', TYPES => [ ushort, ushort ] };
287              
288             # read in the first column as a perl array and the next two as ndarrays
289             # with the perl column returned after the ndarray outputs
290             ($x,$y,$name) = rcols 'file4', 1, 2 , { PERLCOLS => [ 0 ] };
291             printf "Number of names read in = %d\n", 1 + $#$name;
292              
293             # read in the first column as a perl array and the next two as ndarrays
294             # with PERLCOLS changing the type of the first returned value to perl list ref
295             ($name,$x,$y) = rcols 'file4', 0, 1, 2, { PERLCOLS => [ 0 ] };
296              
297             # read in the first column as a perl array returned first followed by the
298             # the next two data columns in the file as a single Nx2 ndarray
299             ($name,$xy) = rcols 'file4', 0, [1, 2], { PERLCOLS => [ 0 ] };
300              
301              
302             NOTES:
303              
304             1. Quotes are required on patterns or use the qr{} quote regexp syntax.
305              
306             2. Columns are separated by whitespace by default, use the COLSEP option
307             separator to specify an alternate split pattern or string or specify an
308             alternate default separator by setting C<$PDL::IO::Misc::defcolsep> .
309              
310             3. Legacy support is present to use C<$PDL::IO::Misc::colsep> to set the
311             column separator but C<$PDL::IO::Misc::colsep> is not defined by default.
312             If you set the variable to a defined value it will get picked up.
313              
314             4. LINES => '-1:0:3' may not work as you expect, since lines are skipped
315             when read in, then the whole array reversed.
316              
317             5. For consistency with wcols and rcols 1D usage, column data is loaded
318             into the rows of the pdls (i.e., dim(0) is the elements read per column
319 22     22   170 in the file and dim(1) is the number of columns of data read.
  22         54  
  22         61027  
320              
321             =cut
322              
323             use vars qw/ $colsep $defcolsep $deftype /;
324              
325             $defcolsep = ' '; # Default column separator
326             $deftype = double; # Default type for ndarrays
327              
328             my $defchunksize = 100; # Number of perl list items to append to ndarray
329             my $usecolsep; # This is the colsep value that is actually used
330              
331             # NOTE: XXX
332             # need to look at the line-selection code. For instance, if want
333             # lines => '-1:0:3',
334             # read in all lines, reverse, then apply the step
335             # -> fix point 4 above
336 11     11 1 2567 #
337             # perhaps should just simplify the LINES option - ie remove
338             # support for reversed arrays?
339 12     12 0 27  
340 12 50       53 sub rcols{ PDL->rcols(@_) }
341              
342             sub PDL::rcols {
343 12         49 my $class = shift;
344 12 100       69 barf 'Usage ($x,$y,...) = rcols( *HANDLE|"filename", ["/pattern/" or \%options], $col1, $col2, ..., [ \%options] )'
345 12 100 50     813 if $#_<0;
346 12         32  
347             my $is_handle = _is_io_handle $_[0];
348             my $fh = $is_handle ? $_[0] : gensym;
349 12         255 open $fh, $_[0] or die "File $_[0] not found\n" unless $is_handle;
350             shift;
351              
352             # set up default options
353             my $opt = PDL::Options->new( {
354             CHUNKSIZE => undef,
355             COLIDS => undef,
356             COLSEP => undef,
357             DEFTYPE => $deftype,
358             EXCLUDE => '/^#/',
359             INCLUDE => undef,
360             LINES => '',
361 12         76 PERLCOLS => undef,
362             TYPES => [],
363             VERBOSE=> $PDL::verbose,
364 12 100       74 } );
365             $opt->synonyms( { IGNORE => 'EXCLUDE', KEEP => 'INCLUDE' } );
366 10 100       78  
  1 50       7  
    100          
367 0         0 # has the user supplied any options
368 2         12 if ( defined($_[0]) ) {
369             # ensure the old-style behaviour by setting the exclude pattern to undef
370             if ( $_[0] =~ m|^/.*/$| ) { $opt->options( { EXCLUDE => undef, INCLUDE => shift } ); }
371             elsif ( ref($_[0]) eq "Regexp" ) { $opt->options( { EXCLUDE => undef, INCLUDE => shift } ); }
372 12 100 100     71 elsif ( ref($_[0]) eq "HASH" ) { $opt->options( shift ); }
373             }
374              
375 12         36 # maybe the last element is a hash array as well
376             $opt->options( pop ) if defined($_[-1]) and ref($_[-1]) eq "HASH";
377              
378 12 50       35 # a reference to a hash array
379 12 100       61 my $options = $opt->current();
380              
381             # handle legacy colsep variable
382 12         22 $usecolsep = (defined $colsep) ? qr{$colsep} : undef;
383 24 100 100     86 $usecolsep = qr{$options->{COLSEP}} if $options->{COLSEP};
384 12 50       63  
385 12         72 # what are the patterns?
386 12         181 foreach my $pattern ( qw( INCLUDE EXCLUDE ) ) {
387             if ( $options->{$pattern} and ref($options->{$pattern}) ne "Regexp" ) {
388 0         0 if ( $options->{$pattern} =~ m|^/.*/$| ) {
389             $options->{$pattern} =~ s|^/(.*)/$|$1|;
390             $options->{$pattern} = qr($options->{$pattern});
391             } else {
392             barf "rcols() - unable to process $pattern value.\n";
393             }
394 12   33     49 }
395 12         24 }
396              
397             # CHUNKSIZE controls memory/time tradeoff of ndarray IO
398 12         22 my $chunksize = $options->{CHUNKSIZE} || $defchunksize;
399             my $nextburpindex = -1;
400 12         18  
401 12 50       27 # which columns are to be read into ndarrays and which into perl arrays?
  0         0  
402             my @end_perl_cols = (); # unique perl cols to return at end
403 12         13  
404 12         22 my @perl_cols = (); # perl cols index list from PERLCOLS option
  0         0  
405             @perl_cols = @{ $$options{PERLCOLS} } if $$options{PERLCOLS};
406              
407 12         22 my @is_perl_col; # true if index corresponds to a perl column
408             for (@perl_cols) { $is_perl_col[$_] = 1; };
409             # print STDERR "rcols: \@is_perl_col is @is_perl_col\n";
410              
411             my ( @explicit_cols ) = @_; # call specified columns to read
412 12         15 # print STDERR "rcols: \@explicit_cols is @explicit_cols\n";
413 12 100       32  
414 1 50       8 # work out which line numbers are required
    0          
415 1         2 # - the regexp's are a bit over the top
  1         3  
416             my ( $x, $y, $c );
417 0         0 if ( $$options{LINES} ne '' ) {
  0         0  
  0         0  
418             if ( $$options{LINES} =~ /^\s*([+-]?\d*)\s*:\s*([+-]?\d*)\s*$/ ) {
419 0         0 $x = $1; $y = $2;
420             } elsif ( $$options{LINES} =~ /^\s*([+-]?\d*)\s*:\s*([+-]?\d*)\s*:\s*([+]?\d*)\s*$/ ) {
421             $x = $1; $y = $2; $c = $3;
422             } else {
423             barf "rcols() - unable to parse LINES option.\n";
424 12         20 }
425 12 100 66     32 }
426 12 100 66     24  
427 12   50     30 # Since we do not know how many lines there are in advance, things get a bit messy
428             my ( $index_start, $index_end ) = ( 0, -1 );
429             $index_start = $x if defined($x) and $x ne '';
430             $index_end = $y if defined($y) and $y ne '';
431             my $line_step = $c || 1;
432 12         23  
433 12 100 66     70 # $line_rev = 0/1 for normal order/reversed
    50 33        
    0 0        
    0 0        
    0          
434             # $line_start/_end refer to the first and last line numbers that we want
435 11         15 # (the values of which we may not know until we've read in all the file)
  11         14  
436             my ( $line_start, $line_end, $line_rev );
437             if ( ($index_start >= 0 and $index_end < 0) ) {
438 1         1 # eg 0:-1
  1         2  
439             $line_rev = 0; $line_start = $index_start;
440             } elsif ( $index_end >= 0 and $index_start < 0 ) {
441 0         0 # eg -1:0
  0         0  
  0         0  
442             $line_rev = 1; $line_start = $index_end;
443             } elsif ( $index_end >= $index_start and $index_start >= 0 ) {
444 0         0 # eg 0:10
  0         0  
  0         0  
445             $line_rev = 0; $line_start = $index_start; $line_end = $index_end;
446             } elsif ( $index_start > $index_end and $index_end >= 0 ) {
447 0         0 # eg 10:0
448             $line_rev = 1; $line_start = $index_end; $line_end = $index_start;
449             } elsif ( $index_start <= $index_end ) {
450 0         0 # eg -5:-1
451             $line_rev = 0;
452             } else {
453 12         9 # eg -1:-5
454             $line_rev = 1;
455 12         14 }
456              
457 12         13 my @ret;
458 12         14  
459 12         12 my ($k,$fhline);
460 12         14  
461 12         19 my $line_num = -1;
462             my $line_ctr = $line_step - 1; # ensure first line is always included
463 12         14 my $index = -1;
464             my $pdlsize = 0;
465             my $extend = 10000;
466              
467 12 50       11 my $line_store; # line numbers of saved data
  12         21  
468 0 0       0  
469 0         0 RCOLS_IO: {
470 0 0       0  
471 0         0 if ($options->{COLIDS}) {
472 0 0       0 print STDERR "rcols: processing COLIDS option\n" if $options->{VERBOSE};
473 0         0 undef $!;
  0         0  
474             if (defined($fhline = <$fh>) ) { # grab first line's fields for column IDs
475 0 0       0 $fhline =~ s/\r?\n$//; # handle DOS on unix files better
476 0         0 my @v = defined($usecolsep) ? split($usecolsep,$fhline) : split(' ',$fhline);
477             @{$options->{COLIDS}} = @v;
478             } else {
479             die "rcols: reading COLIDS info, $!" if $!;
480 12         208 last RCOLS_IO;
481             }
482             }
483 55         294  
484             while( defined($fhline = <$fh>) ) {
485 55         62  
486             # chomp $fhline;
487             $fhline =~ s/\r?\n$//; # handle DOS on unix files better
488              
489             $line_num++;
490              
491             # the order of these checks is important, particularly whether we
492             # check for line_ctr before or after the pattern matching
493             # Prior to PDL 2.003 the line checks were done BEFORE the
494 55 50 33     69 # pattern matching
495 55 50 33     132 #
496 55 100 100     297 # need this first check, even with it almost repeated at end of loop,
497 47 100 100     132 # incase the pattern matching excludes $line_num == $line_end, say
498 41 50       66 last if defined($line_end) and $line_num > $line_end;
499 41         34 next if defined($line_start) and $line_num < $line_start;
500             next if $options->{EXCLUDE} and $fhline =~ /$options->{EXCLUDE}/;
501 41         31 next if $options->{INCLUDE} and not $fhline =~ /$options->{INCLUDE}/;
502 41 100       105 next unless ++$line_ctr == $line_step;
503             $line_ctr = 0;
504              
505 41 100       62 $index++;
  90         185  
506             my @v = defined($usecolsep) ? split($usecolsep,$fhline) : split(' ',$fhline);
507              
508             # map empty fields '' to undef value
509 41 100       61 @v = map { $_ eq '' ? undef : $_ } @v;
510              
511             # if the first line, set up the output ndarrays using all the columns
512 11 100       30 # if the user doesn't specify anything
513 3         8 if ( $index == 0 ) {
514              
515 11 100 66     42 # Handle implicit multicolumns in command line
516 2 100       236 if ($#explicit_cols < 0) { # implicit single col data
  2         8  
517 1         5 @explicit_cols = ( 0 .. $#v );
518             }
519             if (scalar(@explicit_cols)==1 and ref($explicit_cols[0]) eq "ARRAY") {
520 11         22 if ( !scalar(@{$explicit_cols[0]}) ) { # implicit multi-col data
521 11         19 @explicit_cols = ( [ 0 .. $#v ] );
522 11         19 }
523 20 100       28 }
524 2 50       7 my $implicit_pdls = 0;
525             my $is_explicit = {};
526 18         45 foreach my $col (@explicit_cols) {
527             if (ref($col) eq "ARRAY") {
528             $implicit_pdls++ if !scalar(@$col);
529 11 50       29 } else {
530 0         0 $is_explicit->{$col} = 1;
531             }
532 11         17 }
533 20 50 66     48 if ($implicit_pdls > 1) {
534 0         0 die "rcols: only one implicit multicolumn ndarray spec allowed, found $implicit_pdls!\n";
  0         0  
535             }
536             foreach my $col (@explicit_cols) {
537             if (ref($col) eq "ARRAY" and !scalar(@$col)) {
538             @$col = grep { !$is_explicit->{$_} } ( 0 .. $#v );
539 11         12 }
540 11         17 }
541 11         19  
542             # remove declared perl columns from pdl data list
543 20 100       46 $k = 0;
    50          
544 2         4 my @pdl_cols = ();
  5         11  
  2         3  
545 2         2 foreach my $col (@explicit_cols) {
  2         5  
546             # strip out declared perl cols so they won't be read into ndarrays
547 18         28 if ( ref($col) eq "ARRAY" ) {
548             @$col = grep { !$is_perl_col[$_] } @{$col};
549             push @pdl_cols, [ @{$col} ];
550             } elsif (!$is_perl_col[$col]) {
551 11         16 push @pdl_cols, $col;
552 11         20 }
553 20 50 66     54 }
554 0         0 # strip out perl cols in explicit col list for return at end
  0         0  
555             @end_perl_cols = @perl_cols;
556             foreach my $col (@explicit_cols) {
557             if ( ref($col) ne "ARRAY" and defined($is_perl_col[$col]) ) {
558             @end_perl_cols = grep { $_ != $col } @end_perl_cols;
559 11         59 }
560 11 50       28 };
561 0         0  
562 0         0 # sort out the types of the ndarrays
563 0         0 my @types = _handle_types( $#pdl_cols, $$options{DEFTYPE}, $$options{TYPES} );
564             if ( $options->{VERBOSE} ) { # dbg aid
565 0         0 print "Reading data into ndarrays of type: [ ";
566             foreach my $t ( @types ) {
567             print $t->shortctype() . " ";
568 11         15 }
569 11         118 print "]\n";
570             }
571              
572 20 100       35 $k = 0;
573             for (@explicit_cols) {
574 2         5 # Using mixed list+ndarray data structure for performance tradeoff
  2         120  
575             # between memory usage (perl list) and speed of IO (PDL operations)
576 18 50       242 if (ref($_) eq "ARRAY") {
577 18 50       42 # use multicolumn ndarray here
578             push @ret, [ $class->zeroes($types[$k++],scalar(@{$_}),1), [] ];
579             } else {
580 11         20 push @ret, ($is_perl_col[$_] ? [ [], [] ] : [ $class->zeroes($types[$k],1), [] ]);
  0         0  
581             $k++ unless $is_perl_col[$_];
582 11         69 }
583             }
584             for (@end_perl_cols) { push @ret, [ [], [] ]; }
585              
586 41         43 $line_store = [ $class->zeroes(long,1), [] ]; # only need to store integers
587 41 100       56 }
588 10         25  
  28         42  
589 10         18 # if necessary, extend PDL in buffered manner
590             $k = 0;
591             if ( $pdlsize < $index ) {
592             for (@ret, $line_store) { _ext_lastD( $_->[0], $extend ); }
593 41         60 $pdlsize += $extend;
594 41         46 }
595 73 100       88  
596 9         7 # - stick perl arrays onto end of $ret
  9         47  
597             $k = 0;
598 64         50 for (@explicit_cols, @end_perl_cols) {
  64         153  
599             if (ref($_) eq "ARRAY") {
600             push @{ $ret[$k++]->[1] }, [ @v[ @$_ ] ];
601             } else {
602             push @{ $ret[$k++]->[1] }, $v[$_];
603 41         37 }
  41         51  
604             }
605              
606 41 100       56 # store the line number
607 11         17 push @{$line_store->[1]}, $line_num;
  31         51  
608 11         16  
609             # need to burp out list if needed
610             if ( $index >= $nextburpindex ) {
611             for (@ret, $line_store) { _burp_1D($_,$index); }
612 41 50 33     259 $nextburpindex = $index + $chunksize;
613             }
614              
615             # Thanks to Frank Samuelson for this
616             last if defined($line_end) and $line_num == $line_end;
617 12 100       137 }
618              
619             }
620              
621 12         21 close($fh) unless $is_handle;
622 32 100 100     50  
  31         82  
623 32         83 # burp one final time if needed and
624             # clean out additional ARRAY ref level for @ret
625             for (@ret, $line_store) {
626             _burp_1D($_,$index) if defined $_ and scalar @{$_->[1]};
627 12 100       93 $_ = $_->[0];
628 1 50       3 }
629 1 50       3  
630 0         0 # have we read anything in? if not, return empty ndarrays
631 0 0       0 if ( $index == -1 ) {
632 0         0 print "Warning: rcols() did not read in any data.\n" if $options->{VERBOSE};
633             if ( wantarray ) {
634 0         0 foreach ( 0 .. $#explicit_cols ) {
635             if ( $is_perl_col[$_] ) {
636             $ret[$_] = PDL->null;
637 0         0 } else {
  0         0  
638 0         0 $ret[$_] = [];
639             }
640 1         8 }
641             for ( @end_perl_cols ) { push @ret, []; }
642             return ( @ret );
643             } else {
644             return PDL->null;
645             }
646             }
647 11 50 66     53  
      66        
648 10         18 # if the user has asked for lines => 0:-1 or 0:10 or 1:10 or 1:-1,
649             # - ie not reversed and the last line number is known -
650 18 50       160 # then we can skip the following nastiness
651             if ( $line_rev == 0 and $index_start >= 0 and $index_end >= -1 ) {
652 10 50       30 for (@ret) {
653 0 0       0 ## $_ = $_->mv(-1,0)->slice("0:${index}")->mv(0,-1) unless ref($_) eq 'ARRAY';
654 0         0 $_ = $_->mv(-1,0)->slice("0:${index}") unless ref($_) eq 'ARRAY'; # cols are dim(0)
  0         0  
655             };
656 0         0 if ( $options->{VERBOSE} ) {
657             if ( ref($ret[0]) eq 'ARRAY' ) {
658             print "Read in ", scalar( @{ $ret[0] } ), " elements.\n";
659 10 100       248 } else {
660             print "Read in ", $ret[0]->nelem, " elements.\n";
661             }
662             }
663             wantarray ? return(@ret) : return $ret[0];
664 1         5 }
665              
666             # Work out which line numbers we want. First we clean up the ndarray
667 1 50       2 # containing the line numbers that have been read in
668 1 50 33     8 $line_store = $line_store->slice("0:${index}");
    50          
669 0         0  
670 0         0 # work out the min/max line numbers required
671 0         0 if ( $line_rev ) {
672             if ( defined($line_start) and defined($line_end) ) {
673 1         1 my $dummy = $line_start;
674             $line_start = $line_end;
675 0         0 $line_end = $dummy;
676             } elsif ( defined($line_start) ) {
677             $line_end = $line_start;
678 1 50       4 } else {
679 1 50       3 $line_start = $line_end;
680             }
681 1         2 }
682             $line_start = $line_num + 1 + $index_start if $index_start < 0;
683 22     22   420 $line_end = $line_num + 1 + $index_end if $index_end < 0;
  22         55  
  22         80037  
  1         1  
684 1 50       3  
685 1         7 my $indices;
686              
687 0         0 { no warnings 'precedence';
688             if ( $line_rev ) {
689             $indices = which( $line_store >= $line_end & $line_store <= $line_start )->slice('-1:0');
690             } else {
691             $indices = which( $line_store >= $line_start & $line_store <= $line_end );
692 1         5 }
693 2 50       5 }
694 0         0  
695 0         0 # truncate the ndarrays
696             for my $col ( @explicit_cols ) {
697             if ( ref($col) eq "ARRAY" ) {
698 2 50       16 for ( @$col ) {
699             $ret[$_] = $ret[$_]->index($indices);
700             }
701             } else {
702 1         6 $ret[$col] = $ret[$col]->index($indices) unless $is_perl_col[$col] };
703 1         2 }
704 2 50       5  
705 0         0 # truncate/reverse/etc the perl arrays
  0         0  
706 0         0 my @indices_array = list $indices;
707 0         0 foreach ( @explicit_cols, @end_perl_cols ) {
  0         0  
  0         0  
708             if ( $is_perl_col[$_] ) {
709             my @temp = @{ $ret[$_] };
710             $ret[$_] = [];
711             foreach my $i ( @indices_array ) { push @{ $ret[$_] }, $temp[$i] };
712 1 50       4 }
713 0         0 }
714 0         0  
715 0 0       0 # print some diagnostics
716 0 0       0 if ( $options->{VERBOSE} ) {
717 0         0 my $done = 0;
718 0         0 foreach my $col (@explicit_cols) {
719             last if $done;
720 0         0 next if $is_perl_col[$col];
721 0 0       0 print "Read in ", $ret[$col]->nelem, " elements.\n";
722 0         0 $done = 1;
723 0         0 }
724             foreach my $col (@explicit_cols, @end_perl_cols) {
725             last if $done;
726             print "Read in ", $ret[$col]->nelem, " elements.\n";
727             $done = 1;
728 1         2 }
729 2 50       5 }
730 2 50       11  
731             # fix 2D pdls to match what wcols generates
732             foreach my $col (@ret) {
733 1 50       27 next if ref($col) eq "ARRAY";
734             $col = $col->transpose if $col->ndims == 2;
735             }
736              
737             wantarray ? return(@ret) : return $ret[0];
738             }
739              
740              
741             =head2 wcols
742              
743             =for ref
744              
745             Write ASCII columns into file from 1D or 2D ndarrays and/or 1D listrefs efficiently.
746              
747             Can take file name or *HANDLE, and if no file/filehandle is given defaults to STDOUT.
748              
749             Options (case insensitive):
750              
751             HEADER - prints this string before the data. If the string
752             is not terminated by a newline, one is added. (default B<''>).
753              
754             COLSEP - prints this string between columns of data. Defaults to
755             $PDL::IO::Misc::defcolsep.
756              
757             FORMAT - A printf-style format string that is cycled through
758             column output for user controlled formatting.
759              
760             =for usage
761              
762             Usage: wcols $data1, $data2, $data3,..., *HANDLE|"outfile", [\%options]; # or
763             wcols $format_string, $data1, $data2, $data3,..., *HANDLE|"outfile", [\%options];
764              
765             where the $dataN args are either 1D ndarrays, 1D perl array refs,
766             or 2D ndarrays (as might be returned from rcols() with the [] column
767             syntax and/or using the PERLCOLS option). dim(0) of all ndarrays
768             written must be the same size. The printf-style $format_string,
769             if given, overrides any FORMAT key settings in the option hash.
770              
771             e.g.,
772              
773             =for example
774              
775             $x = random(4); $y = ones(4);
776             wcols $x, $y+2, 'foo.dat';
777             wcols $x, $y+2, *STDERR;
778             wcols $x, $y+2, '|wc';
779              
780             $x = sequence(3); $y = zeros(3); $c = random(3);
781             wcols $x,$y,$c; # Orthogonal version of 'print $x,$y,$c' :-)
782              
783             wcols "%10.3f", $x,$y; # Formatted
784             wcols "%10.3f %10.5g", $x,$y; # Individual column formatting
785              
786             $x = sequence(3); $y = zeros(3); $units = [ 'm/sec', 'kg', 'MPH' ];
787             wcols $x,$y, { HEADER => "# x y" };
788             wcols $x,$y, { Header => "# x y", Colsep => ', ' }; # case insensitive option names!
789             wcols " %4.1f %4.1f %s",$x,$y,$units, { header => "# Day Time Units" };
790              
791             $a52 = sequence(5,2); $y = ones(5); $c = [ 1, 2, 4 ];
792             wcols $a52; # now can write out 2D pdls (2 columns data in output)
793             wcols $y, $a52, $c # ...and mix and match with 1D listrefs as well
794              
795             NOTES:
796              
797             1. Columns are separated by whitespace by default, use
798             C<$PDL::IO::Misc::defcolsep> to modify the default value or
799             the COLSEP option
800              
801             2. Support for the C<$PDL::IO::Misc::colsep> global value
802             of PDL-2.4.6 and earlier is maintained but the initial value
803             of the global is undef until you set it. The value will be
804             then be picked up and used as if defcolsep were specified.
805              
806             3. Dim 0 corresponds to the column data dimension for both
807             rcols and wcols. This makes wcols the reverse operation
808             of rcols.
809              
810 4 50   4 0 4700 =cut
811              
812             *wcols = \&PDL::wcols;
813 4 50       15  
814             sub PDL::wcols {
815             barf 'Usage: wcols($optional_format_string, 1_or_2D_pdls, *HANDLE|"filename", [\%options])' if @_<1;
816 4         10  
817 4         0 # handle legacy colsep variable
818 4 100       15 $usecolsep = (defined $colsep) ? $colsep : $defcolsep;
819 2         4  
820 2         9 # if last argument is a reference to a hash, parse the options
821 2 50       19 my ($format_string, $step, $fh);
  0 50       0  
    50          
822 0         0 my $header;
823 2         6 if ( ref( $_[-1] ) eq "HASH" ) {
824             my $opt = pop;
825 0         0 foreach my $key ( sort keys %$opt ) {
826             if ( $key =~ /^H/i ) { $header = $opt->{$key}; } # option: HEADER
827             elsif ( $key =~ /^COLSEP/i ) { $usecolsep = $opt->{$key}; } # option: COLSEP
828             elsif ( $key =~ /^FORMAT/i ) { $format_string = $opt->{$key}; } # option: FORMAT
829 4 100 100     20 else {
830 3 100       11 print "Warning: wcols does not understand option <$key>.\n";
831             }
832 3         6 }
833 3         25 }
834 3         4 if (ref(\$_[0]) eq "SCALAR" || $format_string) {
835             $format_string = shift if (ref(\$_[0]) eq "SCALAR");
836 4         7 # 1st arg not ndarray, explicit format string overrides option hash FORMAT
837 4         4 $step = $format_string;
838 4   33     30 $step =~ s/(%%|[^%])//g; # use step to count number of format items
839             $step = length ($step);
840             }
841 4 50       8 my $file = $_[-1];
842 4         6 my $file_opened;
  4         7  
843             my $is_handle = !UNIVERSAL::isa($file,'PDL') &&
844             !UNIVERSAL::isa($file,'ARRAY') &&
845 0 0       0 _is_io_handle $file;
846 0         0 if ($is_handle) { # file handle passed directly
847 0 0       0 $fh = $file; pop;
848 0 0 0     0 }
849 0 0       0 else{
850             if (ref(\$file) eq "SCALAR") { # Must be a file name
851 0         0 $fh = gensym;
852 0         0 if (!$is_handle) {
853             $file = ">$file" unless $file =~ /^\|/ or $file =~ /^\>/;
854             open $fh, $file or barf "File $file can not be opened for writing\n";
855             }
856 0         0 pop;
857             $file_opened = 1;
858             }
859             else{ # Not a filehandle or filename, assume something else
860 4         7 # (probably ndarray) and send to STDOUT
861 4 50       35 $fh = *STDOUT;
  0         0  
862 4         6 }
863 4         8 }
864 8 50       14  
865 0 0       0 my @p = @_;
  0         0  
866 0         0 my $n = (ref $p[0] eq 'ARRAY') ? $#{$p[0]}+1 : $p[0]->dim(0);
867             my @dogp = (); # need to break 2D pdls into a their 1D pdl components
868 8 50 33     34 for (@p) {
869 8 50       14 if ( ref $_ eq 'ARRAY' ) {
870 0         0 barf "wcols: 1D args must have same number of elements\n" if scalar(@{$_}) != $n;
871             push @dogp, $_;
872 8         9 } else {
873             barf "wcols: 1D args must have same number of elements\n" if $_->dim(0) != $n or $_->getndims > 2;
874             if ( $_->getndims == 2 ) {
875             push @dogp, $_->dog;
876 4 50       6 } else {
877 0 0       0 push @dogp, $_;
878 0         0 }
879             }
880 4         3 }
881 4         5 if ( defined $header ) {
882 4         6 $header .= "\n" unless $header =~ m/\n$/;
883 16 100       16 print $fh $header;
884 12         12 }
885 12         12 my $i;
886 12         13 my $pcnt = scalar @dogp;
887 24         21 for ($i=0; $i<$n; $i++) {
  24         20  
888 24 100       32 if ($format_string) {
889 12         34 my @d;
890 12 50       17 my $pdone = 0;
891 12         24 for (@dogp) {
892             push @d,_at_1D($_,$i); $pdone++;
893             if (@d == $step) {
894 12 50 33     37 printf $fh $format_string,@d;
895 0         0 printf $fh $usecolsep unless $pdone==$pcnt;
896 0 0       0 $#d = -1;
897 0         0 }
898             }
899 0         0 if (@d && !$i) {
900             my $str;
901 0         0 if ($#dogp>0) {
902             $str = ($#dogp+1).' columns don\'t';
903 0         0 } else {
904             $str = '1 column doesn\'t';
905             }
906             $str .= " fit in $step column format ".
907             '(even repeated) -- discarding surplus';
908 4         4 carp $str;
909 4         4 # printf $fh $format_string,@d;
910 8         5 # printf $fh $usecolsep;
911 8 100       13 }
912             } else {
913             my $pdone = 0;
914 16         54 for (@dogp) {
915             $pdone++;
916 4 50       8 print $fh _at_1D($_,$i) . ( ($pdone==$pcnt) ? '' : $usecolsep );
917 4         14 }
918             }
919             print $fh "\n";
920             }
921             close($fh) if $file_opened;
922             return 1;
923             }
924              
925             =head2 swcols
926              
927             =for ref
928              
929             generate string list from C format specifier and a list of ndarrays
930              
931             C takes an (optional) format specifier of the printf
932             sort and a list of 1D ndarrays as input. It returns a perl
933             array (or array reference if called in scalar context)
934             where each element of the array is the string generated by
935             printing the corresponding element of the ndarray(s) using
936             the format specified. If no format is specified it uses the
937             default print format.
938              
939             =for usage
940              
941             Usage: @str = swcols format, pdl1,pdl2,pdl3,...;
942             or $str = swcols format, pdl1,pdl2,pdl3,...;
943              
944 0     0 0 0 =cut
945              
946 0         0 *swcols = \&PDL::swcols;
947              
948 0 0       0 sub PDL::swcols{
949 0         0 my ($format_string,$step);
950 0         0  
951 0         0 my @outlist;
952              
953             if (ref(\$_[0]) eq "SCALAR") {
954 0         0 $step = $format_string = shift; # 1st arg not ndarray
955 0 0       0 $step =~ s/(%%|[^%])//g; # use step to count number of format items
  0         0  
956 0         0 $step = length ($step);
957 0 0       0 }
958 0 0       0  
  0         0  
959             my @p = @_;
960 0 0 0     0 my $n = (ref $p[0] eq 'ARRAY') ? $#{$p[0]}+1 : $p[0]->nelem;
961             for (@p) {
962             if ( ref $_ eq 'ARRAY' ) {
963             barf "swcols: 1D args must have same number of elements\n" if scalar(@{$_}) != $n;
964 0         0 } else {
965 0         0 barf "swcols: 1D args must have same number of elements\n" if $_->nelem != $n or $_->getndims!=1;
966 0 0       0 }
967 0         0 }
968 0         0  
969 0         0 my $i;
970 0 0       0 for ($i=0; $i<$n; $i++) {
971 0         0 if ($format_string) {
972 0         0 my @d;
973             for (@p) {
974             push @d,_at_1D($_,$i);
975 0 0 0     0 if (@d == $step) {
976 0         0 push @outlist,sprintf $format_string,@d;
977 0 0       0 $#d = -1;
978 0         0 }
979             }
980 0         0 if (@d && !$i) {
981             my $str;
982 0         0 if ($#p>0) {
983             $str = ($#p+1).' columns don\'t';
984 0         0 } else {
985             $str = '1 column doesn\'t';
986             }
987             $str .= " fit in $step column format ".
988             '(even repeated) -- discarding surplus';
989 0         0 carp $str;
990 0         0 # printf $fh $format_string,@d;
991             # printf $fh $usecolsep;
992             }
993             } else {
994 0 0       0 for (@p) {
995             push @outlist,sprintf _at_1D($_,$i),$usecolsep;
996             }
997             }
998             }
999             wantarray ? return @outlist: return \@outlist;
1000             }
1001              
1002              
1003             =head2 rgrep
1004              
1005             =for ref
1006              
1007             Read columns into ndarrays using full regexp pattern matching.
1008              
1009             Options:
1010              
1011             =for options
1012              
1013             UNDEFINED: This option determines what will be done for undefined
1014             values. For instance when reading a comma-separated file of the type
1015             C<1,2,,4> where the C<,,> indicates a missing value.
1016             The default value is to assign C<$PDL::undefval> to undefined values,
1017             but if C is set this is used instead. This would normally
1018             be set to a number, but if it is set to C and PDL is compiled
1019             with Badvalue support (see L) then undefined values are set to
1020             the appropriate badvalue and the column is marked as bad.
1021             DEFTYPE: Sets the default type of the columns - see the documentation for
1022             L
1023             TYPES: A reference to a Perl array with types for each column - see
1024             the documentation for L
1025             BUFFERSIZE: The number of lines to extend the ndarray by. It might speed
1026             up the reading a little bit by setting this to the number of lines in the
1027             file, but in general L is a better choice
1028              
1029             Usage
1030              
1031             =for usage
1032              
1033             ($x,$y,...) = rgrep(sub, *HANDLE|"filename")
1034              
1035             e.g.
1036              
1037             =for example
1038              
1039             ($x,$y) = rgrep {/Foo (.*) Bar (.*) Mumble/} $file;
1040              
1041 1 50   1 1 1474 i.e. the vectors C<$x> and C<$y> get the progressive values
1042             of C<$1>, C<$2> etc.
1043              
1044 1         3 =cut
  1         2  
1045 1         2  
1046             sub rgrep (&@) {
1047 1         5 barf 'Usage ($x,$y,...) = rgrep(sub, *HANDLE|"filename", [{OPTIONS}])'
1048 1 50       12 if $#_ > 2;
1049 1 50 0     3  
1050             my (@ret,@v,$nret); my ($m,$n)=(-1,0); # Count/PDL size
1051 1 50       4 my $pattern = shift;
1052 0         0  
1053             my $is_handle = _is_io_handle $_[0];
1054             my $fh = $is_handle ? $_[0] : gensym;
1055             open $fh, $_[0] or die "File $_[0] not found\n" unless $is_handle;
1056 1         12  
1057             if (ref($pattern) ne "CODE") {
1058             die "Got a ".ref($pattern)." for rgrep?!";
1059             }
1060              
1061              
1062             # set up default options
1063 1   50     6 my $opt = PDL::Options->new( {
1064 1         7 DEFTYPE => $deftype,
1065             TYPES => [],
1066 1         3 UNDEFINED => $PDL::undefval,
1067             BUFFERSIZE => 10000
1068             } );
1069             # Check if the user specified options
1070 1         3 my $u_opt = $_[1] || {};
1071             $opt->options( $u_opt);
1072 1 50       6  
1073             my $options = $opt->current();
1074 1         20  
1075 5 50       10 # If UNDEFINED is set to .*bad.* then undefined are set to
1076             # bad - unless we have a Perl that is not compiled with Bad support
1077 5         64 my $undef_is_bad = ($$options{UNDEFINED} =~ /bad/i);
1078 5 100       8 barf "Unknown PDL type given for DEFTYPE.\n"
1079 1         2 unless ref($$options{DEFTYPE}) eq "PDL::Type";
1080              
1081             while(<$fh>) {
1082 1         26 next unless @v = &$pattern;
1083 1         4  
1084             $m++; # Count got
1085 2         29 if ($m==0) {
1086             $nret = $#v; # Last index of values to return
1087              
1088 4 50       10 # Handle various columns as in rcols - added 18/04/05
1089             my @types = _handle_types( $nret, $$options{DEFTYPE}, $$options{TYPES} );
1090 5 100       8 for (0..$nret) {
1091 1         2 # Modified 18/04/05 to use specified precision.
1092 2         11 $ret[$_] = [ PDL->zeroes($types[$_], 1), [] ];
1093             }
1094 1         4 } else { # perhaps should only carp once...
1095             carp "Non-rectangular rgrep" if $nret != $#v;
1096 5         8 }
1097             if ($n<$m) {
1098             for (0..$nret) {
1099             _ext_lastD( $ret[$_]->[0], $$options{BUFFERSIZE} ); # Extend PDL in buffered manner
1100 10 50       14 }
1101             $n += $$options{BUFFERSIZE};
1102 0 0       0 }
1103 0         0 for(0..$nret) {
1104             # Set values - '1*' is to ensure numeric
1105 0         0 # We now (JB - 18/04/05) also check for defined values or not
1106             # Ideally this should include Badvalue support..
1107 0         0 if ($v[$_] eq '') {
1108             # Missing value - let us treat this specially
1109             if ($undef_is_bad) {
1110 10         26 set $ret[$_]->[0], $m, $$options{DEFTYPE}->badvalue();
1111             # And set bad flag on $ref[$_]!
1112             $ret[$_]->[0]->badflag(1);
1113             } else {
1114             set $ret[$_]->[0], $m, $$options{UNDEFINED};
1115 1 50       4 }
1116 1         2 } else {
  2         9  
1117 1 50       18 set $ret[$_]->[0], $m, 1*$v[$_];
1118             }
1119             }
1120             }
1121              
1122             close($fh) unless $is_handle;
1123             for (@ret) { $_ = $_->[0]->slice("0:$m")->copy; }; # Truncate
1124             wantarray ? return(@ret) : return $ret[0];
1125             }
1126              
1127             =head2 isbigendian
1128              
1129             =for ref
1130              
1131             Determine endianness of machine - returns 0 or 1 accordingly
1132              
1133             =cut
1134              
1135             !NO!SUBS!
1136              
1137             # $isbigendian is set up at top of file
1138 7946     7946 0 35689 pp_addpm( "sub PDL::isbigendian { return $isbigendian; };\n*isbigendian = \\&PDL::isbigendian;\n");
1139             pp_add_exported("", "isbigendian");
1140              
1141             ################################ XS CODE ######################################
1142              
1143             pp_add_exported('', 'rcube');
1144             pp_addpm(<<'EOPM');
1145             =head2 rcube
1146              
1147             =for ref
1148              
1149             Read list of files directly into a large data cube (for efficiency)
1150              
1151             =for usage
1152              
1153             $cube = rcube \&reader_function, @files;
1154              
1155             =for example
1156              
1157             $cube = rcube \&rfits, glob("*.fits");
1158              
1159             This IO function allows direct reading of files into a large data cube,
1160             Obviously one could use cat() but this is more memory efficient.
1161              
1162             The reading function (e.g. rfits, readfraw) (passed as a reference)
1163             and files are the arguments.
1164              
1165             The cube is created as the same X,Y dims and datatype as the first
1166             image specified. The Z dim is simply the number of images.
1167              
1168             =cut
1169              
1170             sub rcube {
1171              
1172 0     0 1   my $reader = shift;
1173              
1174 0 0         barf "Usage: blah" unless ref($reader) eq "CODE";
1175              
1176 0           my $k=0;
1177 0           my ($im,$cube,$nx,$ny);
1178 0           my $nz = scalar(@_);
1179              
1180 0           for my $file (@_) {
1181 0 0         print "Slice ($k) - reading file $file...\n" if $PDL::verbose;
1182 0           $im = &$reader($file);
1183 0           ($nx, $ny) = dims $im;
1184 0 0         if ($k == 0) {
1185 0 0         print "Creating $nx x $ny x $nz cube...\n" if $PDL::verbose;
1186 0           $cube = $im->zeroes($im->type,$nx,$ny,$nz);
1187             }
1188             else {
1189 0 0 0       barf "Dimensions do not match for file $file!\n" if
1190             $im->getdim(0) != $nx or $im->getdim(1) != $ny ;
1191              
1192             }
1193 0           $cube->slice(":,:,($k)") .= $im;
1194 0           $k++;
1195             }
1196              
1197 0           return $cube;
1198             }
1199              
1200              
1201             EOPM
1202              
1203             pp_def('rasc',
1204             Pars => '[o] nums(n); int [o] ierr(n)',
1205             OtherPars => 'PerlIO *fp; IV num => n',
1206             PMFunc => '',
1207             PMCode => <<'EOF',
1208             sub rasc {PDL->rasc(@_)}
1209             sub PDL::rasc {
1210             my ($pdl, $file, $num) = @_;
1211             $num = -1 unless defined $num;
1212             my $is_openhandle = defined fileno $file;
1213             my $fi;
1214             if ($is_openhandle) {
1215             $fi = $file;
1216             } else {
1217             barf 'usage: rasc $pdl, "filename"|FILEHANDLE, [$num_to_read]'
1218             if !defined $file || ref $file;
1219             open $fi, "<", $file or barf "Can't open $file";
1220             }
1221             $pdl->_rasc_int(my $ierr=null,$fi,$num);
1222             close $fi unless $is_openhandle;
1223             return all $ierr > 0;
1224             }
1225             EOF
1226             GenericTypes => ['F','D'],
1227             CHeader => <<'EOH', ###### Read ASCII Function ##########
1228             #define SWALLOWLINE(fp) while ((s = PerlIO_getc(fp)) != '\n' && s != EOF)
1229             #define TRAILING_WHITESPACE_CHECK(s, label) \
1230             if (s!=' ' && s!='\t' && s!='\r' && s!='\n' && s!=',') { nread = -1; goto label; }
1231             EOH
1232             Code => <<'EOF',
1233             int ns, i, j;
1234             ns = $SIZE(n);
1235             broadcastloop %{
1236             for (i=0;i
1237             $GENERIC(nums) f = 0;
1238             int nread = 0;
1239             int s = PerlIO_getc($COMP(fp));
1240             int afterp = 0, aftere=0;
1241             int expo = 0;
1242             $GENERIC(nums) sig = 1.0, esig = 1.0;
1243             $GENERIC(nums) div = 1.0;
1244             if (s == EOF) { nread = 0; goto endread_$GENERIC(nums); }
1245             while (1) {
1246             if (s == EOF) { nread = 0; goto endread_$GENERIC(nums); } /* signal end of line */
1247             if (s == '#')
1248             SWALLOWLINE($COMP(fp));
1249             if ((s >='0' && s <='9') || s =='.' || s == 'e' || s == 'E'
1250             || s == '+' || s == '-') break;
1251             if (s!=' ' && s!='\t' && s!='\r' && s!='\n' && s!=',')
1252             { nread = -1; goto endread_$GENERIC(nums); } /* garbage */
1253             s = PerlIO_getc($COMP(fp)); /* else skip whitespace */
1254             }
1255             /* parse number */
1256             while (1) {
1257             switch (s) {
1258             case '0': case '1': case '2': case '3': case '4':
1259             case '5': case '6': case '7': case '8': case '9':
1260             if (aftere)
1261             expo = (expo*10) + (s - '0');
1262             else if (afterp) {
1263             div /= 10.0;
1264             f += div*(s - '0');
1265             } else
1266             f = (f*10) + (s - '0');
1267             break;
1268             case '+':
1269             /* ignore */
1270             break;
1271             case '-':
1272             if (aftere)
1273             esig = -1;
1274             else
1275             sig = -1;
1276             break;
1277             case 'e': case 'E':
1278             if (aftere) { nread = -1; goto endread_$GENERIC(nums); }
1279             aftere = 1;
1280             break;
1281             case '.':
1282             if (afterp || aftere) { nread = -1; goto endread_$GENERIC(nums); }
1283             afterp = 1;
1284             break;
1285             default:
1286             goto endread_$GENERIC(nums);
1287             break;
1288             }
1289             nread++;
1290             s = PerlIO_getc($COMP(fp));
1291             }
1292             endread_$GENERIC(nums):
1293             if (($ierr(n=>i) = nread) <= 0)
1294             break;
1295             f *= sig;
1296             int i2;
1297             for (i2=0;i2
1298             f *= (esig > 0 ? 10.0 : 0.1);
1299             $nums(n=>i) = f;
1300             TRAILING_WHITESPACE_CHECK(s, endread_$GENERIC(nums));
1301             }
1302             for (j=i+1; j
1303             $ierr(n=>j) = $ierr(n=>i); /* inherit error flags */
1304             %}
1305             EOF
1306             Doc => <<'EOF',
1307             =for ref
1308              
1309             Simple function to slurp in ASCII numbers quite quickly,
1310             although error handling is marginal (to nonexistent).
1311              
1312             =for usage
1313              
1314             $pdl->rasc("filename"|FILEHANDLE [,$noElements]);
1315              
1316             Where:
1317             filename is the name of the ASCII file to read or open file handle
1318             $noElements is the optional number of elements in the file to read.
1319             (If not present, all of the file will be read to fill up $pdl).
1320             $pdl can be of type float or double (for more precision).
1321              
1322             =for example
1323              
1324             # (test.num is an ascii file with 20 numbers. One number per line.)
1325             $in = PDL->null;
1326             $num = 20;
1327             $in->rasc('test.num',20);
1328             $imm = zeroes(float,20,2);
1329             $imm->rasc('test.num');
1330             EOF
1331             );
1332              
1333             pp_done();