File Coverage

blib/lib/PDL/IO/FastRaw.pm
Criterion Covered Total %
statement 91 102 89.2
branch 24 38 63.1
condition 13 26 50.0
subroutine 12 14 85.7
pod 3 8 37.5
total 143 188 76.0


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PDL::IO::FastRaw -- A simple, fast and convenient io format for PerlDL.
4              
5             =head1 SYNOPSIS
6              
7             use PDL;
8             use PDL::IO::FastRaw;
9              
10             writefraw($pdl,"fname"); # write a raw file
11              
12             $pdl2 = readfraw("fname"); # read a raw file
13             $pdl2 = PDL->readfraw("fname");
14              
15             gluefraw($pdlx, "fname"); # append to existing file
16             $pdlx->gluefraw("fname");
17              
18             $pdl3 = mapfraw("fname2",{ReadOnly => 1}); # mmap a file, don't read yet
19              
20             $pdl4 = maptextfraw("fname3",{...}); # map a text file into a 1-D pdl.
21              
22              
23             =head1 DESCRIPTION
24              
25             This is a very simple and fast io format for PerlDL.
26             The disk data consists of two files, a header metadata file
27             in ASCII and a binary file consisting simply of consecutive
28             bytes, shorts or whatever.
29              
30             It is hoped that this will not only make for a simple PerlDL module
31             for saving and retrieving these files but also make it easy
32             for other programs to use these files.
33              
34             The format of the ASCII header is simply
35              
36             <typeid>
37             <ndims>
38             <dim0> <dim1> ...
39              
40             You should probably stick with the default header name. You may want
41             to specify your own header, however, such as when you have a large
42             collection of data files with identical dimensions and data types.
43             Under these circumstances, simply specify the C<Header> option in the
44             options hash.
45              
46             The binary files are in general
47             NOT interchangeable between different architectures since the binary
48             file is simply dumped from the memory region of the ndarray.
49             This is what makes the approach efficient.
50              
51             It is also possible to mmap the file which can give a large
52             speedup in certain situations as well as save a lot of memory
53             by using a disk file as virtual memory. When a file is mapped,
54             parts of it are read only as they are accessed in the memory
55             (or as the kernel decides: if you are reading the pages in order,
56             it may well preread some for you).
57              
58             Note that memory savings and copy-on-write are operating-system
59             dependent - see Core.xs and your operating system documentation
60             for exact semantics of whatever. Basically, if you write to a
61             mmapped file without C<ReadOnly>, the change will be reflected
62             in the file immediately. C<ReadOnly> doesn't really make it impossible
63             to write to the ndarray but maps the memory privately so the file
64             will not be changed when you change the ndarray. Be aware though
65             that mmapping a 40Mb file without C<ReadOnly> spends no virtual
66             memory but with C<ReadOnly> it does reserve 40Mb.
67              
68             =head2 Example: Converting ASCII to raw
69              
70             You have a whole slew of data files in ASCII from an experiment
71             that you ran in your lab. You're still tweaking the analysis
72             and plots, so you'd like if your data could load as fast as
73             possible. Eventually you'll read the data into your scripts
74             using C<readfraw>, but the first thing you might do is create
75             a script that converts all the data files to raw files:
76              
77             #!/usr/bin/perl
78             # Assumes that the data files end with a .asc or .dat extension
79             # and saves the raw file output with a .bdat extension.
80             # call with
81             # >./convert_to_raw.pl file1.dat file2.dat ...
82             # or
83             # >./convert_to_raw.pl *.dat
84            
85             use PDL;
86             use PDL::IO::FastRaw; # for saving raw files
87             use PDL::IO::Misc; # for reading ASCII files with rcols
88             while(shift) { # run through the entire supplied list of file names
89             ($newName = $_) =~ s/\.(asc|dat)/.bdat/;
90             print "Saving contents of $_ to $newName\n";
91             $data = rcols($_);
92             writefraw($data, $newName);
93             }
94              
95              
96             =head2 Example: readfraw
97              
98             Now that you've got your data into a raw file format, you can
99             start working on your analysis scripts. If your scripts used C<rcols>
100             in the past, the reading portion of the script should go much,
101             much faster now:
102              
103             #!/usr/bin/perl
104             # My plotting script.
105             # Assume I've specified the files to plot on the command line like
106             # >./plot_script.pl file1.bdat file2.bdat ...
107             # or
108             # >./plot_script.pl *.bdat
109            
110             use PDL;
111             use PDL::IO::FastRaw;
112             while(shift) { # run through the entire supplied list of file names
113             $data = readfraw($_);
114             my_plot_func($data);
115             }
116              
117             =head2 Example: Custom headers
118              
119             In the first example, I allow C<writefraw> to use the standard header
120             file name, which would be the raw file name plus C<.hdr>. However, I often measure
121             time series that have identical length, so all of those header files
122             are redundant. To fix that, I simply pass the Header option to the
123             C<writefraw> command. A modified script would look like this:
124              
125             #!/usr/bin/perl
126             # Assumes that the data files end with a .asc or .dat extension
127             # and saves the raw file output with a .bdat extension.
128             # call with
129             # >./convert_to_raw.pl [-hHeaderFile] [-hHeaderFile] ...
130            
131             use PDL;
132             use PDL::IO::FastRaw; # for saving raw files
133             use PDL::IO::Misc; # for reading ASCII files with rcols
134             my $header_file = undef;
135             CL_OPTION: while($_ = shift @ARGV) { # run through the entire list of command-line options
136             if(/-h(.*)/) {
137             $header_file = $1;
138             next CL_OPTION;
139             }
140             ($newName = $_) =~ s/\.(asc|dat)/.bdat/;
141             print "Saving contents of $_ to $newName\n";
142             $data = rcols($_);
143             writefraw($data, $newName, {Header => $header_file});
144             }
145              
146             Modifying the read script is left as an exercise for the reader. :]
147              
148              
149             =head2 Example: Using mapfraw
150              
151             Sometimes you'll want to use C<mapfraw> rather than the read/write
152             functions. In fact, the original author of the module doesn't
153             use the read/write functions anymore, preferring to always use
154             C<mapfraw>. How would you go about doing this?
155              
156             Assuming you've already saved your data into the raw format, the
157             only change you would have to make to the script in example 2 would
158             be to change the call to C<readfraw> to C<mapfraw>. That's it.
159             You will probably see differences in performance, though I (David
160             Mertens) couldn't tell you about them because I haven't played
161             around with C<mapfraw> much myself.
162              
163             What if you eschew the use of C<writefraw> and prefer to only use
164             C<mapfraw>? How would you save your data to a raw format? In that
165             case, you would have to create a C<mapfraw> ndarray with the correct
166             dimensions first using
167              
168             $ndarray_on_hd = mapfraw('fname', {Creat => 1, Dims => [dim1, dim2, ...]});
169              
170             Note that you must specify the dimensions and you must tell
171             C<mapfraw> to create the new ndarray for you by setting the
172             C<Creat> option to a true value, not C<Create> (note the missing
173             final 'e').
174              
175             =head1 FUNCTIONS
176              
177             =cut
178              
179             package PDL::IO::FastRaw;
180 1     1   32142 use strict;
  1         3  
  1         42  
181 1     1   6 use warnings;
  1         3  
  1         112  
182              
183             our $VERSION = '0.000003';
184             $VERSION = eval $VERSION;
185              
186             require Exporter;
187 1     1   7 use PDL::Core '';
  1         2  
  1         11  
188 1     1   7 use PDL::Exporter;
  1         2  
  1         9  
189              
190             our @ISA = qw/PDL::Exporter/;
191             our @EXPORT_OK = qw/writefraw readfraw mapfraw maptextfraw gluefraw/;
192             our %EXPORT_TAGS = (Func=>\@EXPORT_OK);
193              
194             # Exported functions
195              
# Plain-function entry points.  writefraw and gluefraw are direct aliases of
# the PDL methods of the same name; the remaining three simply invoke the
# corresponding method on the PDL class, forwarding their arguments untouched.
*writefraw = \&PDL::writefraw;
*gluefraw  = \&PDL::gluefraw;

sub readfraw {
    return PDL->readfraw(@_);
}

sub mapfraw {
    return PDL->mapfraw(@_);
}

sub maptextfraw {
    return PDL->maptextfraw(@_);
}
201              
# Read and validate the ASCII header that accompanies a raw data file.
#
# Arguments:
#   $name - name of the data file; the header defaults to "$name.hdr"
#   $opts - optional hash ref; $opts->{Header} overrides the header file name
#
# Returns a hash ref with the keys
#   Type  - first header line (type id), newline removed
#   NDims - second header line (number of dimensions), newline removed
#   Dims  - array ref of the whitespace-separated entries of the third line
#
# Barfs if the header cannot be opened, has fewer than three lines, or if
# the number of entries on the dims line does not equal NDims.
sub _read_frawhdr {
    my ($name, $opts) = @_;
    my $hname = $opts->{Header} || "$name.hdr";
    open my $h, '<', $hname
        or barf("Couldn't open '$hname' for reading: $!");
    my $tid   = <$h>;
    my $ndims = <$h>;
    my $str   = <$h>;
    close $h;

    # Check for a truncated header *before* touching the values, so that a
    # short file yields the format error alone rather than a string of
    # "uninitialized value" warnings followed by the error.
    barf("Format error in '$hname'") if !defined $str;
    chomp($tid, $ndims, $str);

    my @dims = split ' ', $str;
    if (@dims != $ndims) {
        barf("Format error reading fraw header file '$hname'");
    }
    return {
        Type  => $tid,
        Dims  => \@dims,
        NDims => $ndims,
    };
}
220              
# Write the three-line ASCII header (type id, number of dims, dims) for a
# raw data file.
#
# Arguments:
#   $pdl  - object providing get_datatype, getndims and dims
#   $name - name of the data file; the header defaults to "$name.hdr"
#   $opts - optional hash ref:
#             Header - header file name to use instead of "$name.hdr"
#             NDims  - value for the second line instead of $pdl->getndims
#             Dims   - array ref used for the third line instead of $pdl->dims
#
# Returns 1 on success.  Barfs if the header cannot be opened, written or
# closed; checking print and close matters because output is buffered and a
# failure may only become visible when the handle is closed.
sub _writefrawhdr {
    my ($pdl, $name, $opts) = @_;
    my $hname = $opts->{Header} || "$name.hdr";
    open my $h, '>', $hname
        or barf("Couldn't open '$hname' for writing: $!");
    my @lines = map "$_\n", $pdl->get_datatype,
        $opts->{NDims} // $pdl->getndims,
        join(' ', $opts->{Dims} ? @{ $opts->{Dims} } : $pdl->dims);
    print {$h} @lines
        or barf("Couldn't write to '$hname': $!");
    close $h
        or barf("Couldn't close '$hname' after writing: $!");
    return 1;
}
230              
231             =head2 writefraw
232              
233             =for ref
234              
235             Write a raw format binary file
236              
237             =for usage
238              
239             writefraw($pdl,"fname");
240             writefraw($pdl,"fname", {Header => 'headerfname'});
241              
242             =for options
243              
244             The C<writefraw> command
245             supports the following option:
246              
247             =over 8
248              
249             =item Header
250              
251             Specify the header file name.
252              
253             =back
254              
255             =cut
256              
# Write an ndarray to disk as a header file plus a raw binary data file.
#
# Arguments:
#   $pdl  - the ndarray to save
#   $name - name of the data file to create (overwritten if it exists)
#   $opts - optional hash ref, passed through to _writefrawhdr
#           (e.g. { Header => 'headerfname' })
#
# Returns 1 on success.  Barfs if either file cannot be opened, written or
# closed; output is buffered, so the close is checked as well as the print.
sub PDL::writefraw {
    my ($pdl, $name, $opts) = @_;
    _writefrawhdr($pdl, $name, $opts);
    open my $d, '>', $name
        or barf("Couldn't open '$name' for writing: $!");
    binmode $d;
    print {$d} ${ $pdl->get_dataref }
        or barf("Couldn't write data to '$name': $!");
    close $d
        or barf("Couldn't close '$name' after writing: $!");
    return 1;
}
265              
266             =head2 readfraw
267              
268             =for ref
269              
270             Read a raw format binary file
271              
272             =for usage
273              
274             $pdl2 = readfraw("fname");
275             $pdl2 = PDL->readfraw("fname");
276             $pdl2 = readfraw("fname", {Header => 'headerfname'});
277              
278             =for options
279              
280             The C<readfraw> command
281             supports the following option:
282              
283             =over 8
284              
285             =item Header
286              
287             Specify the header file name.
288              
289             =back
290              
291             =cut
292              
# Read a raw binary data file, described by its header file, into a new
# ndarray.
#
# Arguments:
#   $class - class on which zeroes() is called to build the result
#   $name  - name of the data file
#   $opts  - optional hash ref, passed through to _read_frawhdr
#            (e.g. { Header => 'headerfname' })
#
# Returns the newly created ndarray.
# Barfs if the data file cannot be opened, if a read fails, or if the file
# ends before the number of bytes implied by the header has been read.
sub PDL::readfraw {
    my $class = shift;
    my ($name, $opts) = @_;
    open my $d, '<', $name
        or barf("Couldn't open '$name' for reading: $!");
    binmode $d;
    my $hdr = _read_frawhdr($name, $opts);
    my $pdl = $class->zeroes(PDL::Type->new($hdr->{Type}), @{ $hdr->{Dims} });
    my $len   = length ${ $pdl->get_dataref };   # bytes still to be read
    my $index = 0;                               # write offset in the data
    while ($len > 0) {
        my $data;
        my $retlen = sysread $d, $data, $len;
        # sysread returns undef on error; report it rather than treating it
        # like end-of-file.
        barf("Error reading '$name': $!") if !defined $retlen;
        last if $retlen == 0;
        # Overwrite exactly as many bytes as were read, so that a partial
        # read never changes the length of the data scalar.
        substr(${ $pdl->get_dataref }, $index, $retlen, $data);
        $index += $retlen;
        $len   -= $retlen;
    }
    close $d;
    barf("Data file '$name' is too short: $len byte(s) missing")
        if $len > 0;
    $pdl->upd_data();
    return $pdl;
}
312              
313             =head2 gluefraw
314              
315             =for ref
316              
317             Append a single data item to an existing binary file written by
318             L</writefraw>. Error if dims not compatible with existing data.
319              
320             =for usage
321              
322             gluefraw($pdl, $file[, $opts]);
323              
324             =cut
325              
# Append one ndarray to an existing raw data file and bump the last
# dimension recorded in its header by one.
#
# Arguments:
#   $pdl  - ndarray to append; must be a PDL whose type equals the header's
#           type and whose shape equals all but the last of the header dims
#   $name - name of an existing data file (a plain string, not a reference)
#   $opts - optional hash ref (e.g. { Header => 'headerfname' })
#
# Returns the result of rewriting the header.
# Barfs on wrong usage, a missing file, a type or dimension mismatch, or any
# failure to append the data.  The data handle is closed (and the close
# checked) before the header is rewritten, so the header is only updated
# once the appended data has been handed to the operating system.
sub PDL::gluefraw {
    my $usage = 'Usage: gluefraw($pdl,"filename"[,$opts])';
    my ($pdl, $name, $opts) = @_;
    barf($usage)
        if @_ < 2 or @_ > 3 or !UNIVERSAL::isa($pdl, 'PDL') or ref $name;
    barf("'$name' must be real filename: $!") if !-f $name;
    $opts ||= {};
    my $hdr = _read_frawhdr($name, $opts);
    barf("gluefraw: ndarray has type '@{[$pdl->type]}' but file has type '$hdr->{Type}'")
        if $pdl->type != PDL::Type->new($hdr->{Type});
    my @dims = ref $hdr->{Dims} ? @{ $hdr->{Dims} } : $hdr->{Dims};
    barf("gluefraw: header dims needs at least 2 dims, got (@dims)") if @dims < 2;
    # Everything except the last header dim must match the new ndarray.
    my @ldims = @dims[0 .. $#dims - 1];
    barf("gluefraw: incompatible lower dims, ndarray (@{[$pdl->dims]}) vs header (@ldims)")
        if !PDL::all($pdl->shape == pdl(@ldims));
    open my $d, '>>', $name
        or barf("Couldn't open '$name' for appending: $!");
    binmode $d;
    print {$d} ${ $pdl->get_dataref }
        or barf("Couldn't append data to '$name': $!");
    close $d
        or barf("Couldn't close '$name' after appending: $!");
    $dims[-1]++;
    $hdr->{Dims} = \@dims;
    return _writefrawhdr($pdl, $name, { %$opts, %$hdr });
}
347              
348             =head2 mapfraw
349              
350             =for ref
351              
352             Memory map a raw format binary file (see the module docs also)
353              
354             =for usage
355              
356             $pdl3 = mapfraw("fname2",{ReadOnly => 1});
357              
358             =for options
359              
360             The C<mapfraw> command
361             supports the following options (not all combinations make sense):
362              
363             =over 8
364              
365             =item Dims, Datatype
366              
367             If creating a new file or if you want to specify your own header
368             data for the file, you can give an array reference and a scalar,
369             respectively.
370              
371             =item Creat
372              
373             Create the file. Also writes out a header for the file.
374              
375             =item Trunc
376              
377             Set the file size. Automatically enabled with C<Creat>. NOTE: This also
378             clears the file to all zeroes.
379              
380             =item ReadOnly
381              
382             Disallow writing to the file.
383              
384             =item Header
385              
386             Specify the header file name.
387              
388             =back
389              
390             =cut
391              
# Memory-map a raw data file and return it as an ndarray.
#
# Arguments:
#   $class - class on which zeroes() is called to build the result
#   $name  - name of the data file
#   $opts  - optional hash ref; the keys read here are Dims, Datatype,
#            ReadOnly, Creat, Trunc (Header is read by the header helpers)
#
# When Dims is given, the header information is taken from the options
# (Datatype defaulting to double); otherwise it is read from the header
# file.  With Creat, a header file is written after the mapping is set up.
sub PDL::mapfraw {
    my ($class, $name, $opts) = @_;

    my $hdr;
    if (!$opts->{Dims}) {
        $hdr = _read_frawhdr($name, $opts);
    }
    else {
        my %from_opts;
        $from_opts{Type}  = $opts->{Datatype} // double->enum;
        $from_opts{Dims}  = $opts->{Dims};
        $from_opts{NDims} = scalar(@{ $opts->{Dims} });
        $hdr = \%from_opts;
    }

    # Size passed to the mapping: howbig() of the type times every dim.
    my $mapsize = PDL::Core::howbig($hdr->{Type});
    $mapsize *= $_ for @{ $hdr->{Dims} };

    my $not_readonly   = $opts->{ReadOnly} ? 0 : 1;
    my $creat          = $opts->{Creat}    ? 1 : 0;
    my $creat_or_trunc = ($opts->{Creat} || $opts->{Trunc}) ? 1 : 0;

    my $pdl = $class->zeroes(PDL::Type->new($hdr->{Type}));
    # NOTE(review): the meaning of each positional argument is defined by
    # set_data_by_file_map, which is not visible here - confirm before
    # reordering or renaming anything.
    $pdl->set_data_by_file_map(
        $name,
        $mapsize,
        1,
        $not_readonly,
        $creat,
        (0644),
        $creat_or_trunc,
    );
    $pdl->setdims($hdr->{Dims});
    $pdl->set_donttouchdata;
    if ($opts->{Creat}) {
        _writefrawhdr($pdl, $name, $opts);
    }
    return $pdl;
}
422              
423             =head2 maptextfraw
424              
425             =for ref
426              
427             Memory map a text file (see the module docs also).
428              
429             Note that this function maps the raw format so if you are
430             using an operating system which does strange things to e.g.
431             line delimiters upon reading a text file, you get the raw (binary)
432             representation.
433              
434             The file doesn't really need to be text but it is just mapped
435             as one large binary chunk.
436              
437             This function is just a convenience wrapper which first C<stat>s
438             the file and sets the dimensions and datatype.
439              
440             =for usage
441              
442             $pdl4 = maptextfraw("fname", {options});
443              
444             =for options
445              
446             The options other than Dims, Datatype of C<mapfraw> are
447             supported.
448              
449             =cut
450              
# Memory-map an arbitrary file as a one-dimensional ndarray of bytes.
#
# Arguments:
#   $class - class passed on to PDL::mapfraw
#   $name  - name of the file to map
#   $opts  - optional hash ref of mapfraw options; Dims and Datatype are
#            always overridden (file size from stat, and the byte type)
#
# Returns whatever PDL::mapfraw returns.  Barfs if the file cannot be
# stat'ed.  The caller's options hash is never modified.
sub PDL::maptextfraw {
    my ($class, $name, $opts) = @_;
    # Work on a copy, and tolerate a missing options argument (dereferencing
    # undef as a hash would die under strict refs).
    my %mapopts = %{ $opts || {} };
    my @s = stat $name
        or barf("Couldn't stat '$name': $!");
    $mapopts{Dims} = [ $s[7] ];    # element 7 of stat() is the size in bytes
    # Call with an explicit empty argument list: the old "&PDL::byte;" form
    # forwards this sub's own @_ to PDL::byte.
    $mapopts{Datatype} = PDL::byte();
    return PDL::mapfraw($class, $name, \%mapopts);
}
459              
460             =head1 BUGS
461              
462             Should be documented better. C<writefraw> and C<readfraw> should
463             also have options (the author nowadays only uses C<mapfraw> ;)
464              
465             =head1 AUTHOR
466              
467             Copyright (C) Tuomas J. Lukka 1997.
468             All rights reserved. There is no warranty. You are allowed
469             to redistribute this software / documentation under certain
470             conditions. For details, see the file COPYING in the PDL
471             distribution. If this file is separated from the PDL distribution,
472             the copyright notice should be included in the file.
473              
474             =cut
475              
476             1;