File Coverage

blib/lib/PDL/IO/Storable.pm
Criterion Covered Total %
statement 101 107 94.3
branch 32 46 69.5
condition 7 14 50.0
subroutine 16 17 94.1
pod 0 9 0.0
total 156 193 80.8


line stmt bran cond sub pod time code
1             package PDL::IO::Storable;
2              
3 11     11   545 use strict;
  11         30  
  11         490  
4 11     11   60 use warnings;
  11         25  
  11         1550  
5              
6             our @EXPORT_OK = qw();
7             our %EXPORT_TAGS = (Func=>\@EXPORT_OK);
8             our $VERSION = '2.096';
9             our @ISA = ( 'PDL::Exporter' );
10              
11 11     11   121 use PDL::Core;
  11         26  
  11         381  
12 11     11   93 use PDL::Exporter;
  11         27  
  11         122  
13 11     11   67 use DynaLoader;
  11         25  
  11         641  
14              
15             =head1 NAME
16              
17             PDL::IO::Storable - helper functions to make PDL usable with serialisation packages
18              
19             =head1 SYNOPSIS
20              
21             use Storable;
22             use PDL::IO::Storable;
23             $hash = {
24             'foo' => 42,
25             'bar' => zeroes(23,45),
26             };
27             store $hash, 'perlhash.dat';
28              
29             use JSON::MaybeXS;
30             $encoder = JSON::MaybeXS->new(allow_tags => 1);
31             my $ndarray = xvals (5,2);
32             my $encoded_json = $encoder->encode ($ndarray);
33             my $decoded_ndarray = $encoder->decode ($encoded_json);
34              
35             =head1 DESCRIPTION
36              
37             Serialisation packages such as C, C, C
38             and C implement
39             object persistence for Perl data structures that can
40             contain arbitrary Perl objects. This module implements the relevant methods to
41             be able to store and retrieve ndarrays via C as well as packages that support
42             the L protocol (currently L, L and JSON packages).
43              
44             Note that packages supporting the C protocol need to have their
45             respective flags enabled so that the FREEZE and THAW callbacks are used.
46              
47             Note also that while L is supported, if it has to
48             fall back to L, it will fail. L treats the data
49             it gets back from C as items to encode, while L
50             treats the list it gets as strings already encoded. They are
51             fundamentally incompatible, so this module supports the JSON::XS
52             option.
53              
54             Finally, L is not preserved. If a data structure
55             containing ndarrays connected by data flow is serialised then this will need to be
56             explicitly reinstated on deserialisation.
57              
58             =head1 FUNCTIONS
59              
60             =cut
61              
62             { package # hide from PAUSE
63             PDL;
64 11     11   100 use Carp;
  11         39  
  11         3353  
65             # routines to make PDL work with Storable >= 1.03
66              
67             # pdlpack() serializes an ndarray, while pdlunpack() unserializes it. Earlier
68             # versions of PDL didn't control for endianness, type sizes and enumerated type
69             # values; this made stored data unportable across different architectures and
70             # PDL versions. This is no longer the case, but the reading code is still able
71             # to read the old files. The old files have no meta-information in them so it's
72             # impossible to read them correctly with 100% accuracy, but we try to make an
73             # educated guess
74             #
75             # Old data format:
76             #
77             # int type
78             # int ndims
79             # int dims[ndims]
80             # data
81             #
82             # Note that here all the sizes and endiannesses are the native. This is
83             # un-portable. Furthermore, the "type" is an enum, and its values could change
84             # between PDL versions. Here I assume that the old format input data is indeed
85             # native, so the old data files have the same portability issues, but at least
86             # things will remain working and broken in the same way they were before
87             #
88             #
89             # New format:
90             #
91             # uint64 0xFFFF FFFF FFFF FFFF # meant to be different from the old-style data
92             # char type[16] # ' '-padded, left-aligned type string such as 'PDL_LL'
93             # uint32 sizeof(type) # little-endian
94             # uint32 one # native-endian. Used to determine the endianness
95             # uint64 ndims # little-endian
96             # uint64 dims[ndims] # little-endian
97             # data
98             #
99             # The header data is all little-endian here. The data is stored with native
100             # endianness. On load it is checked, and a swap happens, if it is required
101              
102             sub pdlpack {
103 13     13 0 46 my ($pdl) = @_;
104              
105 13         50 my $hdr = pack( 'c8A16VL',
106             (-1) x 8,
107             $pdl->type->symbol,
108             PDL::Core::howbig( $pdl->get_datatype ), 1 );
109              
110             # I'd like this to be simply
111             # my $dimhdr = pack( 'Q<*', $pdl->getndims, $pdl->dims )
112             # but my pack() may not support Q, so I break it up manually
113             #
114             # if sizeof(int) == 4 here, then $_>>32 will not return 0 necessarily (this in
115             # undefined). I thus manually make sure this is the case
116             #
117 13 50       30 my $noMSW = (PDL::Core::howbig($PDL::Types::PDL_IND) < 8) ? 1 : 0;
118             my $dimhdr = pack( 'V*',
119 13 50       53 map( { $_ & 0xFFFFFFFF, $noMSW ? 0 : ($_ >> 32) } ($pdl->getndims, $pdl->dims ) ) );
  28         78  
120              
121 13         66 my $dref = $pdl->get_dataref;
122 13         496 return $hdr . $dimhdr . $$dref;
123             }
124              
125             sub pdlunpack {
126 11     11   93 use Config ();
  11         26  
  11         10774  
127 18     18 0 44 my ($pdl,$pack) = @_;
128              
129 18         26 my ($type, $ndims);
130 18         29 my @dims = ();
131              
132 18         23 my $do_swap = 0;
133              
134             # first I try to infer the type of this storable
135 18         19 my $offset = 0;
136 18         64 my @magicheader = unpack( "ll", substr( $pack, $offset ) );
137 18         29 $offset += 8;
138              
139 18 100 66     73 if( $magicheader[0] != -1 ||
140             $magicheader[1] != -1 )
141             {
142 2 50       4 print "PDL::IO::Storable detected an old-style pdl\n" if $PDL::verbose;
143              
144             # old-style data. I leave the data sizes, endianness native, since I don't
145             # know any better. This at least won't break anything.
146             #
147             # The "type" however needs attention. Most-recent old-format data had these
148             # values for the type:
149             #
150             # enum { byte,
151             # short,
152             # unsigned short,
153             # long,
154             # long long,
155             # float,
156             # double }
157             #
158             # The $type I read from the file is assumed to be in this enum even though
159             # PDL may have added other types in the middle of this enum.
160 2         5 my @reftypes = ($PDL::Types::PDL_B,
161             $PDL::Types::PDL_S,
162             $PDL::Types::PDL_U,
163             $PDL::Types::PDL_L,
164             $PDL::Types::PDL_LL,
165             $PDL::Types::PDL_F,
166             $PDL::Types::PDL_D);
167              
168 2         19 my $stride = $Config::Config{intsize};
169 2         6 ($type,$ndims) = unpack 'i2', $pack;
170 2 50       8 @dims = $ndims > 0 ? unpack 'i*', substr $pack, 2*$stride,
171             $ndims*$stride : ();
172              
173 2         2 $offset = (2+$ndims)*$stride;
174              
175 2 50 33     8 if( $type < 0 || $type >= @reftypes )
176             {
177 0         0 croak "Reading in old-style pdl with unknown type: $type. Giving up.";
178             }
179 2         5 $type = $reftypes[$type];
180             }
181             else
182             {
183 16 50       29 print "PDL::IO::Storable detected a new-style pdl\n" if $PDL::verbose;
184              
185             # new-style data. I KNOW the data sizes, endianness and the type enum
186 16         43 my ($typestring) = unpack( 'A16', substr( $pack, $offset ) );
187 16         23 $offset += 16;
188              
189 16         41 $typestring =~ s#\s+$##;
190 16         28 $type = eval { PDL::Type->new($typestring)->enum };
  16         52  
191 16 50       32 if( $@ )
192             {
193 0         0 croak "PDL::IO::Storable couldn't parse type string '$typestring'. Giving up";
194             }
195              
196 16         31 my ($sizeof) = unpack( 'V', substr( $pack, $offset ) );
197 16         21 $offset += 4;
198 16 50       34 if( $sizeof != PDL::Core::howbig( $type ) )
199             {
200 0         0 croak
201             "PDL::IO::Storable sees mismatched data type sizes when reading data of type '$typestring'\n" .
202             "Stored data has sizeof = $sizeof, while here it is " . PDL::Core::howbig( $type ) . ".\n" .
203             "Giving up";
204             }
205              
206             # check the endianness, if the "1" I read is interpreted as "1" on my
207             # machine then the endiannesses match, and I can just read the data
208 16         27 my ($one) = unpack( 'L', substr( $pack, $offset ) );
209 16         20 $offset += 4;
210              
211 16 100       28 if( $one == 1 )
212             {
213 15 50       24 print "PDL::IO::Storable detected matching endianness\n" if $PDL::verbose;
214             }
215             else
216             {
217 1 50       5 print "PDL::IO::Storable detected non-matching endianness. Correcting data on load\n" if $PDL::verbose;
218              
219             # mismatched endianness. Let's make sure it's a big/little issue, not
220             # something weird. If mismatched, the '00000001' should be seen as
221             # '01000000'
222 1 50       3 if( $one != 0x01000000 )
223             {
224 0         0 croak
225             "PDL::IO::Storable sees confused endianness. A '1' was read as '$one'.\n" .
226             "This is neither matching nor swapped endianness. I don't know what's going on,\n" .
227             "so I'm giving up."
228             }
229              
230             # all righty. Everything's fine, but I need to swap all the data
231 1         3 $do_swap = 1;
232             }
233              
234             # mostly this acts like unpack('Q<'...), but works even if my unpack()
235             # doesn't support 'Q'. This also makes sure that my PDL_Indx is large enough
236             # to read this ndarray
237             sub unpack64bit
238             {
239 31     31 0 48 my ($count, $pack, $offset) = @_;
240              
241             return map
242             {
243 31         52 my ($lsw, $msw) = unpack('VV', substr($$pack, $$offset));
  35         61  
244 35         44 $$offset += 8;
245              
246 35 50 33     50 croak( "PDL::IO::Storable tried reading a file with dimensions that don't fit into 32 bits.\n" .
247             "However here PDL_Indx can't store a number so large. Giving up." )
248             if( PDL::Core::howbig($PDL::Types::PDL_IND) < 8 && $msw != 0 );
249              
250 35         87 (($msw << 32) | $lsw)
251             } (1..$count);
252             }
253              
254 16         39 ($ndims) = unpack64bit( 1, \$pack, \$offset );
255 16 100       41 @dims = unpack64bit( $ndims, \$pack, \$offset ) if $ndims > 0;
256             }
257              
258 18 50       34 print "thawing PDL, Dims: [",join(',',@dims),"]\n" if $PDL::verbose;
259              
260 11     11   100 use Scalar::Util qw /blessed/;
  11         24  
  11         8447  
261 18 100       31 if (!blessed $pdl) { # set_datatype needs an object, not a class name
262 5         14 $pdl = $pdl->new;
263             }
264 18         55 $pdl->set_sv_to_null_pdl; # make this a real ndarray -- this is the tricky bit!
265 18         104 $pdl->set_datatype($type);
266 18         71 $pdl->setdims([@dims]);
267 18         61 my $dref = $pdl->get_dataref;
268 18         47 $$dref = substr $pack, $offset;
269 18         38 $pdl->upd_data;
270 18 100 66     55 $pdl->type->bswap->($pdl) if $do_swap && PDL::Core::howbig($type) != 1;
271 18         263 return $pdl;
272             }
273              
274             sub STORABLE_freeze {
275 14     14 0 276 my ($self, $cloning) = @_;
276 14 100       106 return UNIVERSAL::isa($self, "HASH") ? ("",{%$self}) # hash ref -> Storable
277             : pdlpack($self); # pack the ndarray into a long string
278             }
279              
280             sub STORABLE_thaw {
281 19     19 0 1523 my ($pdl,$cloning,$serial,$hashref) = @_;
282 19         28 my $class = ref $pdl;
283 19 100       36 if (defined $hashref) {
284 1 50 50     5 croak "serial data with hashref!" unless ($serial//'') eq "";
285 1         8 @$pdl{keys %$hashref} = values %$hashref;
286             } else {
287             # all the magic is happening in pdlunpack
288 18         40 $pdl->pdlunpack($serial); # unpack our serial into this sv
289             }
290             }
291              
292             # have these as PDL methods
293              
294             =head2 store
295              
296             =for ref
297              
298             store an ndarray using L
299              
300             =for example
301              
302             $x = random 12,10;
303             $x->store('myfile');
304              
305             =cut
306              
307             =head2 freeze
308              
309             =for ref
310              
311             freeze an ndarray using L
312              
313             =for example
314              
315             $x = random 12,10;
316             $frozen = $x->freeze;
317              
318             =cut
319              
320 0     0 0 0 sub store { require Storable; Storable::store(@_) }
  0         0  
321 1     1 0 493 sub freeze { require Storable; Storable::freeze(@_) }
  1         5  
322              
323             sub FREEZE {
324 10     10 0 4717 my ($self, $serialiser, @data) = @_;
325             # non-JSON can use the Storable code
326 10 100       43 return $self->STORABLE_freeze ($serialiser, @data)
327             if $serialiser ne 'JSON';
328             # JSON needs to use a plain text format to avoid encoding issues
329             # so we unpdl it and let JSON do the rest.
330             # Stringify type so it does not need FREEZE/THAW methods
331 5         22 my $type = '' . $self->type;
332 5         290 return ($type, $self->unpdl);
333             }
334              
335             sub THAW {
336 10     10 0 169 my ($class, $serialiser, @data) = @_;
337 10 100       45 return $class->STORABLE_thaw (undef, @data)
338             if $serialiser ne 'JSON';
339             # The type has been stringified when frozen so reinstantiate it.
340 5         22 $data[0] = PDL::Type->new($data[0]);
341 5         56 return PDL->new (@data);
342             }
343              
344             }
345              
346             =head1 AUTHOR
347              
348             Copyright (C) 2013 Dima Kogan
349             Copyright (C) 2002 Christian Soeller
350             All rights reserved. There is no warranty. You are allowed
351             to redistribute this software / documentation under certain
352             conditions. For details, see the file COPYING in the PDL
353             distribution. If this file is separated from the PDL distribution,
354             the copyright notice should be included in the file.
355              
356             =cut
357              
358             1;