File Coverage

blib/lib/Mac/PropertyList/WriteBinary.pm
Criterion Covered Total %
statement 150 162 92.5
branch 43 50 86.0
condition n/a
subroutine 28 30 93.3
pod 0 1 0.0
total 221 243 90.9


line stmt bran cond sub pod time code
1 4     4   190062 use v5.10;
  4         17  
2              
3             package Mac::PropertyList::WriteBinary;
4 4     4   22 use strict;
  4         8  
  4         165  
5 4     4   20 use warnings;
  4         7  
  4         314  
6              
7 4     4   769 use Encode ();
  4         33007  
  4         124  
8 4     4   826 use Mac::PropertyList ();
  4         9  
  4         103  
9 4     4   3511 use Math::BigInt;
  4         118440  
  4         31  
10 4     4   77137 use Exporter qw(import);
  4         9  
  4         516  
11              
12             =encoding utf8
13              
14             =head1 NAME
15              
16             Mac::PropertyList::WriteBinary - pack data into a Mac "binary property list"
17              
18             =head1 SYNOPSIS
19              
20             use Mac::PropertyList::WriteBinary;
21              
22             my $data = Mac::PropertyList::dict->new( { ... => ... } );
23             my $buf = Mac::PropertyList::WriteBinary::as_string($data);
24              
25             =head1 DESCRIPTION
26              
27             The C function converts a property list structure
28             (composed of instances of C,
29             C, etc.) into a binary format compatible
30             with the Apple CoreFoundation binary property list functions.
31              
32             It takes a single argument, the top-level object to write, and returns
33             a byte string.
34              
35             The property list can contain the following perl objects:
36              
37             =over 4
38              
39             =item C value objects
40              
41             These are written according to their class.
42              
43             =item Unblessed references to Perl lists and hashes
44              
45             These are written as arrays and dictionaries, respectively.
46              
47             =item Perl scalars
48              
49             All Perl scalars are written as strings; this is similar to the behavior
50             of writing an oldstyle OpenStep property list, which does not
51             distinguish between numbers and strings, and then reading it using
52             CoreFoundation functions.
53              
54             =item C
55              
56             This is written as the null object. CoreFoundation will read this as
57             C, but appears to be unable to write it.
58              
59             =back
60              
61             Strings are uniqued (two equal strings will be written as references
62             to the same object). If the same reference appears more than once in
63             the structure, it will likewise only be represented once in the
64             output. Although the bplist format can represent circular data
65             structures, they cannot be written by this module (they will be
66             detected and result in an error — they wouldn't be read correctly by
67             CoreFoundation anyway, so you aren't missing much).
68              
69             =head1 BUGS
70              
71             C objects are not handled yet.
72              
73             Objects other than strings (and null) are not uniqued by value,
74             only by reference equality. This may change in a future version.
75              
76             Perl's dictionary keys can only be strings, but a bplist's can be
77             any scalar object.
78              
79             There is no way to write the C objects used by the keyed archiver.
80              
81             Perls that do not use IEEE-754 format internally for floating point
82             numbers will produce incorrect output.
83              
84             =cut
85              
86             use constant {
87             header => 'bplist00',
88              
89             tagInteger => 0x10,
90             tagFloat => 0x20,
91             tagDate => 0x30,
92             tagData => 0x40,
93             tagASCII => 0x50,
94             tagUTF16 => 0x60,
95             tagUID => 0x80,
96             tagArray => 0xA0,
97             tagSet => 0xC0,
98             tagDict => 0xD0,
99              
100             # If we can actually represent an integer close to 2^64 with full
101             # precision and pack it with 'Q', then we can use that
102 4 50       9 havePack64 => ( eval { pack('Q>', 1153202979583557643) eq "\x10\x01\0\0\0\0\0\x0B" } ? 1 : 0 ),
  4         8945  
103 4     4   26 };
  4         10  
104              
105             our $VERSION = '1.603_02';
106             our @EXPORT_OK = qw( as_string );
107              
108             sub as_string {
109 49     49 0 3207 my($value) = @_;
110 49         158 my($ctxt) = _create_fragments($value);
111 47         84 my(@offsets, $xref_offset, $offset_size);
112              
113             # The header (magic number and version, which is 00)
114 47         84 my($buf) = header;
115              
116             # Write each fragment, making note of its offset in the file
117 47         171 foreach my $objid (0 .. $ctxt->{nextid}-1) {
118 96         178 $offsets[$objid] = length $buf;
119 96         298 $buf .= $ctxt->{fragments}->{$objid};
120             }
121              
122             # ... and the offset of the beginning of the offsets table
123 47         70 $xref_offset = length $buf;
124              
125             # Figure out how many bytes to use to represent file offsets,
126             # and append the offset table
127 47 100       87 if ($xref_offset < 256) {
    50          
128 46         116 $buf .= pack('C*', @offsets);
129 46         65 $offset_size = 1;
130             } elsif ($xref_offset < 65536) {
131 1         5 $buf .= pack('n*', @offsets);
132 1         4 $offset_size = 2;
133             } else {
134 0         0 $buf .= pack('N*', @offsets);
135 0         0 $offset_size = 4;
136             }
137              
138             # Write the file trailer
139             $buf .= pack('x5 CCC ' . ( havePack64? 'Q>' : 'x4N' ) x 3,
140             0, $offset_size, $ctxt->{objref_size},
141 47         257 $ctxt->{nextid}, $ctxt->{rootid}, $xref_offset);
142              
143 47         314 $buf;
144             }
145              
146             # sub to_file {
147             # To consider:
148             # It might be useful to have a version of &as_string which writes
149             # the fragments directly to a file handle without having to build a
150             # single large buffer in RAM. This would be more efficient for
151             # larger structures. On the other hand, if you're writing large
152             # structures with this module, you're already suffering needlessly,
153             # so perhaps it's not worth optimizing overmuch for that case.
154             # }
155              
156              
157             # _assign_id is the workhorse function which recursively
158             # descends the data structure and assigns object ids to each node,
159             # as well as creating fragments of the final file.
160             sub _assign_id {
161 120     120   230 my($context, $value) = @_;
162              
163             # The type of this value
164 120         238 my($tp) = ref $value;
165              
166             # Unblessed scalars are either strings or undef.
167 120 100       292 if ($tp eq '') {
168 31 100       72 if (!defined $value) {
169             $context->{nullid} = $context->{nextid} ++
170 2 50       8 unless defined $context->{nullid};
171 2         6 return $context->{nullid};
172             } else {
173             $context->{strings}->{$value} = $context->{nextid} ++
174 29 100       100 unless exists $context->{strings}->{$value};
175 29         99 return $context->{strings}->{$value};
176             }
177             }
178              
179             # If we reach here we know that $value is a ref. Keep a table of
180             # stringified refs, so that we can re-use the id of an object
181             # we've seen before.
182 89 100       384 if(exists $context->{refs}{$value}) {
183 4         12 my($thisid) = $context->{refs}->{$value};
184 4 100       25 die "Recursive data structure\n" unless defined $thisid;
185 3         32 return $thisid;
186             }
187 85         242 $context->{refs}->{$value} = undef;
188              
189             # Serialize the object into $fragment if possible. Since we
190             # don't yet know how many bytes we will use to represent object
191             # ids in the final file, don't serialize those yet–keep them
192             # as a list of integers for now.
193 85         141 my($fragment, @objrefs);
194              
195 85 100       1110 if($tp eq 'ARRAY') {
    100          
    100          
196 4         11 $fragment = _counted_header(tagArray, scalar @$value);
197 4         10 @objrefs = map { $context->_assign_id($_) } @$value;
  9         23  
198             } elsif($tp eq 'HASH') {
199 3         17 my(@ks) = sort (CORE::keys %$value);
200 3         11 $fragment = _counted_header(tagDict, scalar @ks);
201 6         13 @objrefs = ( ( map { $context->_assign_id($_) } @ks ),
202 3         8 ( map { $context->_assign_id($value->{$_}) } @ks ) );
  6         13  
203             } elsif(UNIVERSAL::can($tp, '_as_bplist_fragment')) {
204 77         194 ($fragment, @objrefs) = $value->_as_bplist_fragment($context);
205             } else {
206 1         18 die "Cannot serialize type '$tp'\n";
207             }
208              
209             # As a special case, a fragment of 'undef' indicates that
210             # the object ID was already assigned.
211 81 100       206 return $objrefs[0] if !defined $fragment;
212              
213             # Assign the next object ID to this object.
214 75         174 my($thisid) = $context->{nextid} ++;
215 75         186 $context->{refs}->{$value} = $thisid;
216              
217             # Store the fragment and unpacked object references (if any).
218 75         232 $context->{fragments}->{$thisid} = $fragment;
219 75 100       230 $context->{objrefs}->{$thisid} = \@objrefs if @objrefs;
220              
221 75         275 return $thisid;
222             }
223              
224             sub _create_fragments {
225 49     49   72 my ($value) = @_;
226              
227             # Set up the state needed by _assign_id
228              
229 49         319 my ($ctxt) = bless({
230             nextid => 0, # The next unallocated object ID
231             nullid => undef, # The object id of 'null'
232             strings => { }, # Maps string values to object IDs
233             refs => { }, # Maps stringified refs to object IDs
234             fragments => { }, # Maps object IDs to bplist fragments, except object lists
235             objrefs => { }, # Maps object IDs to objref lists
236             });
237              
238             # Traverse the data structure, and remember the id of the root object
239 49         157 $ctxt->{rootid} = $ctxt->_assign_id($value);
240              
241             # Figure out how many bytes to use to represent an object id.
242 47         76 my ($objref_pack);
243 47 50       99 if ($ctxt->{nextid} < 256) {
    0          
244 47         64 $objref_pack = 'C*';
245 47         77 $ctxt->{objref_size} = 1;
246             } elsif ($ctxt->{nextid} < 65536) {
247 0         0 $objref_pack = 'n*';
248 0         0 $ctxt->{objref_size} = 2;
249             } else {
250 0         0 $objref_pack = 'N*';
251 0         0 $ctxt->{objref_size} = 4;
252             }
253              
254 47         70 my($objid, $reflist, $stringval);
255              
256             # Append the unformatted object ids to their corresponding fragments,
257             # now that we know how to pack them.
258 47         71 while (($objid, $reflist) = each %{$ctxt->{objrefs}}) {
  58         206  
259 11         42 $ctxt->{fragments}->{$objid} .= pack($objref_pack, @$reflist);
260             }
261 47         115 delete $ctxt->{objrefs};
262              
263             # Create fragments for all the strings.
264             # TODO: If &to_file is written, it would be worth
265             # breaking this out so that the conversion can be done on the
266             # fly without keeping all of the converted strings in memory.
267             {
268 47         70 my($ascii) = Encode::find_encoding('ascii');
  47         181  
269 47         750 my($utf16be) = Encode::find_encoding('UTF-16BE');
270              
271 47         13470 while (($stringval, $objid) = each %{$ctxt->{strings}}) {
  66         233  
272 19         30 my($fragment);
273              
274             # Strings may be stored as ASCII (7 bits) or UTF-16-bigendian.
275 19 100       91 if ($stringval =~ /\A[\x01-\x7E]*\z/s) {
276             # The string is representable in ASCII.
277 16         73 $fragment = $ascii->encode($stringval);
278 16         41 $fragment = _counted_header(tagASCII, length $fragment) . $fragment;
279             } else {
280 3         38 $fragment = $utf16be->encode($stringval);
281 3         13 $fragment = _counted_header(tagUTF16, (length $fragment)/2) . $fragment;
282             }
283              
284 19         496 $ctxt->{fragments}->{$objid} = $fragment;
285             }
286             }
287              
288             # If there's a in the file, create its fragment.
289             $ctxt->{fragments}->{$ctxt->{nullid}} = "\x00"
290 47 100       111 if defined $ctxt->{nullid};
291              
292 47         99 $ctxt;
293             }
294              
295             sub _counted_header {
296 38     38   80 my ($typebyte, $count) = @_;
297              
298             # Datas, strings, and container objects have a count/size encoded
299             # in the lower 4 bits of their type byte. If the count doesn't fit
300             # in 4 bits, the bits are set to all-1s and the actual value
301             # follows, encoded as an integer (including the integer's
302             # own type byte).
303              
304 38 100       84 if ($count < 15) {
305 32         144 return pack('C', $typebyte + $count);
306             } else {
307 6         27 return pack('C', $typebyte + 15) . &_pos_integer($count);
308             }
309             }
310              
311             sub _neg_integer {
312 14     14   28 my($count) = @_;
313 14         24 if (havePack64) {
314 14         63 return pack('Cq>', tagInteger + 3, $count);
315             } else {
316             return _pack_int_64(Math::BigInt->new($count)->badd(
317             Math::BigInt->new('0x10000000000000000')));
318             }
319             }
320              
321             sub _pos_integer {
322 35     35   67 my($count) = @_;
323              
324 35 100       101 if ($count < 256) {
    100          
    100          
325 18         112 return pack('CC', tagInteger + 0, $count);
326             } elsif ($count <= 65535) { # 0xFFFF
327 5         30 return pack('CS>', tagInteger + 1, $count);
328             } elsif ($count <= 4294967295) { # 0xFFFFFFFF
329 4         39 return pack('CN', tagInteger + 2, $count);
330             } elsif (havePack64) {
331 8         45 return pack('Cq>', tagInteger + 3, $count);
332             } else {
333             return _pack_int_64($count);
334             }
335             }
336              
337             sub _pack_int_64 {
338 0     0   0 my($val) = @_;
339 0         0 my $low = Math::BigInt->new($val)->bmod(Math::BigInt->new('0x100000000'));
340 0         0 my $high = Math::BigInt->new($val)->brsft(32);
341 0         0 return pack( 'CNN', tagInteger + 3, $high->numify, $low->numify);
342             }
343              
344             package Mac::PropertyList::array;
345              
346             sub _as_bplist_fragment {
347 3     3   18 my($context, @items) = ( $_[1], $_[0]->value );
348 3         10 @items = map { $context->_assign_id($_) } @items;
  18         49  
349              
350 3         11 return ( Mac::PropertyList::WriteBinary::_counted_header(Mac::PropertyList::WriteBinary::tagArray, scalar @items),
351             @items );
352             }
353              
354             package Mac::PropertyList::dict;
355              
356             sub _as_bplist_fragment {
357 6     6   17 my($self, $context) = @_;
358 6         34 my($value) = scalar $self->value; # Returns a ref in scalar context
359 6         36 my(@keys) = sort (CORE::keys %$value);
360              
361             return ( Mac::PropertyList::WriteBinary::_counted_header(Mac::PropertyList::WriteBinary::tagDict, scalar @keys),
362 13         58 ( map { $context->_assign_id($_) } @keys ),
363 6         48 ( map { $context->_assign_id($value->{$_}) } @keys ));
  13         40  
364              
365             }
366              
367             package Mac::PropertyList::date;
368              
369 4     4   35 use Scalar::Util ( 'looks_like_number' );
  4         9  
  4         385  
370 4     4   2392 use Time::Local ( 'timegm' );
  4         9808  
  4         1603  
371              
372             sub _as_bplist_fragment {
373 4     4   12 my($value) = scalar $_[0]->value;
374 4         6 my($posixval);
375              
376 4 100       25 if (looks_like_number($value)) {
    50          
377 1         3 $posixval = $value;
378             } elsif ($value =~ /\A(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d)\:(\d\d)\:(\d\d(?:\.\d+)?)Z\z/) {
379 3         18 $posixval = timegm($6, $5, $4, $3, $2 - 1, $1);
380             } else {
381 0         0 die "Invalid plist date '$value'\n";
382             }
383              
384             # Dates are simply stored as floating-point numbers (seconds since the
385             # start of the CoreFoundation epoch) with a different tag value.
386             # See the notes in Mac::PropertyList::real on float format.
387 4         121 return pack('Cd>', Mac::PropertyList::WriteBinary::tagDate + 3,
388             $posixval - 978307200);
389             }
390              
391             package Mac::PropertyList::real;
392              
393             # Here we're assuming that the 'd' format for pack produces
394             # an IEEE-754 double-precision (64-bit) floating point
395             # representation, because ... it does on practically every
396             # system. However, this will not be portable to systems which
397             # don't natively use IEEE-754 format!
398              
399             sub _as_bplist_fragment {
400 5     5   10 my($self) = shift;
401              
402 5         40 return pack('Cd>', Mac::PropertyList::WriteBinary::tagFloat + 3, $self->value);
403             }
404              
405             package Mac::PropertyList::integer;
406              
407 4     4   75 use constant tagInteger => 0x10;
  4         11  
  4         712  
408              
409             sub _as_bplist_fragment {
410 43     43   156 my($value) = $_[0]->value;
411              
412             # Per comments in CFBinaryPList.c, only 8-byte integers (and
413             # 16-byte integers, if they're supported, which they're not) are
414             # interpreted as signed. Shorter integers are always unsigned.
415             # Therefore all negative numbers must be written as 8-byte
416             # integers.
417              
418 43 100       121 my $method = $value < 0 ? '_neg_integer' : '_pos_integer';
419 43         215 my $sub = Mac::PropertyList::WriteBinary->can($method);
420 43         99 $sub->($value);
421             }
422              
423             package Mac::PropertyList::uid;
424              
425 4     4   31 use constant tagUID => Mac::PropertyList::WriteBinary->tagUID;
  4         8  
  4         2016  
426              
427             sub _as_bplist_fragment {
428 4     4   12 my( $value ) = $_[0]->value;
429              
430             # TODO what about UIDs longer than 16 bytes? Or are there none?
431 4         20 return pack 'CH*', tagUID + length( $value ) / 2 - 1, $value;
432             }
433              
434             package Mac::PropertyList::string;
435              
436             sub _as_bplist_fragment {
437             # Returning a fragment of 'undef' indicates we've already assigned
438             # an object ID.
439 6     6   22 return ( undef, $_[1]->_assign_id($_[0]->value) );
440             }
441              
442             package Mac::PropertyList::ustring;
443              
444             sub _as_bplist_fragment {
445             # Returning a fragment of 'undef' indicates we've already assigned
446             # an object ID.
447 0     0   0 return ( undef, $_[1]->_assign_id($_[0]->value) );
448             }
449              
450             package Mac::PropertyList::data;
451              
452             sub _as_bplist_fragment {
453 3     3   11 my($value) = $_[0]->value;
454 3         8 return (&Mac::PropertyList::WriteBinary::_counted_header(Mac::PropertyList::WriteBinary::tagData, length $value) .
455             $value);
456             }
457              
458             package Mac::PropertyList::true;
459              
460 2     2   8 sub _as_bplist_fragment { return "\x09"; }
461              
462             package Mac::PropertyList::false;
463              
464 1     1   3 sub _as_bplist_fragment { return "\x08"; }
465              
466             =head1 AUTHOR
467              
468             Wim Lewis, C<< >>
469              
470             Copyright © 2012-2024 Wim Lewis. All rights reserved.
471              
472             Tom Wyant added support for UID types.
473              
474             This program is free software; you can redistribute it and/or modify
475             it under the same terms as Perl itself.
476              
477             =head1 SEE ALSO
478              
479             L for the inverse operation.
480              
481             Apple's partial published CoreFoundation source code:
482             L
483              
484             =cut
485              
486             "One more thing";