File Coverage

blib/lib/DataStore/CAS.pm
Criterion Covered Total %
statement 135 196 68.8
branch 49 114 42.9
condition 31 83 37.3
subroutine 37 64 57.8
pod 27 27 100.0
total 279 484 57.6


line stmt bran cond sub pod time code
1             package DataStore::CAS;
2 5     5   1085657 use 5.008;
  5         24  
3 5     5   42 use Carp;
  5         10  
  5         535  
4 5     5   836 use Try::Tiny;
  5         3250  
  5         504  
5             require Scalar::Util;
6             require Symbol;
7 5     5   2507 use Moo::Role;
  5         61812  
  5         50  
8              
9             our $VERSION= '0.08';
10             our @CARP_NOT= qw( DataStore::CAS::File DataStore::CAS::VirtualHandle );
11              
12             # ABSTRACT: Abstract base class for Content Addressable Storage
13              
14              
15             requires 'digest';
16              
17             has hash_of_null => ( is => 'lazy' );
18              
19             sub _build_hash_of_null {
20 6     6   158 return shift->calculate_hash('');
21             }
22              
23              
24             requires 'get';
25              
26              
27             sub _thing_stringifies_to_filename {
28 23     23   129 my $ref= ref $_[0];
29 23 100 33     732 !$ref? defined $_[0] && length $_[0]
      66        
30             : $ref->isa('Path::Class::File')
31             || $ref->isa('Path::Tiny')
32             || $ref->isa('File::Temp')
33             || -e "$_[0]"
34             }
35             sub _describe_unputtable {
36 0 0   0   0 !defined $_[0]? 'undef'
    0          
    0          
37             : !ref $_[0]? '"'.$_[0].'"'
38             : !Scalar::Util::blessed($_[0])? ref($_[0]).' ref'
39             : 'object of '.ref($_[0]).' (stringifies to "'.$_[0].'")'
40             }
41              
42             sub put {
43 24     24 1 33943 my $ref= ref $_[1];
44 24 100 66     324 goto $_[0]->can('put_scalar')
45             if !$ref || $ref eq 'SCALAR';
46 6 100 100     127 goto $_[0]->can('put_file')
47             if $ref->isa('DataStore::CAS::File')
48             or _thing_stringifies_to_filename($_[1]);
49 1 50 33     31 goto $_[0]->can('put_handle')
50             if $ref->isa('IO::Handle')
51             or Scalar::Util::reftype($_[1]) eq 'GLOB';
52 0         0 croak("Unhandled argument to ->put : "._describe_unputtable($_[1]));
53             }
54              
55              
56             sub put_scalar {
57 17     17 1 51 my ($self, undef, $flags)= @_;
58 17 50       80 my $ref= ref $_[1] eq 'SCALAR'? $_[1] : \$_[1];
59              
60             # Force to plain string if it is an object
61 17 50       57 if (ref $$ref) {
62             # TODO: croak unless object has stringify magic
63 0         0 $ref= \"$$ref";
64             }
65              
66             # Can only 'put' octets, not wide-character unicode strings.
67 17 50       76 utf8::downgrade($$ref, 1)
68             or croak "scalar must be byte string (octets). If storing unicode,"
69             ." you must reduce to a byte encoding first.";
70              
71             my $hash= $flags && $flags->{known_hashes} && $flags->{known_hashes}{$self->digest}
72 17 50 33     175 ? $flags->{known_hashes}{$self->digest}
73             : $self->calculate_hash($ref);
74 17 100       94 if ($self->get($hash)) {
75             # Already have it
76             $flags->{stats}{dup_file_count}++
77 1 50       5 if $flags->{stats};
78 1         6 return $hash;
79             } else {
80 16 100       90 $flags= { ($flags? %$flags : ()), known_hashes => { $self->digest => $hash } };
81 16         96 my $handle= $self->new_write_handle($flags);
82 16         71 $handle->_write_all($$ref);
83 16         84 return $self->commit_write_handle($handle);
84             }
85             }
86              
87              
88             sub put_file {
89 8     8 1 26 my ($self, $file, $flags)= @_;
90 8         16 my $ref= ref $file;
91 8   100     47 my $is_cas_file= $ref && $ref->isa('DataStore::CAS::File');
92 8         16 my $is_filename= _thing_stringifies_to_filename($file);
93 8 50 66     51 croak "Unhandled argument to ->put_file : "._describe_unputtable($file)
94             unless $is_cas_file || $is_filename;
95              
96 8 50       25 my %known_hashes= $flags->{known_hashes}? %{$flags->{known_hashes}} : ();
  0         0  
97             # Apply reuse_hash feature, if requested
98 8 50 66     26 if ($is_cas_file && $flags->{reuse_hash}) {
99 0         0 $known_hashes{$file->store->digest}= $file->hash;
100 0         0 $flags= { %$flags, known_hashes => \%known_hashes };
101             }
102             # It is probably better to read a file twice than to write one that
103             # doesn't need to be written.
104             # ...but can't do better than ->put_handle unless the file is a real file.
105 8 50 33     82 my $fname= $is_filename? "$file"
    100          
106             : $is_cas_file && $file->can('local_file')? $file->local_file
107             : undef;
108 8 50 33     110 if ($known_hashes{$self->digest} || (defined $fname && -f $fname)) {
      33        
109             # Calculate the hash if it wasn't given.
110 8   33     35 my $hash= ($known_hashes{$self->digest} ||= $self->calculate_file_hash($fname));
111             # Avoid unnecessary work
112 8 100       958 if ($self->get($hash)) {
113             $flags->{stats}{dup_file_count}++
114 7 50       24 if $flags->{stats};
115             $self->_unlink_source_file($file, $flags)
116 7 50 33     47 if $flags->{move} && defined $fname;
117 7         36 return $hash;
118             }
119             # Save hash for next step
120 1         7 $flags= { %$flags, known_hashes => \%known_hashes };
121             }
122 1         3 my $fh;
123 1 50 0     3 if ($is_cas_file) {
    0          
    0          
124 1 50       4 $fh= $file->open or croak "Can't open '$file': $!";
125             }
126             elsif ($ref && $ref->can('openr')) {
127 0 0       0 $fh= $file->openr or croak "Can't open '$file': $!";
128             }
129             elsif ($is_filename) {
130 0 0       0 open($fh, '<', $fname) or croak "Can't open '$fname': $!";
131             }
132             else {
133 0         0 croak "Don't know how to open '$file'";
134             }
135 1         6 my $hash= $self->put_handle($fh, $flags);
136             $self->_unlink_source_file($file, $flags)
137 1 50 33     4 if $hash && $flags->{move};
138 0         0 return $hash;
139             }
140              
141             sub _unlink_source_file {
142 1     1   107 my ($self, $file, $flags)= @_;
143 1 50       4 return if $flags->{dry_run};
144 1   33     19 my $is_cas_file= ref $file && ref($file)->isa('DataStore::CAS::File');
145 1 50       3 if ($is_cas_file) {
146 1         342 croak "Refusing to delete origin CAS File (this can damage a CAS)\n"
147             ."If you really want to do this, pass \$file->local_name and then"
148             ." delete the cas entry yourself.";
149             } else {
150 0 0 0     0 if (ref $file && ref($file)->isa('File::Temp')) {
151             # The Simple backend closes File::Temp files to ensure they don't
152             # get written to any more. so match that behavior here.
153 0         0 $file->close;
154             }
155 0 0       0 unlink "$file" or croak "unlink($file): $!"
156             }
157             }
158              
159              
160             sub put_handle {
161 2     2 1 7 my ($self, $h_in, $flags)= @_;
162 2         7 binmode $h_in;
163 2         13 my $h_out= $self->new_write_handle($flags);
164 2   50     14 my $buf_size= $flags->{buffer_size} || 1024*1024;
165 2         5 my $buf;
166 2         5 while(1) {
167 4         89 my $got= read($h_in, $buf, $buf_size);
168 4 100       56 if ($got) {
    50          
169 2 50       13 $h_out->_write_all($buf) or croak "write: $!";
170             } elsif (!defined $got) {
171 0 0 0     0 next if ($!{EINTR} || $!{EAGAIN});
172 0         0 croak "read: $!";
173             } else {
174 2         6 last;
175             }
176             }
177 2         12 return $self->commit_write_handle($h_out);
178             }
179              
180              
181             # This implementation probably needs overridden by subclasses.
182             sub new_write_handle {
183 0     0 1 0 my ($self, $flags)= @_;
184 0         0 return DataStore::CAS::FileCreatorHandle->new($self, { flags => $flags });
185             }
186              
187             # This must be implemented by subclasses
188             requires 'commit_write_handle';
189              
190              
191             sub calculate_hash {
192 23     23 1 43 my $self= shift;
193 23 100       136 Digest->new($self->digest)->add(ref $_[0]? ${$_[0]} : $_[0])->hexdigest;
  17         983  
194             }
195              
196             sub calculate_file_hash {
197 11     11 1 30 my ($self, $file)= @_;
198 11 50       503 open my $fh, '<', $file or croak "open($file): $!";
199 11         33 binmode $fh;
200 11         44 Digest->new($self->digest)->addfile($fh)->hexdigest;
201             }
202              
203              
204             sub validate {
205 0     0 1 0 my ($self, $hash, $flags)= @_;
206              
207 0         0 my $file= $self->get($hash);
208 0 0       0 return undef unless defined $file;
209              
210             # Exceptions during 'put' will most likely come from reading $file,
211             # which means that validation fails, and we return false.
212 0         0 my $new_hash;
213             try {
214             # We don't pass flags directly through to get/put, because flags for validate
215             # are not the same as flags for get or put. But, 'stats' is a standard thing.
216 0     0   0 my %args= ( dry_run => 1 );
217 0 0       0 $args{stats}= $flags->{stats} if $flags->{stats};
218 0         0 $new_hash= $self->put_handle($file, \%args);
219             }
220       0     catch {
221 0         0 };
222 0 0 0     0 return (defined $new_hash and $new_hash eq $hash)? 1 : 0;
223             }
224              
225              
226             requires 'delete';
227              
228              
229             requires 'iterator';
230              
231              
232             requires 'open_file';
233              
234             # File and Handle objects have DESTROY methods that call these methods of
235             # their associated CAS. The CAS should implement these for cleanup of
236             # temporary files, or etc.
237       19     sub _file_destroy {}
238       18     sub _handle_destroy {}
239              
240             package DataStore::CAS::File;
241 5     5   14619 use strict;
  5         16  
  5         198  
242 5     5   44 use warnings;
  5         11  
  5         2628  
243              
244             our $VERSION= '0.08';
245              
246 30     30 1 904 sub cas { $_[0]{store} }
247             *store= *cas; # back-compat
248 5     5 1 42 sub hash { $_[0]{hash} }
249 3     3 1 27 sub size { $_[0]{size} }
250              
251             sub open {
252 4     4 1 11 my $self= shift;
253 4 100       27 return $self->cas->open_file($self)
254             if @_ == 0;
255 1 50       4 return $self->cas->open_file($self, { @_ })
256             if @_ > 1;
257 1 50 33     10 return $self->cas->open_file($self, { layer => $_[0] })
258             if @_ == 1 and !ref $_[0];
259 0         0 Carp::croak "Wrong arguments to 'open'";
260             };
261              
262             sub DESTROY {
263 20     20   13171 my $cas= $_[0]->cas;
264 20 50       105 $cas->_file_destroy(@_) if defined $cas; # simple guard against global destruction
265             }
266              
267             our $AUTOLOAD;
268             sub AUTOLOAD {
269 4     4   2094 my $attr= substr($AUTOLOAD, rindex($AUTOLOAD, ':')+1);
270 4 100       25 return $_[0]{$attr} if exists $_[0]{$attr};
271 2         7 unshift @_, $_[0]->cas;
272             goto (
273 2   66     249 $_[0]->can("_file_$attr")
274             or Carp::croak "Can't locate object method \"_file_$attr\" via package \"".ref($_[0]).'"'
275             );
276             }
277              
278             package DataStore::CAS::VirtualHandle;
279 5     5   43 use strict;
  5         12  
  5         210  
280 5     5   33 use warnings;
  5         22  
  5         5932  
281              
282             our $VERSION= '0.08';
283              
284             sub new {
285 19     19 1 976 my ($class, $cas, $fields)= @_;
286 19         135 my $glob= bless Symbol::gensym(), $class;
287 19         399 ${*$glob}= $cas;
  19         63  
288 19 50       113 %{*$glob}= %$fields if defined $fields;
  19         84  
289 19         152 tie *$glob, $glob;
290 19         96 $glob;
291             }
292 19     19   68 sub TIEHANDLE { return $_[0]; }
293              
294 38     38 1 654 sub cas { ${*${$_[0]}} } # the scalar view of the symbol points to the CAS object
  38         60  
  38         234  
295             *_cas= *cas; # back-compat
296 39     39   98 sub _data { \%{*${$_[0]}} } # the hashref view of the symbol holds the fields of the handle
  39         56  
  39         182  
297              
298             sub DESTROY {
299 19     19   1070 my $cas= $_[0]->cas;
300 19 50       145 $cas->_handle_destroy(@_) if defined $cas; # simple guard against global destruction
301             }
302              
303             # By default, any method not defined will call to $cas->_handle_$method( $handle, @args );
304             our $AUTOLOAD;
305             sub AUTOLOAD {
306 4     4   3258 unshift @_, ${*${$_[0]}}; # unshift @_, $self->_cas
  4         7  
  4         26  
307 4         19 my $attr= substr($AUTOLOAD, rindex($AUTOLOAD, ':')+1);
308             goto (
309 4   66     247 $_[0]->can("_handle_$attr")
310             or Carp::croak "Can't locate object method \"_handle_$attr\" via package \"".ref($_[0]).'"'
311             );
312             }
313              
314             #
315             # Tied filehandle API
316             #
317              
318 1     1   2100 sub READ { (shift)->read(@_) }
319 0 0   0   0 sub READLINE { wantarray? (shift)->getlines : (shift)->getline }
320 0     0   0 sub GETC { $_[0]->getc }
321 0     0   0 sub EOF { $_[0]->eof }
322              
323 0     0   0 sub WRITE { (shift)->write(@_) }
324 0     0   0 sub PRINT { (shift)->print(@_) }
325 0     0   0 sub PRINTF { (shift)->printf(@_) }
326              
327 0     0   0 sub SEEK { (shift)->seek(@_) }
328 0     0   0 sub TELL { (shift)->tell(@_) }
329              
330 0     0   0 sub FILENO { $_[0]->fileno }
331 0     0   0 sub CLOSE { $_[0]->close }
332              
333             #
334             # The following are some default implementations to make subclassing less cumbersome.
335             #
336              
337             sub getlines {
338 0     0 1 0 my $self= shift;
339 0 0 0     0 wantarray or !defined wantarray or Carp::croak "getlines called in scalar context";
340 0         0 my (@ret, $line);
341 0         0 push @ret, $line
342             while defined ($line= $self->getline);
343 0         0 @ret;
344             }
345              
346             # I'm not sure why anyone would ever want this function, but I'm adding
347             # it for completeness.
348             sub getc {
349 0     0 1 0 my $c;
350 0 0       0 $_[0]->read($c, 1)? $c : undef;
351             }
352              
353             # 'write' does not guarantee that all bytes get written in one shot.
354             # Needs to be called in a loop to accomplish "print" semantics.
355             sub _write_all {
356 18     18   56 my ($self, $str)= @_;
357 18         46 while (1) {
358 18         85 my $wrote= $self->write($str);
359 18 50 33     164 return 1 if defined $wrote and ($wrote eq length $str);
360 0 0 0     0 return undef unless defined $wrote or $!{EINTR} or $!{EAGAIN};
      0        
361 0         0 substr($str, 0, $wrote)= '';
362             }
363             }
364              
365             # easy to forget that 'print' API involves "$," and "$\"
366             sub print {
367 0     0 1 0 my $self= shift;
368 0 0       0 my $str= join( (defined $, ? $, : ""), @_ );
369 0 0       0 $str .= $\ if defined $\;
370 0         0 $self->_write_all($str);
371             }
372              
373             # as if anyone would want to write their own printf implementation...
374             sub printf {
375 0     0 1 0 my $self= shift;
376 0         0 my $str= sprintf($_[0], $_[1..$#_]);
377 0         0 $self->_write_all($str);
378             }
379              
380             # virtual handles are unlikely to have one, and if they did, they wouldn't
381             # be using this class
382 0     0 1 0 sub fileno { undef; }
383              
384             package DataStore::CAS::FileCreatorHandle;
385 5     5   48 use strict;
  5         15  
  5         270  
386 5     5   29 use warnings;
  5         11  
  5         334  
387 5     5   33 use parent -norequire => 'DataStore::CAS::VirtualHandle';
  5         13  
  5         65  
388              
389             our $VERSION= '0.08';
390              
391             # For write-handles, commit data to the CAS and return the digest hash for it.
392 0     0 1 0 sub commit { $_[0]->_cas->commit_write_handle(@_) }
393              
394             # These would happen anyway via the AUTOLOAD, but we enumerate them so that
395             # they officially appear as methods of this class.
396 0     0 1 0 sub close { $_[0]->_cas->_handle_close(@_) }
397 0     0 1 0 sub seek { $_[0]->_cas->_handle_seek(@_) }
398 0     0 1 0 sub tell { $_[0]->_cas->_handle_tell(@_) }
399 18     18 1 64 sub write { $_[0]->_cas->_handle_write(@_) }
400              
401             # This is a write-only handle
402 0     0 1   sub eof { return 1; }
403 0     0 1   sub read { return 0; }
404 0     0 1   sub readline { return undef; }
405              
406              
407             1;
408              
409             __END__