File Coverage

blib/lib/DataStore/CAS.pm
Criterion Covered Total %
statement 137 197 69.5
branch 46 104 44.2
condition 36 91 39.5
subroutine 37 63 58.7
pod 26 26 100.0
total 282 481 58.6


line stmt bran cond sub pod time code
1             package DataStore::CAS;
2 5     5   11856 use 5.008;
  5         16  
3 5     5   2018 use Moo::Role;
  5         59173  
  5         28  
4 5     5   1810 use Carp;
  5         14  
  5         277  
5 5     5   544 use Try::Tiny;
  5         1408  
  5         8115  
6             require Scalar::Util;
7             require Symbol;
8              
9             our $VERSION= '0.04';
10             our @CARP_NOT= qw( DataStore::CAS::File DataStore::CAS::VirtualHandle );
11              
12             # ABSTRACT: Abstract base class for Content Addressable Storage
13              
14              
15             requires 'digest';
16              
17             has hash_of_null => ( is => 'lazy' );
18              
19             sub _build_hash_of_null {
20 6     6   105 return shift->calculate_hash('');
21             }
22              
23              
24             requires 'get';
25              
26              
27             sub _thing_stringifies_to_filename {
28 18     18   33 my $ref= ref $_[0];
29 18 50 66     239 (!$ref && defined $_[0])
      66        
      66        
30             || $ref->isa('Path::Class::File')
31             || $ref->isa('Path::Tiny')
32             || $ref->isa('File::Temp')
33             }
34              
35             sub put {
36 23     23 1 7656 my $ref= ref $_[1];
37 23 100 66     156 goto $_[0]->can('put_scalar')
38             if !$ref || $ref eq 'SCALAR';
39 5 100 100     61 goto $_[0]->can('put_file')
40             if $ref->isa('DataStore::CAS::File')
41             or _thing_stringifies_to_filename($_[1]);
42 1 50 33     18 goto $_[0]->can('put_handle')
43             if $ref->isa('IO::Handle')
44             or Scalar::Util::reftype($_[1]) eq 'GLOB';
45 0         0 croak("Can't 'put' object of type $ref");
46             }
47              
48              
49             sub put_scalar {
50 17     17 1 41 my ($self, undef, $flags)= @_;
51 17 50       59 my $ref= ref $_[1] eq 'SCALAR'? $_[1] : \$_[1];
52              
53             # Force to plain string if it is an object
54 17 50       41 if (ref $$ref) {
55             # TODO: croak unless object has stringify magic
56 0         0 $ref= \"$$ref";
57             }
58              
59             # Can only 'put' octets, not wide-character unicode strings.
60 17 50       51 utf8::downgrade($$ref, 1)
61             or croak "scalar must be byte string (octets). If storing unicode,"
62             ." you must reduce to a byte encoding first.";
63              
64             my $hash= $flags && $flags->{known_hashes} && $flags->{known_hashes}{$self->digest}
65 17 50 33     69 ? $flags->{known_hashes}{$self->digest}
66             : $self->calculate_hash($ref);
67 17 100       56 if ($self->get($hash)) {
68             # Already have it
69             $flags->{stats}{dup_file_count}++
70 1 50       5 if $flags->{stats};
71 1         6 return $hash;
72             } else {
73 16 100       77 $flags= { ($flags? %$flags : ()), known_hashes => { $self->digest => $hash } };
74 16         58 my $handle= $self->new_write_handle($flags);
75 16         46 $handle->_write_all($$ref);
76 16         56 return $self->commit_write_handle($handle);
77             }
78             }
79              
80              
81             sub put_file {
82 6     6 1 13 my ($self, $file, $flags)= @_;
83 6         12 my $ref= ref $file;
84 6   100     28 my $is_cas_file= $ref && $ref->isa('DataStore::CAS::File');
85 6         12 my $is_filename= _thing_stringifies_to_filename($file);
86 6 50 0     31 croak "Unhandled argument to put_file: ".($file||'(undef)')
      66        
      66        
87             unless defined $file && ($is_cas_file || $is_filename);
88              
89 6 50       21 my %known_hashes= $flags->{known_hashes}? %{$flags->{known_hashes}} : ();
  0         0  
90             # Apply reuse_hash feature, if requested
91 6 50 66     18 if ($is_cas_file && $flags->{reuse_hash}) {
92 0         0 $known_hashes{$file->store->digest}= $file->hash;
93 0         0 $flags= { %$flags, known_hashes => \%known_hashes };
94             }
95             # It is probably better to read a file twice than to write one that
96             # doesn't need to be written.
97             # ...but can't do better than ->put_handle unless the file is a real file.
98 6 50 33     39 my $fname= $is_filename? "$file"
    100          
99             : $is_cas_file && $file->can('local_file')? $file->local_file
100             : undef;
101 6 50 33     71 if ($known_hashes{$self->digest} || (defined $fname && -f $fname)) {
      33        
102             # Calculate the hash if it wasn't given.
103 6   33     36 my $hash= ($known_hashes{$self->digest} ||= $self->calculate_file_hash($fname));
104             # Avoid unnecessary work
105 6 100       642 if ($self->get($hash)) {
106             $flags->{stats}{dup_file_count}++
107 5 50       18 if $flags->{stats};
108             $self->_unlink_source_file($file, $flags)
109 5 50 33     16 if $flags->{move} && defined $fname;
110 5         21 return $hash;
111             }
112             # Save hash for next step
113 1         8 $flags= { %$flags, known_hashes => \%known_hashes };
114             }
115 1         3 my $fh;
116 1 50 0     4 if ($is_cas_file) {
    0          
    0          
117 1 50       4 $fh= $file->open or croak "Can't open '$file': $!";
118             }
119             elsif ($ref && $ref->can('openr')) {
120 0 0       0 $fh= $file->openr or croak "Can't open '$file': $!";
121             }
122             elsif ($is_filename) {
123 0 0       0 open($fh, '<', $fname) or croak "Can't open '$fname': $!";
124             }
125             else {
126 0         0 croak "Don't know how to open '$file'";
127             }
128 1         4 my $hash= $self->put_handle($fh, $flags);
129             $self->_unlink_source_file($file, $flags)
130 1 50 33     103 if $hash && $flags->{move};
131 0         0 return $hash;
132             }
133              
134             sub _unlink_source_file {
135 1     1   4 my ($self, $file, $flags)= @_;
136 1 50       4 return if $flags->{dry_run};
137 1   33     12 my $is_cas_file= ref $file && ref($file)->isa('DataStore::CAS::File');
138 1 50       4 if ($is_cas_file) {
139 1         266 croak "Refusing to delete origin CAS File (this can damage a CAS)\n"
140             ."If you really want to do this, pass \$file->local_name and then"
141             ." delete the cas entry yourself.";
142             } else {
143 0 0 0     0 if (ref $file && ref($file)->isa('File::Temp')) {
144             # The Simple backend closes File::Temp files to ensure they don't
145             # get written to any more. so match that behavior here.
146 0         0 $file->close;
147             }
148 0 0       0 unlink "$file" or croak "unlink($file): $!"
149             }
150             }
151              
152              
153             sub put_handle {
154 2     2 1 7 my ($self, $h_in, $flags)= @_;
155 2         6 binmode $h_in;
156 2         8 my $h_out= $self->new_write_handle($flags);
157 2   50     13 my $buf_size= $flags->{buffer_size} || 1024*1024;
158 2         4 my $buf;
159 2         2 while(1) {
160 4         58 my $got= read($h_in, $buf, $buf_size);
161 4 100       27 if ($got) {
    50          
162 2 50       9 $h_out->_write_all($buf) or croak "write: $!";
163             } elsif (!defined $got) {
164 0 0 0     0 next if ($!{EINTR} || $!{EAGAIN});
165 0         0 croak "read: $!";
166             } else {
167 2         8 last;
168             }
169             }
170 2         9 return $self->commit_write_handle($h_out);
171             }
172              
173              
174             # This implementation probably needs overridden by subclasses.
175             sub new_write_handle {
176 0     0 1 0 my ($self, $flags)= @_;
177 0         0 return DataStore::CAS::FileCreatorHandle->new($self, { flags => $flags });
178             }
179              
180             # This must be implemented by subclasses
181             requires 'commit_write_handle';
182              
183              
184             sub calculate_hash {
185 23     23 1 37 my $self= shift;
186 23 100       71 Digest->new($self->digest)->add(ref $_[0]? ${$_[0]} : $_[0])->hexdigest;
  17         710  
187             }
188              
189             sub calculate_file_hash {
190 9     9 1 20 my ($self, $file)= @_;
191 9 50       359 open my $fh, '<', $file or croak "open($file): $!";
192 9         41 binmode $fh;
193 9         39 Digest->new($self->digest)->addfile($fh)->hexdigest;
194             }
195              
196              
197             sub validate {
198 0     0 1 0 my ($self, $hash, $flags)= @_;
199              
200 0         0 my $file= $self->get($hash);
201 0 0       0 return undef unless defined $file;
202              
203             # Exceptions during 'put' will most likely come from reading $file,
204             # which means that validation fails, and we return false.
205 0         0 my $new_hash;
206             try {
207             # We don't pass flags directly through to get/put, because flags for validate
208             # are not the same as flags for get or put. But, 'stats' is a standard thing.
209 0     0   0 my %args= ( dry_run => 1 );
210 0 0       0 $args{stats}= $flags->{stats} if $flags->{stats};
211 0         0 $new_hash= $self->put_handle($file, \%args);
212             }
213       0     catch {
214 0         0 };
215 0 0 0     0 return (defined $new_hash and $new_hash eq $hash)? 1 : 0;
216             }
217              
218              
219             requires 'delete';
220              
221              
222             requires 'iterator';
223              
224              
225             requires 'open_file';
226              
227             # File and Handle objects have DESTROY methods that call these methods of
228             # their associated CAS. The CAS should implement these for cleanup of
229             # temporary files, or etc.
230       17     sub _file_destroy {}
231       18     sub _handle_destroy {}
232              
233             package DataStore::CAS::File;
234 5     5   43 use strict;
  5         11  
  5         124  
235 5     5   26 use warnings;
  5         15  
  5         1864  
236              
237             our $VERSION= '0.04';
238              
239 4     4 1 581 sub store { $_[0]{store} }
240 5     5 1 48 sub hash { $_[0]{hash} }
241 3     3 1 22 sub size { $_[0]{size} }
242              
243             sub open {
244 4     4 1 11 my $self= shift;
245 4 100       31 return $self->{store}->open_file($self)
246             if @_ == 0;
247 1 50       3 return $self->{store}->open_file($self, { @_ })
248             if @_ > 1;
249 1 50 33     13 return $self->{store}->open_file($self, { layer => $_[0] })
250             if @_ == 1 and !ref $_[0];
251 0         0 Carp::croak "Wrong arguments to 'open'";
252             };
253              
254             sub DESTROY {
255 18     18   4847 $_[0]{store}->_file_destroy(@_);
256             }
257              
258             our $AUTOLOAD;
259             sub AUTOLOAD {
260 4     4   1176 my $attr= substr($AUTOLOAD, rindex($AUTOLOAD, ':')+1);
261 4 100       19 return $_[0]{$attr} if exists $_[0]{$attr};
262 2         7 unshift @_, $_[0]{store};
263             goto (
264 2   66     164 $_[0]->can("_file_$attr")
265             or Carp::croak "Can't locate object method \"_file_$attr\" via package \"".ref($_[0]).'"'
266             );
267             }
268              
269             package DataStore::CAS::VirtualHandle;
270 5     5   45 use strict;
  5         11  
  5         147  
271 5     5   37 use warnings;
  5         9  
  5         4738  
272              
273             our $VERSION= '0.04';
274              
275             sub new {
276 19     19 1 697 my ($class, $cas, $fields)= @_;
277 19         68 my $glob= bless Symbol::gensym(), $class;
278 19         262 ${*$glob}= $cas;
  19         52  
279 19 50       36 %{*$glob}= %{$fields||{}};
  19         71  
  19         82  
280 19         112 tie *$glob, $glob;
281 19         71 $glob;
282             }
283 19     19   51 sub TIEHANDLE { return $_[0]; }
284              
285 19     19   414 sub _cas { ${*${$_[0]}} } # the scalar view of the symbol points to the CAS object
  19         23  
  19         103  
286 39     39   86 sub _data { \%{*${$_[0]}} } # the hashref view of the symbol holds the fields of the handle
  39         52  
  39         148  
287              
288 19     19   776 sub DESTROY { unshift @_, ${*{$_[0]}}; goto $_[0]->can('_handle_destroy') }
  19         27  
  19         85  
  19         115  
289              
290             # By default, any method not defined will call to C<$cas->_handle_$method( $handle, @args );>
291             our $AUTOLOAD;
292             sub AUTOLOAD {
293 4     4   1704 unshift @_, ${*${$_[0]}}; # unshift @_, $self->_cas
  4         5  
  4         18  
294 4         13 my $attr= substr($AUTOLOAD, rindex($AUTOLOAD, ':')+1);
295             goto (
296 4   66     176 $_[0]->can("_handle_$attr")
297             or Carp::croak "Can't locate object method \"_handle_$attr\" via package \"".ref($_[0]).'"'
298             );
299             }
300              
301             #
302             # Tied filehandle API
303             #
304              
305 1     1   1053 sub READ { (shift)->read(@_) }
306 0 0   0   0 sub READLINE { wantarray? (shift)->getlines : (shift)->getline }
307 0     0   0 sub GETC { $_[0]->getc }
308 0     0   0 sub EOF { $_[0]->eof }
309              
310 0     0   0 sub WRITE { (shift)->write(@_) }
311 0     0   0 sub PRINT { (shift)->print(@_) }
312 0     0   0 sub PRINTF { (shift)->printf(@_) }
313              
314 0     0   0 sub SEEK { (shift)->seek(@_) }
315 0     0   0 sub TELL { (shift)->tell(@_) }
316              
317 0     0   0 sub FILENO { $_[0]->fileno }
318 0     0   0 sub CLOSE { $_[0]->close }
319              
320             #
321             # The following are some default implementations to make subclassing less cumbersome.
322             #
323              
324             sub getlines {
325 0     0 1 0 my $self= shift;
326 0 0 0     0 wantarray or !defined wantarray or Carp::croak "getlines called in scalar context";
327 0         0 my (@ret, $line);
328 0         0 push @ret, $line
329             while defined ($line= $self->getline);
330 0         0 @ret;
331             }
332              
333             # I'm not sure why anyone would ever want this function, but I'm adding
334             # it for completeness.
335             sub getc {
336 0     0 1 0 my $c;
337 0 0       0 $_[0]->read($c, 1)? $c : undef;
338             }
339              
340             # 'write' does not guarantee that all bytes get written in one shot.
341             # Needs to be called in a loop to accomplish "print" semantics.
342             sub _write_all {
343 18     18   43 my ($self, $str)= @_;
344 18         25 while (1) {
345 18         39 my $wrote= $self->write($str);
346 18 50 33     136 return 1 if defined $wrote and ($wrote eq length $str);
347 0 0 0     0 return undef unless defined $wrote or $!{EINTR} or $!{EAGAIN};
      0        
348 0         0 substr($str, 0, $wrote)= '';
349             }
350             }
351              
352             # easy to forget that 'print' API involves "$," and "$\"
353             sub print {
354 0     0 1 0 my $self= shift;
355 0 0       0 my $str= join( (defined $, ? $, : ""), @_ );
356 0 0       0 $str .= $\ if defined $\;
357 0         0 $self->_write_all($str);
358             }
359              
360             # as if anyone would want to write their own printf implementation...
361             sub printf {
362 0     0 1 0 my $self= shift;
363 0         0 my $str= sprintf($_[0], $_[1..$#_]);
364 0         0 $self->_write_all($str);
365             }
366              
367             # virtual handles are unlikely to have one, and if they did, they wouldn't
368             # be using this class
369 0     0 1 0 sub fileno { undef; }
370              
371             package DataStore::CAS::FileCreatorHandle;
372 5     5   52 use strict;
  5         12  
  5         128  
373 5     5   26 use warnings;
  5         21  
  5         208  
374 5     5   1374 use parent -norequire => 'DataStore::CAS::VirtualHandle';
  5         959  
  5         43  
375              
376             our $VERSION= '0.04';
377              
378             # For write-handles, commit data to the CAS and return the digest hash for it.
379 0     0 1 0 sub commit { $_[0]->_cas->commit_write_handle(@_) }
380              
381             # These would happen anyway via the AUTOLOAD, but we enumerate them so that
382             # they officially appear as methods of this class.
383 0     0 1 0 sub close { $_[0]->_cas->_handle_close(@_) }
384 0     0 1 0 sub seek { $_[0]->_cas->_handle_seek(@_) }
385 0     0 1 0 sub tell { $_[0]->_cas->_handle_tell(@_) }
386 18     18 1 51 sub write { $_[0]->_cas->_handle_write(@_) }
387              
388             # This is a write-only handle
389 0     0 1   sub eof { return 1; }
390 0     0 1   sub read { return 0; }
391 0     0 1   sub readline { return undef; }
392              
393              
394             1;
395              
396             __END__