File Coverage

lib/Archive/BagIt/Role/Manifest.pm
Criterion Covered Total %
statement 152 168 90.4
branch 32 44 72.7
condition 9 20 45.0
subroutine 22 25 88.0
pod 5 7 71.4
total 220 264 83.3


line stmt bran cond sub pod time code
1             package Archive::BagIt::Role::Manifest;
2 8     8   4162 use strict;
  8         19  
  8         282  
3 8     8   51 use warnings;
  8         13  
  8         225  
4 8     8   43 use namespace::autoclean;
  8         15  
  8         67  
5 8     8   864 use Carp qw( croak carp );
  8         25  
  8         459  
6 8     8   74 use File::Spec ();
  8         21  
  8         146  
7 8     8   34 use Moo::Role;
  8         16  
  8         72  
8             with 'Archive::BagIt::Role::Plugin';
9             with 'Archive::BagIt::Role::Portability';
10             # ABSTRACT: A role that handles all manifest files for a specific Algorithm
11             our $VERSION = '0.095'; # VERSION
12              
13             has 'algorithm' => (
14             is => 'rw',
15             #isa=>'HashRef',
16             );
17              
18             has 'manifest_file' => (
19             is => 'rw',
20             lazy => 1,
21             builder => '_build_manifest_file',
22             );
23              
24             sub _build_manifest_file {
25 84     84   552 my $self = shift;
26 84         275 my $algorithm = $self->algorithm()->name;
27 84         1416 my $file = File::Spec->catfile($self->bagit->metadata_path, "manifest-$algorithm.txt");
28 84 100       2480 if (-f $file) {
29 72         429 return $file;
30             }
31 12         67 return;
32             }
33              
34              
35             has 'tagmanifest_file' => (
36             is => 'rw',
37             lazy => 1,
38             builder => '_build_tagmanifest_file'
39             );
40              
41             sub _build_tagmanifest_file {
42 57     57   394 my $self = shift;
43 57         189 my $algorithm = $self->algorithm()->name;
44 57         925 my $file = File::Spec->catfile( $self->bagit->metadata_path, "tagmanifest-$algorithm.txt");
45 57 100       1784 if (-f $file) {
46 53         314 return $file;
47             }
48 4         65 return;
49             }
50              
51       0 0   sub BUILD {}
52              
53             after BUILD => sub {
54             my $self = shift;
55             my $algorithm = $self->algorithm->name;
56             $self->{bagit}->{manifests}->{$algorithm} = $self;
57             };
58              
59             has 'parallel_support' => (
60             is => 'ro',
61             builder => '_check_parallel_support',
62             predicate => 1,
63             lazy => 1,
64             );
65              
66             sub _check_parallel_support {
67 0     0   0 my $self = shift;
68 0         0 my $class = 'Parallel::parallel_map';
69 0 0       0 if (!exists $INC{'Parallel/parallel_map.pm'}) {
70 0         0 carp "Module '$class' not available, disable parallel support";
71 0         0 $self->bagit->use_parallel( 0 );
72 0         0 return 0;
73             }
74 0         0 load_class($class);
75 0         0 $class->import( 'parallel_map' );
76 0         0 return 1;
77             }
78              
79             sub check_pluggable_modules() {
80 177     177 0 264 my $self = shift;
81 177   33     495 return ($self->has_parallel_support() && $self->has_async_support());
82             }
83              
84              
85              
86             has 'manifest_entries' => (
87             is => 'ro',
88             lazy => 1,
89             builder => '_build_manifest_entries',
90             );
91              
92              
93             has 'tagmanifest_entries' => (
94             is => 'ro',
95             lazy => 1,
96             builder => '_build_tagmanifest_entries',
97             );
98              
99             sub __build_xxxmanifest_entries {
100 125     125   244 my ($self, $xxmanifest_file) = @_;
101 125         226 my $xxmanifest_entries = {};
102 125         338 my $algorithm = $self->algorithm()->name;
103 125 50       4424 open(my $XXMANIFEST, "<:encoding(UTF-8)", $xxmanifest_file) or croak("Cannot open $xxmanifest_file: $!");
104 125         11187 while (my $line = <$XXMANIFEST>) {
105 383         1958 $line = chomp_portable($line);
106 383         1712 my ($digest, $file) = split(/\s+/, $line, 2);
107 383 50 33     1425 next unless ((defined $digest) && (defined $file)); # empty lines!
108 383         2641 $xxmanifest_entries->{$file} = $digest;
109             }
110 125         1620 close($XXMANIFEST);
111 125         1006 return $xxmanifest_entries;
112             }
113              
114             sub _build_tagmanifest_entries {
115 53     53   484 my ($self) = @_;
116 53         783 my $tm_file = $self->tagmanifest_file();
117 53 50       360 if (defined $tm_file) {
118 53         110 return $self->__build_xxxmanifest_entries($tm_file);
119             }
120 0         0 return;
121             }
122              
123             sub _build_manifest_entries {
124 72     72   558 my ($self) = @_;
125 72         1130 my $m_file = $self->manifest_file();
126 72 50       463 if (defined $m_file) {
127 72         197 return $self->__build_xxxmanifest_entries($m_file);
128             }
129 0         0 return;
130             }
131              
132             sub _fill_digest_hashref { # should be handle if empty values and ignore it (because parallel map)
133 549     549   945 my ($self, $bagit, $localname) = @_;
134 549 50 33     2102 if ((!defined $localname) or (0 == length($localname)) ) {
135             # croak "empty localname used!";
136 0         0 return;
137             }
138 549         749 my $digest_hashref;
139 549         5226 my $fullname = File::Spec->catfile($bagit, $localname);
140 549         11499 my $calc_digest = $self->bagit->digest_callback();
141 549         4456 my $eval = &$calc_digest($self->algorithm(), $fullname);
142 549   50     1738 $digest_hashref->{calculated_digest} = $eval // '';
143 549         846 $digest_hashref->{local_name} = $localname;
144 549         711 $digest_hashref->{full_name} = $fullname;
145 549         1627 return $digest_hashref;
146             }
147              
148              
149              
150              
151             # calc digest
152             # expects expected_ref, array_ref of filenames
153             # returns arrayref of hashes where each entry has
154             # $tmp->{calculated_digest} = $digest;
155             # $tmp->{expected_digest} = $expected_digest;
156             # $tmp->{filename} = $filename;
157             sub calc_digests {
158 177     177 1 365 my ($self, $bagit, $filenames_ref) = @_;
159 177         393 $self->check_pluggable_modules(); # handles Modules
160 177         267 my @digest_hashes;
161             my %digest_results;
162 177 100       3841 if ($self->bagit->use_parallel()) {
163             # Parallel::Map does not work at the moment, potential bug in Parallel::Map or IO::Async
164             # @digest_hashes = pmap_scalar {
165             # $self->_fill_digest_hashref($bagit, $_);
166             # } foreach => $filenames_ref;
167             # works as expected:
168              
169             my $anon_sub = sub {
170 0     0   0 my $filename = shift;
171 0         0 return $self->_fill_digest_hashref($bagit, $filename);
172 10         98 };
173             ## no critic (ProhibitStringyEval);
174 10         1008 @digest_hashes = eval 'Parallel::parallel_map::parallel_map (
175             sub { my $filename = shift; &$anon_sub($filename);} , @{ $filenames_ref}
176             )';
177             } else {
178             # serial variant
179 167         1163 @digest_hashes = map {$self->_fill_digest_hashref($bagit, $_)} @{$filenames_ref}
  549         1342  
  167         300  
180             }
181 177         946 return \@digest_hashes;
182             }
183              
184             sub _verify_XXX_manifests {
185 126     126   337 my ($self, $xxprefix, $xxmanifest_entries, $files_ref, $return_all_errors) = @_;
186             # Read the manifest file
187 126         161 my @files = @{ $files_ref };
  126         285  
188 126         164 my @invalid_messages;
189 126         399 my $bagit = $self->bagit->bag_path;
190 126         312 my $algorithm = $self->algorithm()->name;
191             my $subref_invalid_report_or_die = sub {
192 37     37   68 my $message = shift;
193 37 100       70 if (defined $return_all_errors) {
194 15         22 push @invalid_messages, $message;
195             } else {
196 22         249 croak($message);
197             }
198 15         32 return;
199 126         592 };
200             # Test readability
201 126         274 foreach my $local_name (@files) {
202             # local_name is relative to bagit base
203 382         3321 my $filepath = File::Spec->catfile($bagit, $local_name);
204 382 50       5442 unless (-r $filepath) {
205 0         0 &$subref_invalid_report_or_die(
206             "cannot read $local_name (bag-path:$bagit)",
207             );
208             }
209             }
210             # Evaluate each file against the manifest
211              
212 126         459 my $local_xxfilename = "${xxprefix}-${algorithm}.txt";
213              
214             # first check if each file from payload exists in manifest_entries for given alg
215 126         196 foreach my $local_name (@files) {
216 381         844 my $normalized_local_name = normalize_payload_filepath($local_name);
217             # local_name is relative to bagit base
218 381 100       899 unless (exists $xxmanifest_entries->{$normalized_local_name}) { # localname as value should exist!
219 8         67 &$subref_invalid_report_or_die(
220             "file '$local_name' (normalized='$normalized_local_name') found, which is not in '$local_xxfilename' (bag-path:'$bagit')!"
221             #."DEBUG: \n".join("\n", keys %{$xxmanifest_entries->{$algorithm}})
222             );
223             }
224             }
225             # second check if each file from manifest_entries for given alg exists in payload
226 119         149 my %normalised_files;
227 119         186 foreach my $file (@files) {
228 369         594 $normalised_files{ normalize_payload_filepath( $file )} = 1;
229             }
230 119         165 foreach my $local_mf_entry_path (keys %{$xxmanifest_entries}) {
  119         441  
231 369 100       627 if ( # to avoid escapes via manifest-files
232             check_if_payload_filepath_violates($local_mf_entry_path)
233             ) {
234 7         42 &$subref_invalid_report_or_die("file '$local_mf_entry_path' not allowed in '$local_xxfilename' (bag-path:'$bagit'")
235             }
236             else {
237 362 100       957 unless (exists $normalised_files{$local_mf_entry_path}) {
238 1         18 &$subref_invalid_report_or_die(
239             "file '$local_mf_entry_path' NOT found, but expected via '$local_xxfilename' (bag-path:'$bagit')!"
240             );
241             }
242             }
243             }
244             # all preconditions full filled, now calc all digests
245 111         338 my $digest_hashes_ref = $self->calc_digests($bagit, \@files);
246             # compare digests
247 111 50 33     620 if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
248 111         166 foreach my $digest_entry (@{$digest_hashes_ref}) {
  111         227  
249 314         598 my $normalized = normalize_payload_filepath($digest_entry->{local_name});
250 314         538 $digest_entry->{expected_digest} = $xxmanifest_entries->{$normalized};
251 314 50       567 if (! defined $digest_entry->{expected_digest} ) { next; } # undef expected digests only occur if all preconditions fullfilled but return_all_errors was set, we should ignore it!
  0         0  
252 314 100       707 if ($digest_entry->{calculated_digest} ne $digest_entry->{expected_digest}) {
253 21         210 my $xxfilename = File::Spec->catfile($bagit, $local_xxfilename);
254             &$subref_invalid_report_or_die(
255             sprintf("file '%s' (normalized='%s') invalid, digest (%s) calculated=%s, but expected=%s in file '%s'",
256             $digest_entry->{local_name},
257             $normalized,
258             $algorithm,
259             $digest_entry->{calculated_digest},
260             $digest_entry->{expected_digest},
261 21         190 $xxfilename
262             )
263             );
264             }
265             }
266             }
267              
268 104 100 100     321 if ($return_all_errors && (scalar @invalid_messages > 0)) {
269 13         19 push @{$self->bagit->{errors}},
  13         81  
270             join("\n\t",
271             sort @invalid_messages
272             );
273 13         122 return;
274             }
275 91         812 return 1;
276             }
277              
278              
279             sub verify_manifest {
280 85     85 1 2065 my ($self, $payload_files_ref, $return_all_errors) = @_;
281 85 100       1400 if ($self->manifest_file()) {
282 73         1527 return $self->_verify_XXX_manifests(
283             "manifest",
284             $self->manifest_entries(),
285             $payload_files_ref,
286             $return_all_errors
287             );
288             }
289 12         41 return;
290             }
291              
292              
293             sub verify_tagmanifest {
294 57     57 1 1091 my ($self, $non_payload_files_ref, $return_all_errors) = @_;
295 57         75 my @non_payload_files = grep {$_ !~ m#tagmanifest-[0-9a-zA-Z]+\.txt$#} @{ $non_payload_files_ref };
  287         735  
  57         125  
296 57 100       1025 if ($self->tagmanifest_file()) {
297 53         1192 return $self->_verify_XXX_manifests(
298             "tagmanifest",
299             $self->tagmanifest_entries(),
300             \@non_payload_files,
301             $return_all_errors
302             );
303             }
304 4         28 return;
305             }
306              
307             sub __create_xxmanifest {
308 66     66   342 my ($self, $prefix, $files_ref) = @_;
309 66         197 my $algo = $self->algorithm->name;
310 66         167 my $bagit = $self->bagit->bag_path;
311 66         1238 my $manifest_file = File::Spec->catfile($self->bagit->metadata_path, "$prefix-${algo}.txt");
312             # Generate digests for all of the files under ./data
313 66         1200 my $digest_hashes_ref = $self->calc_digests($bagit, $files_ref);
314 66 50 33     317 if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
315 66 50       4738 open(my $fh, ">:encoding(UTF-8)",$manifest_file) or croak("Cannot create $prefix-${algo}.txt: $!\n");
316 66         4487 foreach my $digest_entry (@{$digest_hashes_ref}) {
  66         163  
317 220         493 my $normalized_file = normalize_payload_filepath($digest_entry->{local_name});
318 220         320 my $digest = $digest_entry->{calculated_digest};
319 220         787 print($fh "$digest $normalized_file\n");
320             }
321 66         3860 close($fh);
322             }
323 66         379 return 1;
324             }
325              
326              
327             sub create_manifest {
328 32     32 1 258 my ($self) = @_;
329 32         590 $self->__create_xxmanifest('manifest', $self->bagit->payload_files);
330 32         126 return 1;
331             }
332              
333              
334             sub create_tagmanifest {
335 34     34 1 693 my ($self) = @_;
336 34         52 my @non_payload_files = grep {$_ !~ m#^tagmanifest-.*\.txt$#} @{ $self->bagit->non_payload_files };
  160         472  
  34         582  
337 34         110 $self->__create_xxmanifest('tagmanifest', \@non_payload_files);
338 34         142 return 1;
339             }
340              
341 8     8   22424 no Moo;
  8         20  
  8         69  
342             1;
343              
344             __END__