File Coverage

lib/Archive/BagIt/Role/Manifest.pm
Criterion Covered Total %
statement 152 168 90.4
branch 32 44 72.7
condition 9 20 45.0
subroutine 22 25 88.0
pod 5 7 71.4
total 220 264 83.3


line stmt bran cond sub pod time code
1             package Archive::BagIt::Role::Manifest;
2 8     8   3175 use strict;
  8         17  
  8         219  
3 8     8   36 use warnings;
  8         12  
  8         231  
4 8     8   42 use namespace::autoclean;
  8         22  
  8         67  
5 8     8   711 use Carp qw( croak carp );
  8         14  
  8         451  
6 8     8   44 use File::Spec ();
  8         14  
  8         121  
7 8     8   38 use Moo::Role;
  8         12  
  8         61  
8             with 'Archive::BagIt::Role::Plugin';
9             with 'Archive::BagIt::Role::Portability';
10             # ABSTRACT: A role that handles all manifest files for a specific Algorithm
11             our $VERSION = '0.094'; # VERSION
12              
13             has 'algorithm' => (
14             is => 'rw',
15             #isa=>'HashRef',
16             );
17              
18             has 'manifest_file' => (
19             is => 'rw',
20             lazy => 1,
21             builder => '_build_manifest_file',
22             );
23              
24             sub _build_manifest_file {
25 83     83   767 my $self = shift;
26 83         496 my $algorithm = $self->algorithm()->name;
27 83         1255 my $file = File::Spec->catfile($self->bagit->metadata_path, "manifest-$algorithm.txt");
28 83 100       2309 if (-f $file) {
29 71         468 return $file;
30             }
31 12         80 return;
32             }
33              
34              
35             has 'tagmanifest_file' => (
36             is => 'rw',
37             lazy => 1,
38             builder => '_build_tagmanifest_file'
39             );
40              
41             sub _build_tagmanifest_file {
42 60     60   518 my $self = shift;
43 60         303 my $algorithm = $self->algorithm()->name;
44 60         976 my $file = File::Spec->catfile( $self->bagit->metadata_path, "tagmanifest-$algorithm.txt");
45 60 100       1807 if (-f $file) {
46 54         340 return $file;
47             }
48 6         32 return;
49             }
50              
51       0 0   sub BUILD {}
52              
53             after BUILD => sub {
54             my $self = shift;
55             my $algorithm = $self->algorithm->name;
56             $self->{bagit}->{manifests}->{$algorithm} = $self;
57             };
58              
59             has 'parallel_support' => (
60             is => 'ro',
61             builder => '_check_parallel_support',
62             predicate => 1,
63             lazy => 1,
64             );
65              
66             sub _check_parallel_support {
67 0     0   0 my $self = shift;
68 0         0 my $class = 'Parallel::parallel_map';
69 0 0       0 if (!exists $INC{'Parallel/parallel_map.pm'}) {
70 0         0 carp "Module '$class' not available, disable parallel support";
71 0         0 $self->bagit->use_parallel( 0 );
72 0         0 return 0;
73             }
74 0         0 load_class($class);
75 0         0 $class->import( 'parallel_map' );
76 0         0 return 1;
77             }
78              
79             sub check_pluggable_modules() {
80 177     177 0 307 my $self = shift;
81 177   33     627 return ($self->has_parallel_support() && $self->has_async_support());
82             }
83              
84              
85              
86             has 'manifest_entries' => (
87             is => 'ro',
88             lazy => 1,
89             builder => '_build_manifest_entries',
90             );
91              
92              
93             has 'tagmanifest_entries' => (
94             is => 'ro',
95             lazy => 1,
96             builder => '_build_tagmanifest_entries',
97             );
98              
99             sub __build_xxxmanifest_entries {
100 125     125   288 my ($self, $xxmanifest_file) = @_;
101 125         242 my $xxmanifest_entries = {};
102 125         350 my $algorithm = $self->algorithm()->name;
103 125 50       5050 open(my $XXMANIFEST, "<:encoding(UTF-8)", $xxmanifest_file) or croak("Cannot open $xxmanifest_file: $!");
104 125         13354 while (my $line = <$XXMANIFEST>) {
105 384         2055 $line = chomp_portable($line);
106 384         1908 my ($digest, $file) = split(/\s+/, $line, 2);
107 384 50 33     1225 next unless ((defined $digest) && (defined $file)); # empty lines!
108 384         2429 $xxmanifest_entries->{$file} = $digest;
109             }
110 125         1700 close($XXMANIFEST);
111 125         1302 return $xxmanifest_entries;
112             }
113              
114             sub _build_tagmanifest_entries {
115 54     54   561 my ($self) = @_;
116 54         720 my $tm_file = $self->tagmanifest_file();
117 54 50       355 if (defined $tm_file) {
118 54         160 return $self->__build_xxxmanifest_entries($tm_file);
119             }
120 0         0 return;
121             }
122              
123             sub _build_manifest_entries {
124 71     71   744 my ($self) = @_;
125 71         939 my $m_file = $self->manifest_file();
126 71 50       591 if (defined $m_file) {
127 71         260 return $self->__build_xxxmanifest_entries($m_file);
128             }
129 0         0 return;
130             }
131              
132             sub _fill_digest_hashref { # should be handle if empty values and ignore it (because parallel map)
133 550     550   951 my ($self, $bagit, $localname) = @_;
134 550 50 33     1990 if ((!defined $localname) or (0 == length($localname)) ) {
135             # croak "empty localname used!";
136 0         0 return;
137             }
138 550         575 my $digest_hashref;
139 550         4686 my $fullname = File::Spec->catfile($bagit, $localname);
140 550         10314 my $calc_digest = $self->bagit->digest_callback();
141 550         3975 my $eval = &$calc_digest($self->algorithm(), $fullname);
142 550   50     1560 $digest_hashref->{calculated_digest} = $eval // '';
143 550         781 $digest_hashref->{local_name} = $localname;
144 550         722 $digest_hashref->{full_name} = $fullname;
145 550         1492 return $digest_hashref;
146             }
147              
148              
149              
150              
151             # calc digest
152             # expects expected_ref, array_ref of filenames
153             # returns arrayref of hashes where each entry has
154             # $tmp->{calculated_digest} = $digest;
155             # $tmp->{expected_digest} = $expected_digest;
156             # $tmp->{filename} = $filename;
157             sub calc_digests {
158 177     177 1 392 my ($self, $bagit, $filenames_ref) = @_;
159 177         598 $self->check_pluggable_modules(); # handles Modules
160 177         308 my @digest_hashes;
161             my %digest_results;
162 177 100       3670 if ($self->bagit->use_parallel()) {
163             # Parallel::Map does not work at the moment, potential bug in Parallel::Map or IO::Async
164             # @digest_hashes = pmap_scalar {
165             # $self->_fill_digest_hashref($bagit, $_);
166             # } foreach => $filenames_ref;
167             # works as expected:
168              
169             my $anon_sub = sub {
170 0     0   0 my $filename = shift;
171 0         0 return $self->_fill_digest_hashref($bagit, $filename);
172 10         105 };
173             ## no critic (ProhibitStringyEval);
174 10         1213 @digest_hashes = eval 'Parallel::parallel_map::parallel_map (
175             sub { my $filename = shift; &$anon_sub($filename);} , @{ $filenames_ref}
176             )';
177             } else {
178             # serial variant
179 167         1274 @digest_hashes = map {$self->_fill_digest_hashref($bagit, $_)} @{$filenames_ref}
  550         1257  
  167         287  
180             }
181 177         1026 return \@digest_hashes;
182             }
183              
184             sub _verify_XXX_manifests {
185 126     126   400 my ($self, $xxprefix, $xxmanifest_entries, $files_ref, $return_all_errors) = @_;
186             # Read the manifest file
187 126         187 my @files = @{ $files_ref };
  126         330  
188 126         173 my @invalid_messages;
189 126         450 my $bagit = $self->bagit->bag_path;
190 126         319 my $algorithm = $self->algorithm()->name;
191             my $subref_invalid_report_or_die = sub {
192 37     37   87 my $message = shift;
193 37 100       95 if (defined $return_all_errors) {
194 15         34 push @invalid_messages, $message;
195             } else {
196 22         626 croak($message);
197             }
198 15         34 return;
199 126         680 };
200             # Test readability
201 126         326 foreach my $local_name (@files) {
202             # local_name is relative to bagit base
203 383         3023 my $filepath = File::Spec->catfile($bagit, $local_name);
204 383 50       5120 unless (-r $filepath) {
205 0         0 &$subref_invalid_report_or_die(
206             "cannot read $local_name (bag-path:$bagit)",
207             );
208             }
209             }
210             # Evaluate each file against the manifest
211              
212 126         545 my $local_xxfilename = "${xxprefix}-${algorithm}.txt";
213              
214             # first check if each file from payload exists in manifest_entries for given alg
215 126         240 foreach my $local_name (@files) {
216 382         742 my $normalized_local_name = normalize_payload_filepath($local_name);
217             # local_name is relative to bagit base
218 382 100       1299 unless (exists $xxmanifest_entries->{$normalized_local_name}) { # localname as value should exist!
219 8         54 &$subref_invalid_report_or_die(
220             "file '$local_name' (normalized='$normalized_local_name') found, which is not in '$local_xxfilename' (bag-path:'$bagit')!"
221             #."DEBUG: \n".join("\n", keys %{$xxmanifest_entries->{$algorithm}})
222             );
223             }
224             }
225             # second check if each file from manifest_entries for given alg exists in payload
226 119         215 my %normalised_files;
227 119         181 foreach my $file (@files) {
228 370         557 $normalised_files{ normalize_payload_filepath( $file )} = 1;
229             }
230 119         197 foreach my $local_mf_entry_path (keys %{$xxmanifest_entries}) {
  119         598  
231 370 100       624 if ( # to avoid escapes via manifest-files
232             check_if_payload_filepath_violates($local_mf_entry_path)
233             ) {
234 7         48 &$subref_invalid_report_or_die("file '$local_mf_entry_path' not allowed in '$local_xxfilename' (bag-path:'$bagit'")
235             }
236             else {
237 363 100       875 unless (exists $normalised_files{$local_mf_entry_path}) {
238 1         6 &$subref_invalid_report_or_die(
239             "file '$local_mf_entry_path' NOT found, but expected via '$local_xxfilename' (bag-path:'$bagit')!"
240             );
241             }
242             }
243             }
244             # all preconditions full filled, now calc all digests
245 111         421 my $digest_hashes_ref = $self->calc_digests($bagit, \@files);
246             # compare digests
247 111 50 33     740 if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
248 111         205 foreach my $digest_entry (@{$digest_hashes_ref}) {
  111         268  
249 315         629 my $normalized = normalize_payload_filepath($digest_entry->{local_name});
250 315         568 $digest_entry->{expected_digest} = $xxmanifest_entries->{$normalized};
251 315 50       569 if (! defined $digest_entry->{expected_digest} ) { next; } # undef expected digests only occur if all preconditions fullfilled but return_all_errors was set, we should ignore it!
  0         0  
252 315 100       710 if ($digest_entry->{calculated_digest} ne $digest_entry->{expected_digest}) {
253 21         209 my $xxfilename = File::Spec->catfile($bagit, $local_xxfilename);
254             &$subref_invalid_report_or_die(
255             sprintf("file '%s' (normalized='%s') invalid, digest (%s) calculated=%s, but expected=%s in file '%s'",
256             $digest_entry->{local_name},
257             $normalized,
258             $algorithm,
259             $digest_entry->{calculated_digest},
260             $digest_entry->{expected_digest},
261 21         250 $xxfilename
262             )
263             );
264             }
265             }
266             }
267              
268 104 100 100     324 if ($return_all_errors && (scalar @invalid_messages > 0)) {
269 13         23 push @{$self->bagit->{errors}},
  13         106  
270             join("\n\t",
271             sort @invalid_messages
272             );
273 13         159 return;
274             }
275 91         1033 return 1;
276             }
277              
278              
279             sub verify_manifest {
280 84     84 1 2044 my ($self, $payload_files_ref, $return_all_errors) = @_;
281 84 100       1304 if ($self->manifest_file()) {
282 72         1624 return $self->_verify_XXX_manifests(
283             "manifest",
284             $self->manifest_entries(),
285             $payload_files_ref,
286             $return_all_errors
287             );
288             }
289 12         33 return;
290             }
291              
292              
293             sub verify_tagmanifest {
294 60     60 1 1224 my ($self, $non_payload_files_ref, $return_all_errors) = @_;
295 60         115 my @non_payload_files = grep {$_ !~ m#tagmanifest-[0-9a-zA-Z]+\.txt$#} @{ $non_payload_files_ref };
  300         722  
  60         136  
296 60 100       1111 if ($self->tagmanifest_file()) {
297 54         1132 return $self->_verify_XXX_manifests(
298             "tagmanifest",
299             $self->tagmanifest_entries(),
300             \@non_payload_files,
301             $return_all_errors
302             );
303             }
304 6         21 return;
305             }
306              
307             sub __create_xxmanifest {
308 66     66   370 my ($self, $prefix, $files_ref) = @_;
309 66         337 my $algo = $self->algorithm->name;
310 66         199 my $bagit = $self->bagit->bag_path;
311 66         1125 my $manifest_file = File::Spec->catfile($self->bagit->metadata_path, "$prefix-${algo}.txt");
312             # Generate digests for all of the files under ./data
313 66         1175 my $digest_hashes_ref = $self->calc_digests($bagit, $files_ref);
314 66 50 33     354 if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
315 66 50       5250 open(my $fh, ">:encoding(UTF-8)",$manifest_file) or croak("Cannot create $prefix-${algo}.txt: $!\n");
316 66         5224 foreach my $digest_entry (@{$digest_hashes_ref}) {
  66         164  
317 220         515 my $normalized_file = normalize_payload_filepath($digest_entry->{local_name});
318 220         275 my $digest = $digest_entry->{calculated_digest};
319 220         775 print($fh "$digest $normalized_file\n");
320             }
321 66         4595 close($fh);
322             }
323 66         378 return 1;
324             }
325              
326              
327             sub create_manifest {
328 32     32 1 305 my ($self) = @_;
329 32         584 $self->__create_xxmanifest('manifest', $self->bagit->payload_files);
330 32         119 return 1;
331             }
332              
333              
334             sub create_tagmanifest {
335 34     34 1 917 my ($self) = @_;
336 34         52 my @non_payload_files = grep {$_ !~ m#^tagmanifest-.*\.txt$#} @{ $self->bagit->non_payload_files };
  160         453  
  34         562  
337 34         135 $self->__create_xxmanifest('tagmanifest', \@non_payload_files);
338 34         192 return 1;
339             }
340              
341 8     8   18463 no Moo;
  8         18  
  8         60  
342             1;
343              
344             __END__