File Coverage

lib/Archive/BagIt/Role/Manifest.pm
Criterion Covered Total %
statement 140 143 97.9
branch 30 40 75.0
condition 8 17 47.0
subroutine 21 22 95.4
pod 5 6 83.3
total 204 228 89.4


line stmt bran cond sub pod time code
1             package Archive::BagIt::Role::Manifest;
2 12     12   236134 use strict;
  12         61  
  12         524  
3 12     12   66 use warnings;
  12         20  
  12         641  
4 12     12   1376 use namespace::autoclean;
  12         62171  
  12         101  
5 12     12   1354 use Carp qw( croak carp );
  12         29  
  12         939  
6 12     12   94 use File::Spec ();
  12         90  
  12         327  
7 12     12   439 use Moo::Role;
  12         11255  
  12         97  
8             with 'Archive::BagIt::Role::Plugin';
9             with 'Archive::BagIt::Role::Portability';
10             # ABSTRACT: A role that handles all manifest files for a specific Algorithm
11             our $VERSION = '0.101'; # VERSION
12              
13             has 'algorithm' => (
14             is => 'rw',
15             #isa=>'HashRef',
16             );
17              
18             has 'manifest_file' => (
19             is => 'rw',
20             lazy => 1,
21             builder => '_build_manifest_file',
22             );
23              
24             sub _build_manifest_file {
25 78     78   691 my $self = shift;
26 78         342 my $algorithm = $self->algorithm()->name;
27 78         1877 my $file = File::Spec->catfile($self->bagit->metadata_path, "manifest-$algorithm.txt");
28 78 100       3901 return $file if (-f $file);
29 13         101 return;
30             }
31              
32              
33             has 'tagmanifest_file' => (
34             is => 'rw',
35             lazy => 1,
36             builder => '_build_tagmanifest_file'
37             );
38              
39             sub _build_tagmanifest_file {
40 53     53   464 my $self = shift;
41 53         228 my $algorithm = $self->algorithm()->name;
42 53         1148 my $file = File::Spec->catfile( $self->bagit->metadata_path, "tagmanifest-$algorithm.txt");
43 53 100       2717 return $file if (-f $file);
44 7         60 return;
45             }
46              
47       0 0   sub BUILD {}
48              
49             after BUILD => sub {
50             my $self = shift;
51             my $algorithm = $self->algorithm->name;
52             $self->{bagit}->{manifests}->{$algorithm} = $self;
53             };
54              
55              
56             has 'manifest_entries' => (
57             is => 'ro',
58             lazy => 1,
59             builder => '_build_manifest_entries',
60             );
61              
62              
63             has 'tagmanifest_entries' => (
64             is => 'ro',
65             lazy => 1,
66             builder => '_build_tagmanifest_entries',
67             );
68              
69             sub __build_xxxmanifest_entries {
70 111     111   252 my ($self, $xxmanifest_file) = @_;
71 111         210 my $xxmanifest_entries = {};
72 111         366 my $algorithm = $self->algorithm()->name;
73 111 50       5167 open(my $XXMANIFEST, "<:encoding(UTF-8)", $xxmanifest_file) or croak("Cannot open $xxmanifest_file: $!");
74 111         12694 while (my $line = <$XXMANIFEST>) {
75 347         1882 $line = chomp_portable($line);
76 347         1244 my ($digest, $file) = split(/\s+/, $line, 2);
77 347 50 33     1334 next unless ((defined $digest) && (defined $file)); # empty lines!
78 347         2373 $xxmanifest_entries->{$file} = $digest;
79             }
80 111         1638 close($XXMANIFEST);
81 111         1100 return $xxmanifest_entries;
82             }
83              
84             sub _build_tagmanifest_entries {
85 46     46   467 my ($self) = @_;
86 46         866 my $tm_file = $self->tagmanifest_file();
87 46 50       391 return $self->__build_xxxmanifest_entries($tm_file) if (defined $tm_file);
88 0         0 return;
89             }
90              
91             sub _build_manifest_entries {
92 65     65   622 my ($self) = @_;
93 65         1280 my $m_file = $self->manifest_file();
94 65 50       588 return $self->__build_xxxmanifest_entries($m_file) if (defined $m_file);
95 0         0 return;
96             }
97              
98             sub _fill_digest_hashref { # should be handle if empty values and ignore it (because parallel map)
99 540     540   1202 my ($self, $bagit, $localname) = @_;
100 540 50 33     2608 return if ((!defined $localname) or (0 == length($localname)));
101 540         712 my $digest_hashref;
102 540         7028 my $fullname = File::Spec->catfile($bagit, $localname);
103 540         15876 my $calc_digest = $self->bagit->digest_callback();
104 540         5555 my $eval = &$calc_digest($self->algorithm(), $fullname);
105 540   50     2112 $digest_hashref->{calculated_digest} = $eval // '';
106 540         1196 $digest_hashref->{local_name} = $localname;
107 540         1027 $digest_hashref->{full_name} = $fullname;
108 540         1894 return $digest_hashref;
109             }
110              
111              
112              
113              
114             # calc digest
115             # expects expected_ref, array_ref of filenames
116             # returns arrayref of hashes where each entry has
117             # $tmp->{calculated_digest} = $digest;
118             # $tmp->{expected_digest} = $expected_digest;
119             # $tmp->{filename} = $filename;
120             sub calc_digests {
121 167     167 1 456 my ($self, $bagit, $filenames_ref) = @_;
122 167         249 my @digest_hashes;
123             my %digest_results;
124             # serial variant
125 167         281 @digest_hashes = map {$self->_fill_digest_hashref($bagit, $_)} @{$filenames_ref};
  540         1735  
  167         405  
126 167         560 return \@digest_hashes;
127             }
128              
129             sub _verify_XXX_manifests {
130 111     111   332 my ($self, $xxprefix, $xxmanifest_entries, $files_ref, $return_all_errors) = @_;
131             # Read the manifest file
132 111         205 my @files = @{ $files_ref };
  111         333  
133 111         220 my @invalid_messages;
134 111         488 my $bagit = $self->bagit->bag_path;
135 111         400 my $algorithm = $self->algorithm()->name;
136             my $subref_invalid_report_or_die = sub {
137 33     33   162 my $message = shift;
138 33 100       115 if (defined $return_all_errors) {
139 14         27 push @invalid_messages, $message;
140             } else {
141 19         431 croak($message);
142             }
143 14         36 return;
144 111         630 };
145 111         257 my $local_xxfilename = "${xxprefix}-${algorithm}.txt";
146 111         153 my %normalised_files;
147 111         247 foreach my $local_name (@files) {
148             #### Test readability
149             # local_name is relative to bagit base
150 340         3356 my $filepath = File::Spec->catfile($bagit, $local_name);
151 340 50       5948 unless (-r $filepath) {
152 0         0 &$subref_invalid_report_or_die(
153             "cannot read $local_name (bag-path:$bagit)",
154             );
155             }
156             ### Evaluate each file against the manifest
157             # first check if each file from payload exists in manifest_entries for given alg
158 340         990 my $normalized_local_name = normalize_payload_filepath($local_name);
159             # local_name is relative to bagit base
160 340 100       931 unless (exists $xxmanifest_entries->{$normalized_local_name}) { # localname as value should exist!
161 4         25 &$subref_invalid_report_or_die(
162             "file '$local_name' (normalized='$normalized_local_name') found, which is not in '$local_xxfilename' (bag-path:'$bagit')!"
163             #."DEBUG: \n".join("\n", keys %{$xxmanifest_entries->{$algorithm}})
164             );
165             }
166             ### second check if each file from manifest_entries for given alg exists in payload
167 336         954 $normalised_files{ $normalized_local_name } = 1;
168             }
169              
170 107         196 foreach my $local_mf_entry_path (keys %{$xxmanifest_entries}) {
  107         434  
171 336 100       660 if ( # to avoid escapes via manifest-files
172             check_if_payload_filepath_violates($local_mf_entry_path)
173             ) {
174 7         38 &$subref_invalid_report_or_die("file '$local_mf_entry_path' not allowed in '$local_xxfilename' (bag-path:'$bagit'")
175             } else {
176 329 100       953 unless (exists $normalised_files{$local_mf_entry_path}) {
177 1         6 &$subref_invalid_report_or_die(
178             "file '$local_mf_entry_path' NOT found, but expected via '$local_xxfilename' (bag-path:'$bagit')!"
179             );
180             }
181             }
182             }
183             # all preconditions full filled, now calc all digests
184 99         375 my $digest_hashes_ref = $self->calc_digests($bagit, \@files);
185             # compare digests
186 99 50 33     766 if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
187 99         238 foreach my $digest_entry (@{$digest_hashes_ref}) {
  99         232  
188 303         783 my $normalized = normalize_payload_filepath($digest_entry->{local_name});
189 303         818 $digest_entry->{expected_digest} = $xxmanifest_entries->{$normalized};
190 303 50       691 next unless (defined $digest_entry->{expected_digest}); # undef expected digests only occur if all preconditions fullfilled but return_all_errors was set, we should ignore it!
191 303 100       871 if ($digest_entry->{calculated_digest} ne $digest_entry->{expected_digest}) {
192 21         330 my $xxfilename = File::Spec->catfile($bagit, $local_xxfilename);
193             &$subref_invalid_report_or_die(
194             sprintf("file '%s' (normalized='%s') invalid, digest (%s) calculated=%s, but expected=%s in file '%s'",
195             $digest_entry->{local_name},
196             $normalized,
197             $algorithm,
198             $digest_entry->{calculated_digest},
199             $digest_entry->{expected_digest},
200 21         138 $xxfilename
201             )
202             );
203             }
204             }
205             }
206              
207 92 100 100     295 if ($return_all_errors && (scalar @invalid_messages > 0)) {
208 12         18 push @{$self->bagit->{errors}},
  12         85  
209             join("\n\t",
210             sort @invalid_messages
211             );
212 12         162 return;
213             }
214 80         1064 return 1;
215             }
216              
217              
218             sub verify_manifest {
219 78     78 1 2431 my ($self, $payload_files_ref, $return_all_errors) = @_;
220 78 100       1752 if ($self->manifest_file()) {
221 65         1952 return $self->_verify_XXX_manifests(
222             "manifest",
223             $self->manifest_entries(),
224             $payload_files_ref,
225             $return_all_errors
226             );
227             }
228 13         115 return;
229             }
230              
231              
232             sub verify_tagmanifest {
233 53     53 1 1343 my ($self, $non_payload_files_ref, $return_all_errors) = @_;
234 53         182 my @non_payload_files = grep {$_ !~ m#tagmanifest-[0-9a-zA-Z]+\.txt$#} @{ $non_payload_files_ref };
  259         752  
  53         140  
235 53 100       1319 if ($self->tagmanifest_file()) {
236 46         1266 return $self->_verify_XXX_manifests(
237             "tagmanifest",
238             $self->tagmanifest_entries(),
239             \@non_payload_files,
240             $return_all_errors
241             );
242             }
243 7         36 return;
244             }
245              
246             sub __create_xxmanifest {
247 68     68   335 my ($self, $prefix, $files_ref) = @_;
248 68         314 my $algo = $self->algorithm->name;
249 68         239 my $bagit = $self->bagit->bag_path;
250 68         1937 my $manifest_file = File::Spec->catfile($self->bagit->metadata_path, "$prefix-${algo}.txt");
251             # Generate digests for all of the files under ./data
252 68         1579 my $digest_hashes_ref = $self->calc_digests($bagit, $files_ref);
253 68 50 33     498 if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
254 68 50       10592 open(my $fh, ">:encoding(UTF-8)",$manifest_file) or croak("Cannot create $prefix-${algo}.txt: $!\n");
255 68         5725 foreach my $digest_entry (@{$digest_hashes_ref}) {
  68         202  
256 222         656 my $normalized_file = normalize_payload_filepath($digest_entry->{local_name});
257 222         408 my $digest = $digest_entry->{calculated_digest};
258 222         826 print($fh "$digest $normalized_file\n");
259             }
260 68         6691 close($fh);
261             }
262 68         512 return 1;
263             }
264              
265              
266             sub create_manifest {
267 34     34 1 383 my ($self) = @_;
268 34         2711 $self->__create_xxmanifest('manifest', $self->bagit->payload_files);
269 34         161 return 1;
270             }
271              
272              
273             sub create_tagmanifest {
274 34     34 1 2567 my ($self) = @_;
275 34         59 my @non_payload_files = grep {$_ !~ m#^tagmanifest-.*\.txt$#} @{ $self->bagit->non_payload_files };
  160         518  
  34         775  
276 34         136 $self->__create_xxmanifest('tagmanifest', \@non_payload_files);
277 34         202 return 1;
278             }
279              
280 12     12   38783 no Moo;
  12         2038  
  12         175  
281             1;
282              
283             __END__