File Coverage

blib/lib/Archive/BagIt.pm
Criterion Covered Total %
statement 494 534 92.5
branch 156 204 76.4
condition 17 39 43.5
subroutine 63 70 90.0
pod 18 20 90.0
total 748 867 86.2


line stmt bran cond sub pod time code
1             package Archive::BagIt;
2 12     12   4703763 use strict;
  12         23  
  12         562  
3 12     12   60 use warnings;
  12         25  
  12         958  
4 12     12   3660 use open ':std', ':encoding(UTF-8)';
  12         8186  
  12         88  
5 12     12   72563 use Encode qw( decode );
  12         31  
  12         1313  
6 12     12   74 use File::Spec ();
  12         21  
  12         327  
7 12     12   6136 use Class::Load qw( load_class );
  12         274752  
  12         1026  
8 12     12   120 use Carp qw( carp croak confess);
  12         21  
  12         826  
9 12     12   4563 use POSIX qw( strftime );
  12         64251  
  12         95  
10 12     12   22356 use Moo;
  12         89768  
  12         75  
11             with "Archive::BagIt::Role::Portability";
12              
13             our $VERSION = '0.101'; # VERSION
14              
15             # ABSTRACT: The main module to handle bags.
16              
17              
18              
19             around 'BUILDARGS' , sub {
20             my $orig = shift;
21             my $class = shift;
22             if (@_ == 1 && !ref $_[0]) {
23             return $class->$orig(bag_path=>$_[0]);
24             } else {
25             return $class->$orig(@_);
26             }
27             };
28              
29              
30             sub BUILD {
31 84     84 0 5226 my ($self, $args) = @_;
32 84 100       560 if (!defined $self->use_plugins()) {
33 83         359 return $self->load_plugins(("Archive::BagIt::Plugin::Manifest::MD5", "Archive::BagIt::Plugin::Manifest::SHA512"));
34             }
35 1         5 return $self->load_plugins($self->use_plugins());
36             }
37              
38             ###############################################
39              
40              
41             has 'force_utf8' => (
42             is => 'rw',
43             lazy => 1,
44             predicate => 'has_force_utf8',
45             );
46              
47             ###############################################
48              
49              
50             has 'bag_path' => (
51             is => 'rw',
52             );
53              
54             # if the user wants specific plugins!
55             has 'use_plugins' => (
56             is => 'rw',
57             lazy => 1,
58             );
59              
60             ###############################################
61              
62             has 'bag_path_arr' => (
63             is => 'ro',
64             lazy => 1,
65             builder => '_build_bag_path_arr',
66             );
67              
68             ###############################################
69              
70              
71             has 'metadata_path' => (
72             is=> 'ro',
73             lazy => 1,
74             builder => '_build_metadata_path',
75             );
76              
77             sub _build_metadata_path {
78 67     67   810 my ($self) = @_;
79 67         716 return $self->bag_path;
80             }
81              
82             ###############################################
83              
84             has 'metadata_path_arr' => (
85             is =>'ro',
86             lazy => 1,
87             builder => '_build_metadata_path_arr',
88             );
89              
90              
91             ###############################################
92              
93             has 'rel_metadata_path' => (
94             is => 'ro',
95             lazy => 1,
96             builder => '_build_rel_metadata_path',
97             );
98              
99             ###############################################
100              
101              
102             has 'payload_path' => (
103             is => 'ro',
104             lazy => 1,
105             builder => '_build_payload_path',
106             );
107              
108             sub _build_payload_path {
109 57     57   632 my ($self) = @_;
110 57         1878 return File::Spec->catdir($self->bag_path, "data");
111             }
112              
113             ###############################################
114              
115             has 'payload_path_arr' => (
116             is => 'ro',
117             lazy => 1,
118             builder => '_build_payload_path_arr',
119             );
120              
121             ###############################################
122              
123             has 'rel_payload_path' => (
124             is => 'ro',
125             lazy => 1,
126             builder => '_build_rel_payload_path',
127             );
128              
129             ###############################################
130              
131              
132             has 'checksum_algos' => (
133             is => 'ro',
134             lazy => 1,
135             builder => '_build_checksum_algos',
136             );
137              
138             ###############################################
139              
140              
141             has 'bag_version' => (
142             is => 'ro',
143             lazy => 1,
144             builder => '_build_bag_version',
145             );
146              
147             ###############################################
148              
149              
150             has 'bag_encoding' => (
151             is => 'ro',
152             lazy => 1,
153             builder => '_build_bag_encoding',
154             );
155              
156             ###############################################
157              
158              
159             has 'bag_info' => (
160             is => 'rw',
161             lazy => 1,
162             builder => '_build_bag_info',
163             predicate => 'has_bag_info'
164             );
165              
166             ###############################################
167              
168              
169             has 'errors' => (
170             is => 'ro',
171             lazy => 1,
172 0     0   0 builder => sub { my $self = shift; return [];},
  0         0  
173             );
174              
175             ###############################################
176              
177              
178              
179             has 'warnings' => (
180             is => 'ro',
181             lazy => 1,
182 0     0   0 builder => sub { my $self = shift; return [];},
  0         0  
183             );
184              
185             ###############################################
186              
187              
188             has 'digest_callback' => (
189             is => 'ro',
190             lazy => 1,
191             builder => sub {
192             my $sub = sub {
193 540     540   1232 my ($digestobj, $filename) = @_;
194 540 50       23131 if (-f $filename) {
195 540 50       21976 open(my $fh, '<:raw', $filename) or confess("Cannot open '$filename', $!");
196 540         1600 binmode($fh);
197 540         2251 my $digest = $digestobj->get_hash_string($fh);
198 540 50       10513 close $fh or confess("could not close file '$filename', $!");
199 540         3715 return $digest;
200             } else {
201 0         0 croak "file $filename is not a real file!";
202             }
203 37     37   661 };
204 37         161 return $sub;
205             }
206             );
207              
208             ###############################################
209              
210              
211             sub get_baginfo_values_by_key {
212 105     105 1 1094 my ($self, $searchkey) = @_;
213 105         3200 my $info = $self->bag_info();
214 105         716 my @values;
215 105 50       296 if (defined $searchkey) {
216 105         274 my $lc_flag = $self->is_baginfo_key_reserved( $searchkey );
217 105         162 foreach my $entry (@{ $info }) {
  105         279  
218 344 50       673 return unless defined $entry;
219 344         409 my ($key, $value) = %{ $entry };
  344         791  
220 344 100       623 if ( __case_aware_compare_for_baginfo( $key, $searchkey, $lc_flag) ) {
221 61         161 push @values, $value;
222             }
223             }
224             }
225 105 100       453 return @values if (scalar(@values) > 0);
226 45         147 return;
227             }
228              
229             ###############################################
230              
231              
232             sub is_baginfo_key_reserved_as_uniq {
233 257     257 1 465 my ($self, $searchkey) = @_;
234             # my $rx = qr{Bag-Count|Bag-Group-Identifier|Bag-Size|Bagging-Date|Payload-Oxum};
235 257         1542 return $searchkey =~ m/^(?:Bag(?:-(?:Group-Identifier|Count|Size)|ging-Date)|Payload-Oxum)$/i;
236             }
237              
238             ###############################################
239              
240              
241             sub is_baginfo_key_reserved {
242 363     363 1 694 my ($self, $searchkey) = @_;
243             # my $rx = qr/
244             # Bag-Count|
245             # Bag-Group-Identifier|
246             # Bag-Size|
247             # Bagging-Date|
248             # Contact-Email|
249             # Contact-Name|
250             # Contact-Phone|
251             # External-Description|
252             # External-Identifier|
253             # Internal-Sender-Description|
254             # Internal-Sender-Identifier|
255             # Organisation-Adress|
256             # Payload-Oxum|
257             # Source-Organization
258             # /;
259 363         1810 return $searchkey =~ m/^(?:Bag(?:-(?:Group-Identifier|Count|Size)|ging-Date)|Internal-Sender-(?:Description|Identifier)|External-(?:Description|Identifier)|Contact-(?:(?:Phon|Nam)e|Email)|Source-Organization|Organisation-Adress|Payload-Oxum)$/ix;
260             }
261              
262             ###############################################
263              
264             sub __case_aware_compare_for_baginfo {
265 469     469   781 my ($internal_key, $search_key, $lc_flag) = @_;
266 469   33     3112 return (defined $internal_key) && (
267             ( $lc_flag && ((lc $internal_key) eq (lc $search_key)) ) # for reserved keys use caseinsensitive search
268             ||
269             ( (!$lc_flag) && ($internal_key eq $search_key) ) # for other keys sensitive search
270             )
271             }
272              
273             ###############################################
274              
275             sub _find_baginfo_idx {
276 61     61   116 my ($self, $searchkey) = @_;
277 61 50       147 if (defined $searchkey) {
278 61 100       151 if (-1 < index($searchkey, ":")) {croak "key should not contain a colon! (searchkey='$searchkey')";}
  1         9  
279 60         1671 my $info = $self->bag_info();
280 60         412 my $size = scalar(@{$info});
  60         96  
281 60         140 my $lc_flag = $self->is_baginfo_key_reserved($searchkey);
282 60         197 foreach my $idx (reverse 0.. $size-1) { # for multiple entries return the latest addition
283 125         148 my %entry = %{$info->[$idx]};
  125         383  
284 125         299 my ($key, $value) = %entry;
285 125 100       203 if (__case_aware_compare_for_baginfo($key, $searchkey, $lc_flag)) {
286 16         49 return $idx;
287             }
288             }
289             }
290 44         117 return;
291             }
292              
293             ###############################################
294              
295             sub _collect_errors {
296 5     5   17 my ($self, $res) = @_;
297 5         10 push @{$self->{errors}}, $res;
  5         17  
298 5         22 return;
299             }
300              
301             ###############################################
302              
303             sub _if_error_push {
304 396     396   667 my ($self, $res) = @_;
305 396 50       778 if ($res ne "") {
306 0         0 return $self->_collect_errors($res);
307             }
308 396         1558 return 1;
309             }
310              
311             ###############################################
312              
313             sub _check_baginfo_keys_generically {
314 47     47   127 my ($self, $info) = @_;
315 47         84 my %keys;
316 47         78 my $ret=1;
317 47         73 foreach my $entry (@{$info}) {
  47         123  
318 198         270 my ($key, $value) = %{$entry};
  198         624  
319 198         401 my $res = _check_key($key); # check key
320 198   33     790 $ret &&= $self->_if_error_push($res);
321 198         393 $res = _check_value($value); # check value
322 198   33     599 $ret &&= $self->_if_error_push($res);
323             # code to prepare check of uniqueness
324 198 100       486 if ($self->is_baginfo_key_reserved($key)) {
325 140         584 $keys{ lc $key }++;
326             } else {
327 58         212 $keys{ $key }++
328             }
329             }
330             # check for uniqueness
331 47         207 foreach my $key (keys %keys) {
332 197 50 66     400 if (
333             ($self->is_baginfo_key_reserved_as_uniq($key))
334             and ($keys{$key} > 1)
335             ) {
336 0   0     0 $ret &&=$self->_collect_errors("Baginfo key '$key' exists $keys{$key}, but should be uniq!");
337             }
338             }
339 47         256 return $ret;
340             }
341              
342             ###############################################
343              
344             sub _verify_baginfo {
345 50     50   114 my ($self, $info) = @_;
346 50         90 my $ret = 1;
347              
348 50 100       159 if (!defined $info) {
349 3 50       17 if (exists $self->{bag_info_file}) {
350 0   0     0 $ret &&= $self->_collect_errors("'bag-info.txt' exists, but is not (partially) parseable!");
351             }
352             } else {
353 47   33     256 $ret &&= $self->_check_baginfo_keys_generically($info);
354             # check for payload oxum
355 47         139 my ($loaded_payloadoxum) = $self->get_baginfo_values_by_key('Payload-Oxum');
356 47 100       141 if (!defined $loaded_payloadoxum) {
357 3         6 push @{$self->{warnings}}, "Payload-Oxum was expected in bag-info.txt, but not found!"; # payload-oxum is recommended, but optional
  3         12  
358             } else {
359 44         166 my ($octets, $streamcount) = $self->calc_payload_oxum();
360 44 100       248 if ("$octets.$streamcount" ne $loaded_payloadoxum) {
361 5   33     79 $ret &&= $self->_collect_errors( "Payload-Oxum differs, calculated $octets.$streamcount but $loaded_payloadoxum was expected by bag-info.txt");
362             }
363             }
364             }
365 50         150 return $ret;
366             }
367              
368             ###############################################
369              
370             sub verify_baginfo {
371 58     58 1 3503 my ($self) = @_;
372 58         1844 my $info = $self->bag_info();
373 58 100   8   575 if (List::Util::any { /the baginfo file .* could not be parsed correctly/ } @{$self->{'errors'}}) {
  8         101  
  58         482  
374 8         42 return;
375             }
376 50         321 return $self->_verify_baginfo($info);
377             }
378              
379             ###############################################
380              
381              
382             sub delete_baginfo_by_key {
383 3     3 1 1280 my ($self, $searchkey) = @_;
384 3         7 my $idx = $self->_find_baginfo_idx($searchkey);
385 3 100       7 if (defined $idx) {
386 2         3 splice @{$self->{bag_info}}, $idx, 1; # delete nth indexed entry
  2         5  
387             }
388 3         12 return 1;
389             }
390              
391             ###############################################
392              
393              
394             sub exists_baginfo_key {
395 2     2 1 4 my ($self, $searchkey) =@_;
396 2         6 return (defined $self->_find_baginfo_idx($searchkey));
397             }
398              
399             ###############################################
400              
401             sub _replace_baginfo_by_first_match {
402 56     56   805 my ($self, $searchkey, $newvalue) = @_;
403 56         143 my $idx = $self->_find_baginfo_idx( $searchkey);
404 55 100       130 if (defined $idx) {
405 13         41 $self->{bag_info}[$idx] = {$searchkey => $newvalue};
406 13         26 return $idx;
407             }
408 42         97 return;
409             }
410              
411             ###############################################
412              
413             sub _check_key {
414 397     397   669 my ($key) = @_;
415 397 100       899 if (!defined $key) {
416 1         8 return "key should match '[^\\r\\n:]+', but is not defined";
417             }
418 396 100       1163 if ($key =~ m/[\r\n]/s) {
419 1         15 return "key should match '[^\\r\\n:]+', but contains newlines (key='$key')";
420             }
421 395 100       1003 if ($key =~ m/:/) {
422 2         11 return "key should not contain a colon! (key='$key')";
423             }
424 393 100       981 if ($key =~ m/^$/) {
425 1         11 return "key should have a length > null (key='')";
426             }
427 392         760 return "";
428             }
429              
430             ###############################################
431              
432             sub _check_key_or_croak {
433 195     195   337 my ($key) = @_;
434 195         2168 my $res = _check_key($key);
435 195 100       455 if ($res eq "") { return 1;}
  194         546  
436 1         15 croak $res;
437             }
438              
439             ###############################################
440              
441             sub _check_value {
442 264     264   494 my ($value) = @_;
443 264 100       572 if (!defined $value) { return "value should match '[^\\r\\n:]+', but is not defined"; }
  1         6  
444 263 100       590 if ($value =~ m/^$/s) { return "value should have a length > null (value='')"; }
  1         6  
445 262         440 return "";
446             }
447              
448             ###############################################
449              
450             sub _check_value_or_croak {
451 64     64   105 my ($value) = @_;
452 64         105 my $res = _check_value($value);
453 64 50       171 if ($res eq "") { return 1;}
  64         131  
454 0         0 croak $res;
455             }
456              
457             ###############################################
458              
459             sub append_baginfo_by_key {
460 60     60 1 537 my ($self, $searchkey, $newvalue) = @_;
461 60 50 33     116 if (
462             _check_key_or_croak($searchkey)
463             and (defined $newvalue)
464             ) {
465 60 50       146 if (-1 < index($searchkey, ":")) {croak "key should not contain a colon! (searchkey='$searchkey')";}
  0         0  
466 60 100       185 if ($self->is_baginfo_key_reserved_as_uniq($searchkey)) {
467 44 100       141 if (defined $self->get_baginfo_values_by_key($searchkey)) {
468             # hmm, search key is marked as uniq and still exists
469 5         12 return;
470             }
471             }
472 55         86 push @{$self->{bag_info}}, { $searchkey => $newvalue };
  55         262  
473             }
474 55         118 return 1;
475             }
476              
477             ###############################################
478              
479              
480             sub add_or_replace_baginfo_by_key {
481 71     71 1 621 my ($self, $searchkey, $newvalue) = @_;
482 71 50       179 if ( _check_key_or_croak($searchkey) ) {
483 70 50       182 if (-1 < index($searchkey, ":")) { croak "key should not contain a colon! (searchkey='$searchkey')"; }
  0         0  
484 70 100       161 if (defined $self->{bag_info}) {
485 53         190 my $idx = $self->_replace_baginfo_by_first_match( $searchkey, $newvalue);
486 53 100       119 if (defined $idx) { return $idx;}
  12         19  
487             }
488 58         190 $self->append_baginfo_by_key( $searchkey, $newvalue );
489 58         239 return -1;
490             }
491             }
492              
493             ###############################################
494              
495              
496             has 'forced_fixity_algorithm' => (
497             is => 'ro',
498             lazy => 1,
499             builder => '_build_forced_fixity_algorithm',
500             );
501              
502             sub has_forced_fixity_algorithm {
503 44     44 0 112 my $self = shift;
504 44         1365 return (defined $self->forced_fixity_algorithm() );
505             } # false if use_plugins used
506              
507             ###############################################
508              
509              
510             has 'manifest_files' => (
511             is => 'ro',
512             lazy => 1,
513             builder => '_build_manifest_files',
514             );
515              
516             ###############################################
517              
518              
519             has 'tagmanifest_files' => (
520             is => 'ro',
521             lazy => 1,
522             builder => '_build_tagmanifest_files',
523             );
524              
525             ###############################################
526              
527              
528             has 'payload_files' => ( # relatively to bagit base
529             is => 'ro',
530             lazy => 1,
531             builder => '_build_payload_files',
532             );
533              
534             ###############################################
535              
536              
537             has 'non_payload_files' => (
538             is=>'ro',
539             lazy => 1,
540             builder => '_build_non_payload_files',
541             );
542              
543             ###############################################
544              
545              
546             has 'plugins' => (
547             is=>'rw',
548             #isa=>'HashRef',
549             );
550              
551             ###############################################
552              
553              
554              
555             has 'manifests' => (
556             is => 'rw',
557             lazy => 1,
558             builder => '_build_manifests'
559             #isa=>'HashRef',
560             );
561              
562             ###############################################
563              
564              
565              
566             has 'algos' => (
567             is=>'rw',
568             #isa=>'HashRef',
569             );
570              
571             ###############################################
572              
573             sub _build_bag_path_arr {
574 0     0   0 my ($self) = @_;
575 0         0 my @split_path = File::Spec->splitdir($self->bag_path);
576 0         0 return @split_path;
577             }
578              
579             sub _build_payload_path_arr {
580 0     0   0 my ($self) = @_;
581 0         0 my @split_path = File::Spec->splitdir($self->payload_path);
582 0         0 return @split_path;
583             }
584              
585             sub _build_rel_payload_path {
586 0     0   0 my ($self) = @_;
587 0         0 my $rel_path = File::Spec->abs2rel( $self->payload_path, $self->bag_path ) ;
588 0         0 return $rel_path;
589             }
590              
591             sub _build_metadata_path_arr {
592 0     0   0 my ($self) = @_;
593 0         0 my @split_path = File::Spec->splitdir($self->metadata_path);
594 0         0 return @split_path;
595             }
596              
597             sub _build_rel_metadata_path {
598 0     0   0 my ($self) = @_;
599 0         0 my $rel_path = File::Spec->abs2rel( $self->metadata_path, $self->bag_path ) ;
600 0         0 return $rel_path;
601             }
602              
603             sub _build_checksum_algos {
604 19     19   1292 my($self) = @_;
605 19         36 my @checksums = keys %{ $self->manifests() };
  19         620  
606 19         248 return \@checksums;
607             }
608              
609             sub _build_manifest_files {
610 19     19   12195 my($self) = @_;
611 19         34 my @manifest_files;
612 19         27 foreach my $algo (@{$self->checksum_algos}) {
  19         474  
613 38         980 my $manifest_file = File::Spec->catfile($self->metadata_path, "manifest-$algo.txt");
614 38 100       1131 if (-f $manifest_file) {
615 35         102 push @manifest_files, $manifest_file;
616             }
617             }
618 19         97 return \@manifest_files;
619             }
620              
621             sub _build_tagmanifest_files {
622 19     19   4280 my ($self) = @_;
623 19         27 my @tagmanifest_files;
624 19         31 foreach my $algo (@{$self->checksum_algos}) {
  19         407  
625 38         947 my $tagmanifest_file = File::Spec->catfile($self->metadata_path,"tagmanifest-$algo.txt");
626 38 100       1448 if (-f $tagmanifest_file) {
627 35         124 push @tagmanifest_files, $tagmanifest_file;
628             }
629             }
630 19         104 return \@tagmanifest_files;
631             }
632              
633             sub __handle_nonportable_local_entry {
634 360     360   540 my $self = shift;
635 360         591 my $local_entry = shift;
636 360         504 my $dir = shift;
637 360 100       1575 if ($local_entry !~ m/^[a-zA-Z0-9._-]+$/) {
638 4         34 my $local_entry_utf8 = decode("UTF-8", $local_entry);
639 4 100       254 if ((!$self->has_force_utf8)) {
640 2         10 my $hexdump = "0x" . unpack('H*', $local_entry);
641 2         10 $local_entry =~m/[^a-zA-Z0-9._-]/; # to find PREMATCH, needed nextline
642             ## no critic (Variables::ProhibitMatchVars)
643             # the slowdown using prematch is accepatable, because only triggered in failure path
644 2         6 my $prematch_position = $`;
645 2         457 carp "possible non portable pathname detected in $dir,\n",
646             "got path (hexdump)='$hexdump'(hex),\n",
647             "decoded path='$local_entry_utf8'\n",
648             " "." "x length($prematch_position)."^"."------- first non portable char\n";
649             }
650 4         24 $local_entry = $local_entry_utf8;
651             }
652 360         2534 return $local_entry;
653             }
654              
655              
656             # own implementation, because File::Find has problems with UTF8 encoded Paths under MSWin32
657             # finds recursively all files in given directory.
658             # if $excludedir is defined, the content will be excluded
659             sub __file_find { ## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
660 90     90   2960 my ($self,$dir, $excludedir) = @_;
661 90 100       294 if (defined $excludedir) {
662 88         693 $excludedir = File::Spec->rel2abs( $excludedir);
663             }
664 90         197 my @file_paths;
665             my $finder;
666             $finder = sub {
667 108     108   224 my ($current_dir) = @_; #absolute path
668 108         170 my @todo;
669             my @tmp_file_paths;
670 108         6337 opendir( my $dh, $current_dir);
671 108         9567 my @paths = File::Spec->no_upwards ( readdir $dh );
672 108         1742 closedir $dh;
673 108         357 foreach my $local_entry (@paths) {
674 360         1009 my $path_entry = File::Spec->catdir($current_dir, $self->__handle_nonportable_local_entry($local_entry, $dir));
675 360 100 100     7083 if ((defined $excludedir) && ($path_entry eq $excludedir)) {
    100          
    50          
676             # ignore it, because excluded
677             } elsif (-f $path_entry) {
678 308         1011 push @tmp_file_paths, $path_entry;
679             } elsif (-d $path_entry) {
680 18         67 push @todo, $path_entry;
681             } else {
682 0         0 croak "not a file nor a dir found '$path_entry'";
683             }
684             }
685 108         504 push @file_paths, sort @tmp_file_paths;
686 108         717 foreach my $subdir (sort @todo) {
687 18         133 &$finder($subdir);
688             }
689 90         774 };
690 90         1009 my $absolute = File::Spec->rel2abs( $dir );
691 90         295 &$finder($absolute);
692 90         207 @file_paths = map { File::Spec->abs2rel( $_, $dir)} @file_paths;
  308         18575  
693 90         417 return @file_paths;
694             }
695              
696             sub _build_payload_files{
697 54     54   843 my ($self) = @_;
698 54         1339 my $payload_dir = $self->payload_path;
699 54         5250 my $reldir = File::Spec->abs2rel($payload_dir, $self->bag_path());
700 54         252 $reldir =~ s/^\.$//;
701             my @payload = map {
702 54 50       1589 $reldir eq "" ? $_ : File::Spec->catfile($reldir, $_)
  121         1016  
703             } $self->__file_find($payload_dir, File::Spec->rel2abs($self->metadata_path));
704 54 50       704 return wantarray ? @payload : \@payload;
705             }
706              
707              
708             sub __build_read_bagit_txt {
709 109     109   244 my($self) = @_;
710 109         3888 my $bagit = $self->metadata_path;
711 109         1978 my $file = File::Spec->catfile($bagit, "bagit.txt");
712 109 100       6409 open(my $BAGIT, "<:encoding(UTF-8)", $file) or croak("Cannot read '$file': $!");
713 108         12080 my $version_string = <$BAGIT>;
714 108         1520 my $encoding_string = <$BAGIT>;
715 108         1600 close($BAGIT);
716 108 50       373 if (defined $version_string) {
717 108         783 $version_string =~ s/[\r\n]//;
718             }
719 108 100       320 if (defined $encoding_string) {
720 106         371 $encoding_string =~s/[\r\n]//;
721             }
722 108         860 return ($version_string, $encoding_string, $file);
723             }
724              
725             sub _build_bag_version {
726 57     57   3501 my($self) = @_;
727 57         206 my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
728 56 50       241 croak "Version line missed in '$file" unless defined $version_string;
729 56 100       463 if ($version_string =~ /^BagIt-Version: ([01]\.[0-9]+)$/) {
730 53         404 return $1;
731             } else {
732 3         10 $version_string =~ s/\r//;
733 3         11 $version_string =~ s/^\N{U+FEFF}//;
734 3         104 croak "Version string '$version_string' of '$file' is incorrect";
735             };
736             }
737              
738             sub _build_bag_encoding {
739 52     52   719 my($self) = @_;
740 52         135 my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
741 52 100       190 croak "Encoding line missed in '$file" unless defined $encoding_string;
742 51 50       160 croak "Encoding '$encoding_string' of '$file' not supported by current Archive::BagIt module!" unless ($encoding_string ne "UTF-8");
743 51         261 return $encoding_string;
744             }
745              
746             sub __sort_bag_info {
747             my @sorted = sort {
748 1     1   25 my %tmpa = %{$a};
  11         14  
  11         23  
749 11         17 my %tmpb = %{$b};
  11         23  
750 11         23 my ($ka, $va) = each %tmpa;
751 11         19 my ($kb, $vb) = each %tmpb;
752 11         19 my $kres = $ka cmp $kb;
753 11 100       19 if ($kres != 0) {
754 10         26 return $kres;
755             } else {
756 1         4 return $va cmp $vb;
757             }
758             } @_;
759 1         4 return @sorted;
760             }
761              
762              
763             sub _extract_key_from_textblob {
764 932     932   13328 my ($self, $textblob) = @_;
765 932 100       1831 if (!defined $textblob) {
766 1         2 push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because textblob for key extraction is undefined";
  1         6  
767 1         6 return (undef, undef);
768             }
769 931         1002 my $key;
770 931         2875 my $rx_word = qr{[^: \t\r\n]+};# Hint: this word definition for bag-info.txt-keys differs from word definition of bag-info.txt-values!
771 931         1797 my $rx_spc = qr{\s}; #qr{[\t ]};
772 931 100       10921 if ($textblob =~ s/\A($rx_word)$rx_spc*:[\t ]*//m) {
773             # label if starts with chars not colon or whitespace followed by zero or more spaces, a colon, zero or more spaces
774 927 100       1901 if ($textblob eq "") {
775 1         2 push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because following text blob not fullfill the match requirements for values: '$textblob', empty value detected";
  1         7  
776 1         8 return ($1, undef);
777             }
778 926         2144 $key = $1;
779             } else {
780 4         10 push @{$self->{errors}}, "the baginfo file '".$self->{bag_info_file}."' could not be parsed correctly, because following text blob not fullfill the match requirements for keys: '$textblob'";
  4         28  
781             }
782 930         3665 return ($key, $textblob);
783             }
784              
785             sub _extract_value_from_textblob {
786 926     926   7074 my ($self, $textblob) = @_;
787 926 100       1543 if (!defined $textblob) {
788 1         2 push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because textblob for value extraction is undefined";
  1         5  
789 1         4 return (undef, undef);
790             }
791 925 100       1491 if ($textblob eq "") {
792 1         2 push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because textblob for value extraction is empty";
  1         4  
793 1         4 return (undef, "");
794             }
795 924         985 my $value;
796             # Ex1:
797             # |bar
798             # | baz --> one value: bar\n baz\n tss
799             # | tss
800             # Ex2:
801             # |bar --> one value: bar
802             # |baz
803             # Ex3:
804             # |bar:baz -> one value: bar:baz
805 924         1759 my $rx_word = qr{[^ \t\r\n]+}; # Hint: this word definition for bag-info.txt-values differs from word definition of bag-info.txt-keys!
806 924         1537 my $rx_spc = qr{[\t ]+};
807 924         4622 my $rx_word_spc_word = qr{($rx_word($rx_spc$rx_word)*)};
808 924 100       19789 if ($textblob =~ s/\A($rx_word_spc_word([\r\n]$rx_spc$rx_word_spc_word)+)[\r\n]*//ms) {
    100          
809 6         38 $value = $1;
810             } elsif ($textblob =~ s/\A($rx_word_spc_word)[\r\n]*//s) {
811 913         1778 $value = $1;
812             } else {
813 5         12 push @{$self->{errors}}, "the baginfo file '".$self->{bag_info_file}."' could not be parsed correctly, because following text blob not fullfill the match requirements for values: '$textblob'";
  5         44  
814             }
815 924         3108 return ($value, $textblob);
816             }
817              
818             # parses a bag-info textblob
819             sub _parse_bag_info { ## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
820 764     764   595561 my ($self, $textblob) = @_;
821             # metadata elements are OPTIONAL and MAY be repeated. Because "bag-
822             # info.txt" is intended for human reading and editing, ordering MAY be
823             # significant and the ordering of metadata elements MUST be preserved.
824             #
825             # A metadata element MUST consist of a label, a colon ":", a single
826             # linear whitespace character (space or tab), and a value that is
827             # terminated with an LF, a CR, or a CRLF.
828             #
829             # The label MUST NOT contain a colon (:), LF, or CR. The label MAY
830             # contain linear whitespace characters but MUST NOT start or end with
831             # whitespace.
832             #
833             # It is RECOMMENDED that lines not exceed 79 characters in length.
834             # Long values MAY be continued onto the next line by inserting a LF,
835             # CR, or CRLF, and then indenting the next line with one or more linear
836             # white space characters (spaces or tabs). Except for linebreaks, such
837             # padding does not form part of the value.
838             #
839             # Implementations wishing to support previous BagIt versions MUST
840             # accept multiple linear whitespace characters before and after the
841             # colon when the bag version is earlier than 1.0; such whitespace does
842             # not form part of the label or value.
843             # find all labels
844 764         1075 my @labels;
845 764 100       1851 croak "_parse_baginfo() called with undef value!" unless (defined $textblob);
846 763         883 while (1) {
847 1675 100       5532 last if ($textblob eq "");
848 920         1273 my ($key, $value);
849 920         2090 ($key, $textblob) = $self->_extract_key_from_textblob($textblob);
850 920 100       1920 last unless (defined $key);
851 917 50       1561 last unless (defined $textblob );
852 917         2266 ($value, $textblob) = $self->_extract_value_from_textblob($textblob);
853 917 100       1993 last unless (defined $value);
854 912 50       1616 if (defined $key) {
855 912         2603 my $entry = { $key => $value };
856 912         1923 push @labels, $entry;
857             }
858             }
859             # The RFC does not allow reordering:
860             #my @sorted = __sort_bag_info(@labels);
861             #return \@sorted;
862 763         2412 return \@labels;
863             }
864              
865             sub _build_bag_info {
866 59     59   3461 my ($self) = @_;
867 59         1693 my $bagit = $self->metadata_path;
868 59         2241 my $file = File::Spec->catfile($bagit, "bag-info.txt");
869 59 100       2113 if (-e $file) {
870 44 50       1847 open(my $BAGINFO, "<:encoding(UTF-8)", $file) or croak("Cannot read $file: $!");
871 44         3818 my @lines;
872 44         1248 while ( my $line = <$BAGINFO>) {
873 202         1362 push @lines, $line;
874             }
875 44         643 close($BAGINFO);
876 44         206 my $lines = join("", @lines);
877 44         242 $self->{bag_info_file}=$file;
878 44         176 return $self->_parse_bag_info($lines);
879             }
880             # bag-info.txt is optional
881 15         102 return;
882             }
883              
884             sub _build_non_payload_files {
885 33     33   844 my ($self) = @_;
886 33         740 my $non_payload_dir = $self->metadata_path();
887 33         2595 my $reldir = File::Spec->abs2rel($non_payload_dir, $self->bag_path());
888 33         189 $reldir =~ s/^\.$//;
889             my @non_payload = map {
890 33 50       971 $reldir eq "" ? $_ : File::Spec->catfile($reldir, $_)
  161         398  
891             } $self->__file_find($non_payload_dir, File::Spec->rel2abs($self->payload_path));
892 33 50       388 return wantarray ? @non_payload : \@non_payload;
893             }
894              
895             sub _build_forced_fixity_algorithm {
896 44     44   530 my ($self) = @_;
897 44 50       191 if ($self->use_plugins()) {
898 0         0 return;
899             } else {
900 44 100       974 if ($self->bag_version() >= 1.0) {
901 28         1133 return Archive::BagIt::Plugin::Algorithm::SHA512->new(bagit => $self);
902             }
903             else {
904 16         721 return Archive::BagIt::Plugin::Algorithm::MD5->new(bagit => $self);
905             }
906             }
907             }
908              
909             ###############################################
910              
911              
912             sub load_plugins {
913 251     251 1 825 my ($self, @plugins) = @_;
914 251         766 my $loaded_plugins = $self->plugins;
915 251 100       799 if (defined $loaded_plugins) {
916 83         174 @plugins = grep {not exists $loaded_plugins->{$_}} @plugins;
  83         398  
917             }
918 251 50       631 return if @plugins == 0;
919 251 50       792 if (exists $ENV{TEST_ACTIVE}) {
920 12     12   115255 use Cwd;
  12         28  
  12         30581  
921 251         3215 my $dir = getcwd();
922 251         2548 my @dirs = File::Spec->splitdir($dir);
923 251 100       901 if ($dirs[-1] eq 't') {
924 245         861 push @INC, "../lib";
925             }
926             }
927 251         567 foreach my $plugin (@plugins) {
928 334 50       1863 load_class ($plugin) or croak ("Can't load $plugin");
929 334         45794 $plugin->new({bagit => $self});
930             }
931              
932 251         1683 return 1;
933             }
934              
935             ###############################################
936              
937              
938             sub load {
939 1     1 1 2115 my ($self) = @_;
940             # call trigger
941 1         5 $self->bag_path;
942 1         38 $self->bag_version;
943 1         37 $self->bag_encoding;
944 1         35 $self->bag_info;
945 1         31 $self->payload_path;
946 1         30 $self->manifest_files;
947 1         29 $self->checksum_algos;
948 1         30 $self->tagmanifest_files;
949 1         54 return 1;
950             }
951              
952             ###############################################
953              
954              
955             sub verify_bag {
956 56     56 1 38072 my ($self,$opts) = @_;
957             #removed the ability to pass in a bag in the parameters, but might want options
958             #like $return all errors rather than dying on first one
959 56         217 my $bagit = $self->bag_path;
960 56         2218 my $version = $self->bag_version(); # to call trigger
961 52         1770 my $encoding = $self->bag_encoding(); # to call trigger
962 51         247 my $baginfo = $self->verify_baginfo(); #to call trigger
963              
964 51         1900 my $fetch_file = File::Spec->catfile($self->metadata_path, "fetch.txt");
965 51         2227 my $payload_dir = $self->payload_path;
966 51         453 my $return_all_errors = $opts->{return_all_errors};
967              
968 51 100       3425 if (-f $fetch_file) {
969 7         135 croak("Fetching via file '$fetch_file' is not supported by current Archive::BagIt implementation")
970             }
971             # check forced fixity
972 44 50       217 if ($self->has_forced_fixity_algorithm()) {
973 44         2450 my $forced_fixity_alg = $self->forced_fixity_algorithm()->name();
974 44         1246 my $manifest_file = File::Spec->catfile($self->metadata_path, "manifest-$forced_fixity_alg.txt");
975 44 100       2073 croak("Manifest '$manifest_file' is not a regular file or does not exist for given bagit version '$version'") unless -f ($manifest_file);
976             }
977 43 50       533 croak("Payload-directory '$payload_dir' is not a directory or does not exist") unless -d ($payload_dir);
978              
979 43 50       287 unless ($version > .95) {
980 0         0 croak ("Bag Version $version is unsupported");
981             }
982              
983 43         79 my @errors;
984              
985              
986             # check for manifests
987 43         68 foreach my $algorithm ( keys %{ $self->manifests }) {
  43         1215  
988 78         2367 my $res = $self->manifests->{$algorithm}->verify_manifest($self->payload_files, $return_all_errors);
989 63 50 66     406 if ((defined $res) && ($res ne "1")) { push @errors, $res; }
  0         0  
990             }
991             #check for tagmanifests
992 28         67 foreach my $algorithm ( keys %{ $self->manifests }) {
  28         819  
993 53         1471 my $res = $self->manifests->{$algorithm}->verify_tagmanifest($self->non_payload_files, $return_all_errors);
994 49 50 66     306 if ((defined $res) && ($res ne "1")) { push @errors, $res; }
  0         0  
995             }
996 24         57 push @{$self->{errors}}, @errors;
  24         81  
997 24         762 my $err = $self->errors();
998 24         236 my @err = @{ $err };
  24         63  
999 24 100       180 if (scalar( @err ) > 0) {
1000 8         312 croak join("\n","bag verify for bagit version '$version' failed with invalid files.", @err);
1001             }
1002 16         131 return 1;
1003             }
1004              
1005              
1006             sub calc_payload_oxum {
1007 78     78 1 148 my($self) = @_;
1008 78         132 my @payload = @{$self->payload_files};
  78         2283  
1009 78         529 my $octets=0;
1010 78         134 my $streamcount = scalar @payload;
1011 78         192 foreach my $local_name (@payload) {# local_name is relative to bagit base
1012 189         3251 my $file = File::Spec->catfile($self->bag_path(), $local_name);
1013 189 50       3348 if (-e $file) {
1014 189         358 my $filesize = 0;
1015 189 100       2492 $filesize = -s $file or carp "empty file $file detected";
1016 189         480 $octets += $filesize;
1017 0         0 } else { croak "file $file does not exist, $!"; }
1018             }
1019 78         338 return ($octets, $streamcount);
1020             }
1021              
1022              
1023             sub calc_bagsize {
1024 17     17 1 40 my($self) = @_;
1025 17         49 my ($octets,$streamcount) = $self->calc_payload_oxum();
1026 17 50       59 if ($octets < 1024) { return "$octets B"; }
  17 0       121  
    0          
    0          
1027 0         0 elsif ($octets < 1024*1024) {return sprintf("%0.1f kB", $octets/1024); }
1028 0         0 elsif ($octets < 1024*1024*1024) {return sprintf "%0.1f MB", $octets/(1024*1024); }
1029 0         0 elsif ($octets < 1024*1024*1024*1024) {return sprintf "%0.1f GB", $octets/(1024*1024*1024); }
1030 0         0 else { return sprintf "%0.2f TB", $octets/(1024*1024*1024*1024); }
1031             }
1032              
1033              
1034             sub create_bagit {
1035 17     17 1 52 my($self) = @_;
1036 17         603 my $metadata_path = $self->metadata_path();
1037 17         436 my $bagit_path = File::Spec->catfile( $metadata_path, "bagit.txt");
1038 17 50       3361 open(my $BAGIT, ">:encoding(UTF-8)", $bagit_path) or croak("Can't open $bagit_path for writing: $!");
1039 17         1339 print($BAGIT "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8");
1040 17         1464 close($BAGIT);
1041 17         126 return 1;
1042             }
1043              
1044              
1045             sub create_baginfo {
1046 17     17 1 46 my($self) = @_; # because bag-info.txt allows multiple key-value-entries, hash is replaced
1047 17         786 $self->add_or_replace_baginfo_by_key('Bagging-Date', POSIX::strftime("%Y-%m-%d", gmtime(time)));
1048 17         72 $self->add_or_replace_baginfo_by_key('Bag-Software-Agent', 'Archive::BagIt ');
1049 17         69 my ($octets, $streams) = $self->calc_payload_oxum();
1050 17         98 $self->add_or_replace_baginfo_by_key('Payload-Oxum', "$octets.$streams");
1051 17         71 $self->add_or_replace_baginfo_by_key('Bag-Size', $self->calc_bagsize());
1052             # The RFC does not allow reordering:
1053 17         425 my $metadata_path = $self->metadata_path();
1054 17         324 my $bag_info_path = File::Spec->catfile( $metadata_path, "bag-info.txt");
1055 17 100 66     143 if (
1056             (exists $self->{errors})
1057 1         6 and ((scalar @{$self->{errors}}) > 0)
1058             ) {
1059 1         28 croak "Could not create baginfo, because current file $bag_info_path has parsing errors!";
1060             }
1061 16 50       2557 open(my $BAGINFO, ">:encoding(UTF-8)", $bag_info_path) or croak("Can't open $bag_info_path for writing: $!");
1062 16         1266 foreach my $entry (@{ $self->bag_info() }) {
  16         527  
1063 64         238 my %tmp = %{ $entry };
  64         216  
1064 64         147 my ($key, $value) = %tmp;
1065 64         134 _check_key_or_croak($key);
1066 64         144 _check_value_or_croak($value);
1067 64 50       137 if (-1 < index($key,":")) { carp "key should not contain a colon! (searchkey='$key')"; }
  0         0  
1068 64         262 print($BAGINFO "$key: $value\n");
1069             }
1070 16         1359 close($BAGINFO);
1071 16         145 return 1;
1072             }
1073              
1074              
1075             sub store {
1076 17     17 1 48 my($self) = @_;
1077             # it is important to create all manifest files first, because tagmanifest should include all manifest-xxx.txt
1078 17         54 foreach my $algorithm ( keys %{ $self->manifests }) {
  17         456  
1079 34         1186 $self->manifests->{$algorithm}->create_manifest();
1080             }
1081 17         125 $self->create_bagit();
1082 17         75 $self->create_baginfo();
1083 16         31 foreach my $algorithm ( keys %{ $self->manifests }) {
  16         554  
1084 32         1176 $self->manifests->{$algorithm}->create_tagmanifest();
1085             }
1086             # retrigger builds
1087 16         90 $self->{checksum_algos} = $self->_build_checksum_algos();
1088 16         62 $self->{tagmanifest_files} = $self->_build_tagmanifest_files();
1089 16         56 $self->{manifest_files} = $self->_build_manifest_files();
1090 16         42 return 1;
1091             }
1092              
1093              
1094             sub init_metadata {
1095 17     17 1 53 my ($class, $bag_path, $options) = @_;
1096 17         77 $bag_path =~ s#/$##; # replace trailing slash
1097 17 50       421 unless ( -d $bag_path) { croak ( "source bag directory '$bag_path' doesn't exist"); }
  0         0  
1098 17         649 my $self = $class->new(bag_path=>$bag_path, %$options);
1099 17 100       400 carp "no payload path" if ! -d $self->payload_path;
1100 17 100       530 unless ( -d $self->payload_path) {
1101 2         330 rename ($bag_path, $bag_path.".tmp");
1102 2         360 mkdir ($bag_path);
1103 2         80 rename ($bag_path.".tmp", $self->payload_path);
1104             }
1105 17 50       961 unless ( -d $self->metadata_path) {
1106             #metadata path is not the root path for some reason
1107 0         0 mkdir ($self->metadata_path);
1108             }
1109 17         92 $self->store();
1110 16         102 return $self;
1111             }
1112              
1113              
1114             sub make_bag {
1115 17     17 1 81451 my ($class, $bag_path, $options) = @_;
1116 17         71 my $isa = ref $class;
1117 17 50       129 if ($isa eq "Archive::BagIt") { # not a class, but an object!
1118 0         0 croak "make_bag() only a class subroutine, not useable with objects. Try store() instead!\n";
1119             }
1120 17         108 my $self = $class->init_metadata($bag_path, $options);
1121 16         101 return $self;
1122             }
1123              
1124              
1125              
1126              
1127              
1128             __PACKAGE__->meta->make_immutable;
1129              
1130             1;
1131              
1132             __END__