File Coverage

blib/lib/Archive/BagIt.pm
Criterion Covered Total %
statement 402 440 91.3
branch 113 156 72.4
condition 8 12 66.6
subroutine 52 59 88.1
pod 18 20 90.0
total 593 687 86.3


line stmt bran cond sub pod time code
1             package Archive::BagIt;
2 9     9   312620 use strict;
  9         33  
  9         262  
3 9     9   40 use warnings;
  9         11  
  9         234  
4 9     9   2279 use utf8;
  9         48  
  9         47  
5 9     9   1359 use open ':std', ':encoding(UTF-8)';
  9         3174  
  9         42  
6 9     9   35894 use Encode qw( decode );
  9         22  
  9         577  
7 9     9   46 use File::Spec ();
  9         13  
  9         181  
8 9     9   3557 use Class::Load qw( load_class );
  9         147342  
  9         493  
9 9     9   65 use Carp qw( carp croak confess);
  9         13  
  9         389  
10 9     9   2498 use POSIX qw( strftime );
  9         32412  
  9         68  
11 9     9   11990 use Moo;
  9         59548  
  9         53  
12             with "Archive::BagIt::Role::Portability";
13              
14             our $VERSION = '0.094'; # VERSION
15              
16             # ABSTRACT: The main module to handle bags.
17              
18              
19              
20             around 'BUILDARGS' , sub {
21             my $orig = shift;
22             my $class = shift;
23             if (@_ == 1 && !ref $_[0]) {
24             return $class->$orig(bag_path=>$_[0]);
25             } else {
26             return $class->$orig(@_);
27             }
28             };
29              
30              
31             sub BUILD {
32 84     84 0 4171 my ($self, $args) = @_;
33 84 100       429 if (!defined $self->use_plugins()) {
34 83         353 return $self->load_plugins(("Archive::BagIt::Plugin::Manifest::MD5", "Archive::BagIt::Plugin::Manifest::SHA512"));
35             }
36 1         5 return $self->load_plugins($self->use_plugins());
37             }
38              
39             ###############################################
40              
41              
42             has 'use_parallel' => (
43             is => 'rw',
44             lazy => 1,
45             default => 0,
46             );
47              
48             ###############################################
49              
50              
51             has 'use_async' => (
52             is => 'rw',
53             lazy => 1,
54             default => 0,
55             );
56              
57             ###############################################
58              
59              
60             has 'force_utf8' => (
61             is => 'rw',
62             lazy => 1,
63             predicate => 'has_force_utf8',
64             );
65              
66             ###############################################
67              
68              
69             has 'bag_path' => (
70             is => 'rw',
71             );
72              
73             # if the user wants specific plugins!
74             has 'use_plugins' => (
75             is => 'rw',
76             lazy => 1,
77             );
78              
79             ###############################################
80              
81             has 'bag_path_arr' => (
82             is => 'ro',
83             lazy => 1,
84             builder => '_build_bag_path_arr',
85             );
86              
87             ###############################################
88              
89              
90             has 'metadata_path' => (
91             is=> 'ro',
92             lazy => 1,
93             builder => '_build_metadata_path',
94             );
95              
96             sub _build_metadata_path {
97 68     68   665 my ($self) = @_;
98 68         451 return $self->bag_path;
99             }
100              
101             ###############################################
102              
103             has 'metadata_path_arr' => (
104             is =>'ro',
105             lazy => 1,
106             builder => '_build_metadata_path_arr',
107             );
108              
109              
110             ###############################################
111              
112             has 'rel_metadata_path' => (
113             is => 'ro',
114             lazy => 1,
115             builder => '_build_rel_metadata_path',
116             );
117              
118             ###############################################
119              
120              
121             has 'payload_path' => (
122             is => 'ro',
123             lazy => 1,
124             builder => '_build_payload_path',
125             );
126              
127             sub _build_payload_path {
128 60     60   662 my ($self) = @_;
129 60         1604 return File::Spec->catdir($self->bag_path, "data");
130             }
131              
132             ###############################################
133              
134             has 'payload_path_arr' => (
135             is => 'ro',
136             lazy => 1,
137             builder => '_build_payload_path_arr',
138             );
139              
140             ###############################################
141              
142             has 'rel_payload_path' => (
143             is => 'ro',
144             lazy => 1,
145             builder => '_build_rel_payload_path',
146             );
147              
148             ###############################################
149              
150              
151             has 'checksum_algos' => (
152             is => 'ro',
153             lazy => 1,
154             builder => '_build_checksum_algos',
155             );
156              
157             ###############################################
158              
159              
160             has 'bag_version' => (
161             is => 'ro',
162             lazy => 1,
163             builder => '_build_bag_version',
164             );
165              
166             ###############################################
167              
168              
169             has 'bag_encoding' => (
170             is => 'ro',
171             lazy => 1,
172             builder => '_build_bag_encoding',
173             );
174              
175             ###############################################
176              
177              
178             has 'bag_info' => (
179             is => 'rw',
180             lazy => 1,
181             builder => '_build_bag_info',
182             predicate => 'has_bag_info'
183             );
184              
185             ###############################################
186              
187              
188             has 'errors' => (
189             is => 'ro',
190             lazy => 1,
191 0     0   0 builder => sub { my $self = shift; return [];},
  0         0  
192             );
193              
194             ###############################################
195              
196              
197              
198             has 'warnings' => (
199             is => 'ro',
200             lazy => 1,
201 0     0   0 builder => sub { my $self = shift; return [];},
  0         0  
202             );
203              
204             ###############################################
205              
206              
207             has 'digest_callback' => (
208             is => 'ro',
209             lazy => 1,
210             builder => sub {
211             my $sub = sub {
212 550     550   852 my ($digestobj, $filename) = @_;
213 550 50       7129 if (-f $filename) {
214 550 50       17294 open(my $fh, '<:raw', $filename) or confess("Cannot open '$filename', $!");
215 550         1547 binmode($fh);
216 550         2087 my $digest = $digestobj->get_hash_string($fh);
217 550 50       5892 close $fh or confess("could not close file '$filename', $!");
218 550         2990 return $digest;
219             } else {
220 0         0 croak "file $filename is not a real file!";
221             }
222 36     36   671 };
223 36         125 return $sub;
224             }
225             );
226              
227             ###############################################
228              
229              
230             sub get_baginfo_values_by_key {
231 111     111 1 1111 my ($self, $searchkey) = @_;
232 111         1920 my $info = $self->bag_info();
233 111         524 my @values;
234 111 50       279 if (defined $searchkey) {
235 111         278 my $lc_flag = $self->is_baginfo_key_reserved( $searchkey );
236 111         151 foreach my $entry (@{ $info }) {
  111         301  
237 363 50       568 return unless defined $entry;
238 363         322 my ($key, $value) = %{ $entry };
  363         674  
239 363 100       531 if ( __case_aware_compare_for_baginfo( $key, $searchkey, $lc_flag) ) {
240 64         149 push @values, $value;
241             }
242             }
243             }
244 111 100       423 return @values if (scalar(@values) > 0);
245 48         116 return;
246             }
247              
248             ###############################################
249              
250              
251             sub is_baginfo_key_reserved_as_uniq {
252 278     278 1 448 my ($self, $searchkey) = @_;
253 278         1192 return $searchkey =~ m/^(Bagging-Date)|(Bag-Size)|(Payload-Oxum)|(Bag-Group-Identifier)|(Bag-Count)$/i;
254             }
255              
256             ###############################################
257              
258              
259             sub is_baginfo_key_reserved {
260 390     390 1 563 my ($self, $searchkey) = @_;
261 390         1984 return $searchkey =~ m/^
262             (Source-Organization)|
263             (Organisation-Adress)|
264             (Contact-Name)|
265             (Contact-Phone)|
266             (Contact-Email)|
267             (External-Description)|
268             (Bagging-Date)|
269             (External-Identifier)|
270             (Bag-Size)|
271             (Payload-Oxum)|
272             (Bag-Group-Identifier)|
273             (Bag-Count)|
274             (Internal-Sender-Identifier)|
275             (Internal-Sender-Description)$/ix
276              
277             }
278              
279             ###############################################
280              
281             sub __case_aware_compare_for_baginfo {
282 482     482   710 my ($internal_key, $search_key, $lc_flag) = @_;
283 482   33     2774 return (defined $internal_key) && (
284             ( $lc_flag && ((lc $internal_key) eq (lc $search_key)) ) # for reserved keys use caseinsensitive search
285             ||
286             ( (!$lc_flag) && ($internal_key eq $search_key) ) # for other keys sensitive search
287             )
288             }
289              
290             ###############################################
291              
292             sub _find_baginfo_idx {
293 58     58   77 my ($self, $searchkey) = @_;
294 58 50       112 if (defined $searchkey) {
295 58 100       128 if ($searchkey =~ m/:/) {croak "key should not contain a colon! (searchkey='$searchkey')";}
  1         9  
296 57         1099 my $info = $self->bag_info();
297 57         334 my $size = scalar(@{$info});
  57         76  
298 57         121 my $lc_flag = $self->is_baginfo_key_reserved($searchkey);
299 57         189 foreach my $idx (reverse 0.. $size-1) { # for multiple entries return the latest addition
300 119         118 my %entry = %{$info->[$idx]};
  119         349  
301 119         226 my ($key, $value) = %entry;
302 119 100       167 if (__case_aware_compare_for_baginfo($key, $searchkey, $lc_flag)) {
303 16         49 return $idx;
304             }
305             }
306             }
307 41         82 return;
308             }
309             ###############################################
310              
311              
312             sub verify_baginfo {
313 56     56 1 128 my ($self) = @_;
314 56         90 my %keys;
315 56         1199 my $info = $self->bag_info();
316 56         209 my $ret = 1;
317 56 100       157 if (defined $info) {
318 49         83 foreach my $entry (@{$self->bag_info()}) {
  49         871  
319 222         512 my ($key, $value) = %{$entry};
  222         534  
320 222 100       502 if ($self->is_baginfo_key_reserved($key)) {
321 173         578 $keys{ lc $key }++;
322             }
323             else {
324 49         148 $keys{ $key }++
325             }
326             }
327 49         185 foreach my $key (keys %keys) {
328 222 100       382 if ($self->is_baginfo_key_reserved_as_uniq($key)) {
329 129 50       368 if ($keys{$key} > 1) {
330 0         0 push @{$self->{errors}}, "Baginfo key '$key' exists $keys{$key}, but should be uniq!";
  0         0  
331 0         0 $ret = undef;
332             }
333             }
334             }
335             }
336             # check for payload oxum
337 56         226 my ($loaded_payloadoxum) = $self->get_baginfo_values_by_key('Payload-Oxum');
338 56 100       145 if (defined $loaded_payloadoxum) {
339 47         236 my ($octets, $streamcount) = $self->calc_payload_oxum();
340 47 100       219 if ("$octets.$streamcount" ne $loaded_payloadoxum) {
341 5         15 push @{$self->{errors}}, "Payload-Oxum differs, calculated $octets.$streamcount but $loaded_payloadoxum was expected by bag-info.txt";
  5         39  
342 5         11 $ret = undef;
343             }
344             } else {
345 9         21 push @{$self->{warnings}}, "Payload-Oxum was expected in bag-info.txt, but not found!"; # payload-oxum is recommended, but optional
  9         55  
346             }
347 56         213 return $ret;
348             }
349              
350             ###############################################
351              
352              
353             sub delete_baginfo_by_key {
354 3     3 1 1304 my ($self, $searchkey) = @_;
355 3         7 my $idx = $self->_find_baginfo_idx($searchkey);
356 3 100       7 if (defined $idx) {
357 2         1 splice @{$self->{bag_info}}, $idx, 1; # delete nth indexed entry
  2         4  
358             }
359 3         10 return 1;
360             }
361              
362             ###############################################
363              
364              
365             sub exists_baginfo_key {
366 2     2 1 5 my ($self, $searchkey) =@_;
367 2         6 return (defined $self->_find_baginfo_idx($searchkey));
368             }
369              
370             ###############################################
371              
372             sub _replace_baginfo_by_first_match {
373 53     53   838 my ($self, $searchkey, $newvalue) = @_;
374 53         117 my $idx = $self->_find_baginfo_idx( $searchkey);
375 52 100       96 if (defined $idx) {
376 13         50 $self->{bag_info}[$idx] = {$searchkey => $newvalue};
377 13         22 return $idx;
378             }
379 39         58 return;
380             }
381              
382             ###############################################
383              
384              
385             sub append_baginfo_by_key {
386 56     56 1 557 my ($self, $searchkey, $newvalue) = @_;
387 56 50       103 if (defined $searchkey) {
388 56 50       122 if ($searchkey =~ m/:/) { croak "key should not contain a colon! (searchkey='$searchkey')"; }
  0         0  
389 56 100       122 if ($self->is_baginfo_key_reserved_as_uniq($searchkey)) {
390 41 100       115 if (defined $self->get_baginfo_values_by_key($searchkey)) {
391             # hmm, search key is marked as uniq and still exists
392 5         12 return;
393             }
394             }
395 51         64 push @{$self->{bag_info}}, {$searchkey => $newvalue};
  51         176  
396             }
397 51         74 return 1;
398             }
399              
400             ###############################################
401              
402              
403             sub add_or_replace_baginfo_by_key {
404 67     67 1 688 my ($self, $searchkey, $newvalue) = @_;
405 67 50       162 if (defined $searchkey) {
406 67 100       187 if ($searchkey =~ m/:/) { croak "key should not contain a colon! (searchkey='$searchkey')"; }
  1         14  
407 66 100       173 if (defined $self->{bag_info}) {
408 50         128 my $idx = $self->_replace_baginfo_by_first_match( $searchkey, $newvalue);
409 50 100       131 if (defined $idx) { return $idx;}
  12         17  
410             }
411 54         148 $self->append_baginfo_by_key( $searchkey, $newvalue );
412 54         70 return -1;
413             }
414             }
415              
416             ###############################################
417              
418              
419             has 'forced_fixity_algorithm' => (
420             is => 'ro',
421             lazy => 1,
422             builder => '_build_forced_fixity_algorithm',
423             );
424              
425             sub has_forced_fixity_algorithm {
426 49     49 0 96 my $self = shift;
427 49         1079 return (defined $self->forced_fixity_algorithm() );
428             } # false if use_plugins used
429              
430             ###############################################
431              
432              
433             has 'manifest_files' => (
434             is => 'ro',
435             lazy => 1,
436             builder => '_build_manifest_files',
437             );
438              
439             ###############################################
440              
441              
442             has 'tagmanifest_files' => (
443             is => 'ro',
444             lazy => 1,
445             builder => '_build_tagmanifest_files',
446             );
447              
448             ###############################################
449              
450              
451             has 'payload_files' => ( # relatively to bagit base
452             is => 'ro',
453             lazy => 1,
454             builder => '_build_payload_files',
455             );
456              
457             ###############################################
458              
459              
460             has 'non_payload_files' => (
461             is=>'ro',
462             lazy => 1,
463             builder => '_build_non_payload_files',
464             );
465              
466             ###############################################
467              
468              
469             has 'plugins' => (
470             is=>'rw',
471             #isa=>'HashRef',
472             );
473              
474             ###############################################
475              
476              
477              
478             has 'manifests' => (
479             is => 'rw',
480             lazy => 1,
481             builder => '_build_manifests'
482             #isa=>'HashRef',
483             );
484              
485             ###############################################
486              
487              
488              
489             has 'algos' => (
490             is=>'rw',
491             #isa=>'HashRef',
492             );
493              
494             ###############################################
495              
496             sub _build_bag_path_arr {
497 0     0   0 my ($self) = @_;
498 0         0 my @split_path = File::Spec->splitdir($self->bag_path);
499 0         0 return @split_path;
500             }
501              
502             sub _build_payload_path_arr {
503 0     0   0 my ($self) = @_;
504 0         0 my @split_path = File::Spec->splitdir($self->payload_path);
505 0         0 return @split_path;
506             }
507              
508             sub _build_rel_payload_path {
509 0     0   0 my ($self) = @_;
510 0         0 my $rel_path = File::Spec->abs2rel( $self->payload_path, $self->bag_path ) ;
511 0         0 return $rel_path;
512             }
513              
514             sub _build_metadata_path_arr {
515 0     0   0 my ($self) = @_;
516 0         0 my @split_path = File::Spec->splitdir($self->metadata_path);
517 0         0 return @split_path;
518             }
519              
520             sub _build_rel_metadata_path {
521 0     0   0 my ($self) = @_;
522 0         0 my $rel_path = File::Spec->abs2rel( $self->metadata_path, $self->bag_path ) ;
523 0         0 return $rel_path;
524             }
525              
526             sub _build_checksum_algos {
527 19     19   916 my($self) = @_;
528 19         37 my @checksums = keys %{ $self->manifests() };
  19         395  
529 19         258 return \@checksums;
530             }
531              
532             sub _build_manifest_files {
533 19     19   6731 my($self) = @_;
534 19         35 my @manifest_files;
535 19         27 foreach my $algo (@{$self->checksum_algos}) {
  19         317  
536 38         698 my $manifest_file = File::Spec->catfile($self->metadata_path, "manifest-$algo.txt");
537 38 100       837 if (-f $manifest_file) {
538 35         136 push @manifest_files, $manifest_file;
539             }
540             }
541 19         103 return \@manifest_files;
542             }
543              
544             sub _build_tagmanifest_files {
545 19     19   2092 my ($self) = @_;
546 19         31 my @tagmanifest_files;
547 19         27 foreach my $algo (@{$self->checksum_algos}) {
  19         308  
548 38         730 my $tagmanifest_file = File::Spec->catfile($self->metadata_path,"tagmanifest-$algo.txt");
549 38 100       956 if (-f $tagmanifest_file) {
550 35         173 push @tagmanifest_files, $tagmanifest_file;
551             }
552             }
553 19         97 return \@tagmanifest_files;
554             }
555              
556             sub __handle_nonportable_local_entry {
557 382     382   547 my $self = shift;
558 382         393 my $local_entry = shift;
559 382         401 my $dir = shift;
560 382         1345 my $rx_portable = qr/^[a-zA-Z0-9._-]+$/;
561 382         1755 my $is_portable = $local_entry =~ m/$rx_portable/;
562 382 100       854 if (! $is_portable) {
563 4         18 my $local_entry_utf8 = decode("UTF-8", $local_entry);
564 4 100       284 if ((!$self->has_force_utf8)) {
565 2         11 my $hexdump = "0x" . unpack('H*', $local_entry);
566 2         7 $local_entry =~m/[^a-zA-Z0-9._-]/; # to find PREMATCH, needed nextline
567 2         7 my $prematch_position = $`;
568 2         336 carp "possible non portable pathname detected in $dir,\n",
569             "got path (hexdump)='$hexdump'(hex),\n",
570             "decoded path='$local_entry_utf8'\n",
571             " "." "x length($prematch_position)."^"."------- first non portable char\n";
572             }
573 4         71 $local_entry = $local_entry_utf8;
574             }
575 382         2232 return $local_entry;
576             }
577              
578              
579              
580             sub __file_find { # own implementation, because File::Find has problems with UTF8 encoded Paths under MSWin32
581             # finds recursively all files in given directory.
582             # if $excludedir is defined, the content will be excluded
583 95     95   2575 my ($self,$dir, $excludedir) = @_;
584 95 100       267 if (defined $excludedir) {
585 93         613 $excludedir = File::Spec->rel2abs( $excludedir);
586             }
587 95         201 my @file_paths;
588              
589             my $finder;
590             $finder = sub {
591 113     113   217 my ($current_dir) = @_; #absolute path
592 113         225 my @todo;
593             my @tmp_file_paths;
594 113         5064 opendir( my $dh, $current_dir);
595 113         7539 my @paths = File::Spec->no_upwards ( readdir $dh );
596 113         1561 closedir $dh;
597 113         449 foreach my $local_entry (@paths) {
598 382         1141 my $path_entry = File::Spec->catdir($current_dir, $self->__handle_nonportable_local_entry($local_entry, $dir));
599 382 100       5215 if (-f $path_entry) {
    50          
600 328         1116 push @tmp_file_paths, $path_entry;
601             } elsif (-d $path_entry) {
602 54 100 100     351 next if ((defined $excludedir) && ($path_entry eq $excludedir));
603 18         52 push @todo, $path_entry;
604             } else {
605 0         0 croak "not a file nor a dir found '$path_entry'";
606             }
607             }
608 113         732 push @file_paths, sort @tmp_file_paths;
609 113         537 foreach my $subdir (sort @todo) {
610 18         70 &$finder($subdir);
611             }
612 95         781 };
613 95         1072 my $absolute = File::Spec->rel2abs( $dir );
614 95         323 &$finder($absolute);
615 95         275 @file_paths = map { File::Spec->abs2rel( $_, $dir)} @file_paths;
  328         13656  
616 95         362 return @file_paths;
617             }
618              
619             sub _build_payload_files{
620 57     57   754 my ($self) = @_;
621 57         1187 my $payload_dir = $self->payload_path;
622 57         5837 my $reldir = File::Spec->abs2rel($payload_dir, $self->bag_path());
623 57         221 $reldir =~ s/^\.$//;
624             my @payload = map {
625 57 50       1167 $reldir eq "" ? $_ : File::Spec->catfile($reldir, $_)
  128         906  
626             } $self->__file_find($payload_dir, File::Spec->rel2abs($self->metadata_path));
627 57 50       396 return wantarray ? @payload : \@payload;
628             }
629              
630              
631             sub __build_read_bagit_txt {
632 117     117   212 my($self) = @_;
633 117         1782 my $bagit = $self->metadata_path;
634 117         1769 my $file = File::Spec->catfile($bagit, "bagit.txt");
635 117 100       6639 open(my $BAGIT, "<:encoding(UTF-8)", $file) or croak("Cannot read '$file': $!");
636 116         12520 my $version_string = <$BAGIT>;
637 116         1419 my $encoding_string = <$BAGIT>;
638 116         1707 close($BAGIT);
639 116 50       405 if (defined $version_string) {
640 116         801 $version_string =~ s/[\r\n]//;
641             }
642 116 100       283 if (defined $encoding_string) {
643 114         328 $encoding_string =~s/[\r\n]//;
644             }
645 116         718 return ($version_string, $encoding_string, $file);
646             }
647              
648             sub _build_bag_version {
649 61     61   2226 my($self) = @_;
650 61         251 my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
651 60 50       198 croak "Version line missed in '$file" unless defined $version_string;
652 60 100       353 if ($version_string =~ /^BagIt-Version: ([01]\.[0-9]+)$/) {
653 57         429 return $1;
654             } else {
655 3         13 $version_string =~ s/\r//;
656 3         7 $version_string =~ s/^\N{U+FEFF}//;
657 3         97 croak "Version string '$version_string' of '$file' is incorrect";
658             };
659             }
660              
661             sub _build_bag_encoding {
662 56     56   647 my($self) = @_;
663 56         188 my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
664 56 100       239 croak "Encoding line missed in '$file" unless defined $encoding_string;
665 55 50       227 croak "Encoding '$encoding_string' of '$file' not supported by current Archive::BagIt module!" unless ($encoding_string !~ m/^UTF-8$/);
666 55         248 return $encoding_string;
667             }
668              
669             sub __sort_bag_info {
670             my @sorted = sort {
671 1     1   19 my %tmpa = %{$a};
  11         11  
  11         16  
672 11         11 my %tmpb = %{$b};
  11         13  
673 11         16 my ($ka, $va) = each %tmpa;
674 11         11 my ($kb, $vb) = each %tmpb;
675 11         12 my $kres = $ka cmp $kb;
676 11 100       15 if ($kres != 0) {
677 10         15 return $kres;
678             } else {
679 1         2 return $va cmp $vb;
680             }
681             } @_;
682 1         3 return @sorted;
683             }
684              
685             sub _parse_bag_info { # parses a bag-info textblob
686 757     757   434284 my ($self, $textblob) = @_;
687             # metadata elements are OPTIONAL and MAY be repeated. Because "bag-
688             # info.txt" is intended for human reading and editing, ordering MAY be
689             # significant and the ordering of metadata elements MUST be preserved.
690             #
691             # A metadata element MUST consist of a label, a colon ":", a single
692             # linear whitespace character (space or tab), and a value that is
693             # terminated with an LF, a CR, or a CRLF.
694             #
695             # The label MUST NOT contain a colon (:), LF, or CR. The label MAY
696             # contain linear whitespace characters but MUST NOT start or end with
697             # whitespace.
698             #
699             # It is RECOMMENDED that lines not exceed 79 characters in length.
700             # Long values MAY be continued onto the next line by inserting a LF,
701             # CR, or CRLF, and then indenting the next line with one or more linear
702             # white space characters (spaces or tabs). Except for linebreaks, such
703             # padding does not form part of the value.
704             #
705             # Implementations wishing to support previous BagIt versions MUST
706             # accept multiple linear whitespace characters before and after the
707             # colon when the bag version is earlier than 1.0; such whitespace does
708             # not form part of the label or value.
709             # find all labels
710 757         969 my @labels;
711 757         5205 while ($textblob =~ s/^([^:\s]+)\s*:\s*//m) { # label if starts with chars not colon or whitespace followed by zero or more spaces, a colon, zero or more spaces
712             # label found
713 924         1984 my $label = $1;
714 924         976 my $value='';
715 924 100       35591 if ($textblob =~ s/(.+?)(?=^\S)//ms) {
    50          
716             # value if rest string starts with chars not \r and/or \n until a non-whitespace after \r\n
717 167         408 $value = chomp_portable($1);
718             } elsif ($textblob =~ s/(.*)//s) {
719 757         1681 $value = chomp_portable($1);
720             }
721 924 50       1642 if (defined $label) {
722 924         3673 push @labels, { $label => $value };
723             }
724             }
725             # The RFC does not allow reordering:
726             #my @sorted = __sort_bag_info(@labels);
727             #return \@sorted;
728 757         1663 return \@labels;
729             }
730              
731             sub _build_bag_info {
732 60     60   2690 my ($self) = @_;
733 60         876 my $bagit = $self->metadata_path;
734 60         951 my $file = File::Spec->catfile($bagit, "bag-info.txt");
735 60 100       1196 if (-e $file) {
736 42 50       1356 open(my $BAGINFO, "<:encoding(UTF-8)", $file) or croak("Cannot read $file: $!");
737 42         2358 my @lines;
738 42         886 while ( my $line = <$BAGINFO>) {
739 197         1187 push @lines, $line;
740             }
741 42         564 close($BAGINFO);
742 42         282 my $lines = join("", @lines);
743 42         216 return $self->_parse_bag_info($lines);
744             }
745             # bag-info.txt is optional
746 18         107 return;
747             }
748              
749             sub _build_non_payload_files {
750 35     35   704 my ($self) = @_;
751 35         486 my $non_payload_dir = $self->metadata_path();
752 35         2845 my $reldir = File::Spec->abs2rel($non_payload_dir, $self->bag_path());
753 35         198 $reldir =~ s/^\.$//;
754             my @non_payload = map {
755 35 50       706 $reldir eq "" ? $_ : File::Spec->catfile($reldir, $_)
  174         361  
756             } $self->__file_find($non_payload_dir, File::Spec->rel2abs($self->payload_path));
757 35 50       319 return wantarray ? @non_payload : \@non_payload;
758             }
759              
760             sub _build_forced_fixity_algorithm {
761 48     48   494 my ($self) = @_;
762 48 50       255 if ($self->use_plugins()) {
763 0         0 return;
764             } else {
765 48 100       708 if ($self->bag_version() >= 1.0) {
766 32         1147 return Archive::BagIt::Plugin::Algorithm::SHA512->new(bagit => $self);
767             }
768             else {
769 16         567 return Archive::BagIt::Plugin::Algorithm::MD5->new(bagit => $self);
770             }
771             }
772             }
773              
774             ###############################################
775              
776              
777             sub load_plugins {
778 251     251 1 513 my ($self, @plugins) = @_;
779              
780             #p(@plugins);
781 251         496 my $loaded_plugins = $self->plugins;
782 251         519 @plugins = grep { not exists $loaded_plugins->{$_} } @plugins;
  334         928  
783              
784 251 50       824 return if @plugins == 0;
785 251         476 foreach my $plugin (@plugins) {
786 334 50       1316 load_class ($plugin) or croak ("Can't load $plugin");
787 334         27554 $plugin->new({bagit => $self});
788             }
789              
790 251         1219 return 1;
791             }
792              
793             ###############################################
794              
795              
796             sub load {
797 1     1 1 1127 my ($self) = @_;
798             # call trigger
799 1         3 $self->bag_path;
800 1         21 $self->bag_version;
801 1         21 $self->bag_encoding;
802 1         21 $self->bag_info;
803 1         18 $self->payload_path;
804 1         22 $self->manifest_files;
805 1         17 $self->checksum_algos;
806 1         16 $self->tagmanifest_files;
807 1         8 return 1;
808             }
809              
810             ###############################################
811              
812              
813             sub verify_bag {
814 61     61 1 36136 my ($self,$opts) = @_;
815             #removed the ability to pass in a bag in the parameters, but might want options
816             #like $return all errors rather than dying on first one
817 61         247 my $bagit = $self->bag_path;
818 61         1453 my $version = $self->bag_version(); # to call trigger
819 57         1256 my $encoding = $self->bag_encoding(); # to call trigger
820 56         281 my $baginfo = $self->verify_baginfo(); #to call trigger
821              
822 56         1133 my $fetch_file = File::Spec->catfile($self->metadata_path, "fetch.txt");
823 56         1543 my $payload_dir = $self->payload_path;
824 56         385 my $return_all_errors = $opts->{return_all_errors};
825              
826 56 100       1170 if (-f $fetch_file) {
827 7         209 croak("Fetching via file '$fetch_file' is not supported by current Archive::BagIt implementation")
828             }
829             # check forced fixity
830 49 50       326 if ($self->has_forced_fixity_algorithm()) {
831 49         789 my $forced_fixity_alg = $self->forced_fixity_algorithm()->name();
832 49         989 my $manifest_file = File::Spec->catfile($self->metadata_path, "manifest-$forced_fixity_alg.txt");
833 49 100       1496 croak("Manifest '$manifest_file' is not a regular file or does not exist for given bagit version '$version'") unless -f ($manifest_file);
834             }
835 48 50       597 croak("Payload-directory '$payload_dir' is not a directory or does not exist") unless -d ($payload_dir);
836              
837 48 50       261 unless ($version > .95) {
838 0         0 croak ("Bag Version $version is unsupported");
839             }
840              
841              
842 48         89 my @errors;
843              
844              
845             # check for manifests
846 48         72 foreach my $algorithm ( keys %{ $self->manifests }) {
  48         1152  
847 84         1829 my $res = $self->manifests->{$algorithm}->verify_manifest($self->payload_files, $return_all_errors);
848 66 50 66     365 if ((defined $res) && ($res ne "1")) { push @errors, $res; }
  0         0  
849             }
850             #check for tagmanifests
851 30         66 foreach my $algorithm ( keys %{ $self->manifests }) {
  30         584  
852 60         1314 my $res = $self->manifests->{$algorithm}->verify_tagmanifest($self->non_payload_files, $return_all_errors);
853 56 50 66     338 if ((defined $res) && ($res ne "1")) { push @errors, $res; }
  0         0  
854             }
855 26         53 push @{$self->{errors}}, @errors;
  26         101  
856 26         611 my $err = $self->errors();
857 26         226 my @err = @{ $err };
  26         71  
858 26 100       85 if (scalar( @err ) > 0) {
859 7         257 croak join("\n","bag verify for bagit version '$version' failed with invalid files.", @err);
860             }
861 19         180 return 1;
862             }
863              
864              
865             sub calc_payload_oxum {
866 79     79 1 186 my($self) = @_;
867 79         107 my @payload = @{$self->payload_files};
  79         1378  
868 79         340 my $octets=0;
869 79         122 my $streamcount = scalar @payload;
870 79         169 foreach my $local_name (@payload) {# local_name is relative to bagit base
871 190         1502 my $file = File::Spec->catfile($self->bag_path(), $local_name);
872 190 50       2211 if (-e $file) {
873 190         421 my $filesize = 0;
874 190 100       1959 $filesize = -s $file or carp "empty file $file detected";
875 190         483 $octets += $filesize;
876 0         0 } else { croak "file $file does not exist, $!"; }
877             }
878 79         297 return ($octets, $streamcount);
879             }
880              
881              
882             sub calc_bagsize {
883 16     16 1 39 my($self) = @_;
884 16         55 my ($octets,$streamcount) = $self->calc_payload_oxum();
885 16 50       51 if ($octets < 1024) { return "$octets B"; }
  16 0       87  
    0          
    0          
886 0         0 elsif ($octets < 1024*1024) {return sprintf("%0.1f kB", $octets/1024); }
887 0         0 elsif ($octets < 1024*1024*1024) {return sprintf "%0.1f MB", $octets/(1024*1024); }
888 0         0 elsif ($octets < 1024*1024*1024*1024) {return sprintf "%0.1f GB", $octets/(1024*1024*1024); }
889 0         0 else { return sprintf "%0.2f TB", $octets/(1024*1024*1024*1024); }
890             }
891              
892              
893             sub create_bagit {
894 16     16 1 33 my($self) = @_;
895 16         276 my $metadata_path = $self->metadata_path();
896 16         243 my $bagit_path = File::Spec->catfile( $metadata_path, "bagit.txt");
897 16 50       1770 open(my $BAGIT, ">:encoding(UTF-8)", $bagit_path) or croak("Can't open $bagit_path for writing: $!");
898 16         1624 print($BAGIT "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8");
899 16         1438 close($BAGIT);
900 16         109 return 1;
901             }
902              
903              
904             sub create_baginfo {
905 16     16 1 52 my($self) = @_; # because bag-info.txt allows multiple key-value-entries, hash is replaced
906 16         1837 $self->add_or_replace_baginfo_by_key('Bagging-Date', POSIX::strftime("%Y-%m-%d", gmtime(time)));
907 16         123 $self->add_or_replace_baginfo_by_key('Bag-Software-Agent', 'Archive::BagIt ');
908 16         71 my ($octets, $streams) = $self->calc_payload_oxum();
909 16         84 $self->add_or_replace_baginfo_by_key('Payload-Oxum', "$octets.$streams");
910 16         73 $self->add_or_replace_baginfo_by_key('Bag-Size', $self->calc_bagsize());
911             # The RFC does not allow reordering:
912 16         268 my $metadata_path = $self->metadata_path();
913 16         234 my $bag_info_path = File::Spec->catfile( $metadata_path, "bag-info.txt");
914 16 50       1410 open(my $BAGINFO, ">:encoding(UTF-8)", $bag_info_path) or croak("Can't open $bag_info_path for writing: $!");
915 16         1240 foreach my $entry (@{ $self->bag_info() }) {
  16         367  
916 64         165 my %tmp = %{ $entry };
  64         160  
917 64         150 my ($key, $value) = %tmp;
918 64 50       127 if ($key =~ m/:/) { carp "key should not contain a colon! (searchkey='$key')"; }
  0         0  
919 64         254 print($BAGINFO "$key: $value\n");
920             }
921 16         1338 close($BAGINFO);
922 16         106 return 1;
923             }
924              
925              
926             sub store {
927 16     16 1 55 my($self) = @_;
928 16         82 $self->create_bagit();
929 16         76 $self->create_baginfo();
930             # it is important to create all manifest files first, because tagmanifest should include all manifest-xxx.txt
931 16         30 foreach my $algorithm ( keys %{ $self->manifests }) {
  16         401  
932 32         747 $self->manifests->{$algorithm}->create_manifest();
933             }
934 16         43 foreach my $algorithm ( keys %{ $self->manifests }) {
  16         428  
935 32         750 $self->manifests->{$algorithm}->create_tagmanifest();
936             }
937             # retrigger builds
938 16         124 $self->{checksum_algos} = $self->_build_checksum_algos();
939 16         80 $self->{tagmanifest_files} = $self->_build_tagmanifest_files();
940 16         71 $self->{manifest_files} = $self->_build_manifest_files();
941 16         38 return 1;
942             }
943              
944              
945             sub init_metadata {
946 16     16 1 84 my ($class, $bag_path, $options) = @_;
947 16         76 $bag_path =~ s#/$##; # replace trailing slash
948 16 50       221 unless ( -d $bag_path) { croak ( "source bag directory '$bag_path' doesn't exist"); }
  0         0  
949 16         537 my $self = $class->new(bag_path=>$bag_path, %$options);
950 16 100       275 carp "no payload path" if ! -d $self->payload_path;
951 16 100       345 unless ( -d $self->payload_path) {
952 2         123 rename ($bag_path, $bag_path.".tmp");
953 2         87 mkdir ($bag_path);
954 2         53 rename ($bag_path.".tmp", $self->payload_path);
955             }
956 16 50       609 unless ( -d $self->metadata_path) {
957             #metadata path is not the root path for some reason
958 0         0 mkdir ($self->metadata_path);
959             }
960 16         107 $self->store();
961 16         80 return $self;
962             }
963              
964              
965             sub make_bag {
966 16     16 1 44114 my ($class, $bag_path, $options) = @_;
967 16         49 my $isa = ref $class;
968 16 50       81 if ($isa eq "Archive::BagIt") { # not a class, but an object!
969 0         0 croak "make_bag() only a class subroutine, not useable with objects. Try store() instead!\n";
970             }
971 16         75 my $self = $class->init_metadata($bag_path, $options);
972 16         87 return $self;
973             }
974              
975              
976              
977              
978              
979             __PACKAGE__->meta->make_immutable;
980              
981             1;
982              
983             __END__