File Coverage

blib/lib/Archive/BagIt.pm
Criterion Covered Total %
statement 400 438 91.3
branch 113 156 72.4
condition 8 12 66.6
subroutine 52 59 88.1
pod 18 20 90.0
total 591 685 86.2


line stmt bran cond sub pod time code
1             package Archive::BagIt;
2 9     9   372711 use strict;
  9         41  
  9         299  
3 9     9   50 use warnings;
  9         19  
  9         272  
4 9     9   1888 use utf8;
  9         54  
  9         48  
5 9     9   1681 use open ':std', ':encoding(UTF-8)';
  9         3814  
  9         67  
6 9     9   41798 use Encode qw( decode );
  9         16  
  9         759  
7 9     9   65 use File::Spec ();
  9         14  
  9         216  
8 9     9   4403 use Class::Load qw( load_class );
  9         174073  
  9         577  
9 9     9   84 use Carp qw( carp croak confess);
  9         27  
  9         472  
10 9     9   3057 use POSIX qw( strftime );
  9         38830  
  9         69  
11 9     9   14524 use Moo;
  9         70870  
  9         70  
12             with "Archive::BagIt::Role::Portability";
13              
14             our $VERSION = '0.095'; # VERSION
15              
16             # ABSTRACT: The main module to handle bags.
17              
18              
19              
20             around 'BUILDARGS' , sub {
21             my $orig = shift;
22             my $class = shift;
23             if (@_ == 1 && !ref $_[0]) {
24             return $class->$orig(bag_path=>$_[0]);
25             } else {
26             return $class->$orig(@_);
27             }
28             };
29              
30              
31             sub BUILD {
32 84     84 0 3574 my ($self, $args) = @_;
33 84 100       295 if (!defined $self->use_plugins()) {
34 83         199 return $self->load_plugins(("Archive::BagIt::Plugin::Manifest::MD5", "Archive::BagIt::Plugin::Manifest::SHA512"));
35             }
36 1         6 return $self->load_plugins($self->use_plugins());
37             }
38              
39             ###############################################
40              
41              
42             has 'use_parallel' => (
43             is => 'rw',
44             lazy => 1,
45             default => 0,
46             );
47              
48             ###############################################
49              
50              
51             has 'use_async' => (
52             is => 'rw',
53             lazy => 1,
54             default => 0,
55             );
56              
57             ###############################################
58              
59              
60             has 'force_utf8' => (
61             is => 'rw',
62             lazy => 1,
63             predicate => 'has_force_utf8',
64             );
65              
66             ###############################################
67              
68              
69             has 'bag_path' => (
70             is => 'rw',
71             );
72              
73             # if the user wants specific plugins!
74             has 'use_plugins' => (
75             is => 'rw',
76             lazy => 1,
77             );
78              
79             ###############################################
80              
81             has 'bag_path_arr' => (
82             is => 'ro',
83             lazy => 1,
84             builder => '_build_bag_path_arr',
85             );
86              
87             ###############################################
88              
89              
90             has 'metadata_path' => (
91             is=> 'ro',
92             lazy => 1,
93             builder => '_build_metadata_path',
94             );
95              
96             sub _build_metadata_path {
97 68     68   525 my ($self) = @_;
98 68         453 return $self->bag_path;
99             }
100              
101             ###############################################
102              
103             has 'metadata_path_arr' => (
104             is =>'ro',
105             lazy => 1,
106             builder => '_build_metadata_path_arr',
107             );
108              
109              
110             ###############################################
111              
112             has 'rel_metadata_path' => (
113             is => 'ro',
114             lazy => 1,
115             builder => '_build_rel_metadata_path',
116             );
117              
118             ###############################################
119              
120              
121             has 'payload_path' => (
122             is => 'ro',
123             lazy => 1,
124             builder => '_build_payload_path',
125             );
126              
127             sub _build_payload_path {
128 60     60   484 my ($self) = @_;
129 60         1495 return File::Spec->catdir($self->bag_path, "data");
130             }
131              
132             ###############################################
133              
134             has 'payload_path_arr' => (
135             is => 'ro',
136             lazy => 1,
137             builder => '_build_payload_path_arr',
138             );
139              
140             ###############################################
141              
142             has 'rel_payload_path' => (
143             is => 'ro',
144             lazy => 1,
145             builder => '_build_rel_payload_path',
146             );
147              
148             ###############################################
149              
150              
151             has 'checksum_algos' => (
152             is => 'ro',
153             lazy => 1,
154             builder => '_build_checksum_algos',
155             );
156              
157             ###############################################
158              
159              
160             has 'bag_version' => (
161             is => 'ro',
162             lazy => 1,
163             builder => '_build_bag_version',
164             );
165              
166             ###############################################
167              
168              
169             has 'bag_encoding' => (
170             is => 'ro',
171             lazy => 1,
172             builder => '_build_bag_encoding',
173             );
174              
175             ###############################################
176              
177              
178             has 'bag_info' => (
179             is => 'rw',
180             lazy => 1,
181             builder => '_build_bag_info',
182             predicate => 'has_bag_info'
183             );
184              
185             ###############################################
186              
187              
188             has 'errors' => (
189             is => 'ro',
190             lazy => 1,
191 0     0   0 builder => sub { my $self = shift; return [];},
  0         0  
192             );
193              
194             ###############################################
195              
196              
197              
198             has 'warnings' => (
199             is => 'ro',
200             lazy => 1,
201 0     0   0 builder => sub { my $self = shift; return [];},
  0         0  
202             );
203              
204             ###############################################
205              
206              
207             has 'digest_callback' => (
208             is => 'ro',
209             lazy => 1,
210             builder => sub {
211             my $sub = sub {
212 549     549   928 my ($digestobj, $filename) = @_;
213 549 50       9312 if (-f $filename) {
214 549 50       18858 open(my $fh, '<:raw', $filename) or confess("Cannot open '$filename', $!");
215 549         1595 binmode($fh);
216 549         1827 my $digest = $digestobj->get_hash_string($fh);
217 549 50       6617 close $fh or confess("could not close file '$filename', $!");
218 549         3295 return $digest;
219             } else {
220 0         0 croak "file $filename is not a real file!";
221             }
222 36     36   525 };
223 36         109 return $sub;
224             }
225             );
226              
227             ###############################################
228              
229              
230             sub get_baginfo_values_by_key {
231 111     111 1 1178 my ($self, $searchkey) = @_;
232 111         2143 my $info = $self->bag_info();
233 111         585 my @values;
234 111 50       205 if (defined $searchkey) {
235 111         218 my $lc_flag = $self->is_baginfo_key_reserved( $searchkey );
236 111         168 foreach my $entry (@{ $info }) {
  111         271  
237 363 50       561 return unless defined $entry;
238 363         355 my ($key, $value) = %{ $entry };
  363         716  
239 363 100       561 if ( __case_aware_compare_for_baginfo( $key, $searchkey, $lc_flag) ) {
240 64         171 push @values, $value;
241             }
242             }
243             }
244 111 100       368 return @values if (scalar(@values) > 0);
245 48         123 return;
246             }
247              
248             ###############################################
249              
250              
251             sub is_baginfo_key_reserved_as_uniq {
252 278     278 1 401 my ($self, $searchkey) = @_;
253             # my $rx = qr{Bag-Count|Bag-Group-Identifier|Bag-Size|Bagging-Date|Payload-Oxum};
254 278         1022 return $searchkey =~ m/^(?:Bag(?:-(?:Group-Identifier|Count|Size)|ging-Date)|Payload-Oxum)$/i;
255             }
256              
257             ###############################################
258              
259              
260             sub is_baginfo_key_reserved {
261 390     390 1 623 my ($self, $searchkey) = @_;
262             # my $rx = qr/
263             # Bag-Count|
264             # Bag-Group-Identifier|
265             # Bag-Size|
266             # Bagging-Date|
267             # Contact-Email|
268             # Contact-Name|
269             # Contact-Phone|
270             # External-Description|
271             # External-Identifier|
272             # Internal-Sender-Description|
273             # Internal-Sender-Identifier|
274             # Organisation-Adress|
275             # Payload-Oxum|
276             # Source-Organization
277             # /;
278 390         1584 return $searchkey =~ m/^(?:Bag(?:-(?:Group-Identifier|Count|Size)|ging-Date)|Internal-Sender-(?:Description|Identifier)|External-(?:Description|Identifier)|Contact-(?:(?:Phon|Nam)e|Email)|Source-Organization|Organisation-Adress|Payload-Oxum)$/ix;
279             }
280              
281             ###############################################
282              
283             sub __case_aware_compare_for_baginfo {
284 482     482   766 my ($internal_key, $search_key, $lc_flag) = @_;
285 482   33     2790 return (defined $internal_key) && (
286             ( $lc_flag && ((lc $internal_key) eq (lc $search_key)) ) # for reserved keys use caseinsensitive search
287             ||
288             ( (!$lc_flag) && ($internal_key eq $search_key) ) # for other keys sensitive search
289             )
290             }
291              
292             ###############################################
293              
294             sub _find_baginfo_idx {
295 58     58   134 my ($self, $searchkey) = @_;
296 58 50       108 if (defined $searchkey) {
297 58 100       129 if (-1 < index($searchkey, ":")) {croak "key should not contain a colon! (searchkey='$searchkey')";}
  1         12  
298 57         1269 my $info = $self->bag_info();
299 57         355 my $size = scalar(@{$info});
  57         93  
300 57         115 my $lc_flag = $self->is_baginfo_key_reserved($searchkey);
301 57         161 foreach my $idx (reverse 0.. $size-1) { # for multiple entries return the latest addition
302 119         135 my %entry = %{$info->[$idx]};
  119         353  
303 119         243 my ($key, $value) = %entry;
304 119 100       198 if (__case_aware_compare_for_baginfo($key, $searchkey, $lc_flag)) {
305 16         51 return $idx;
306             }
307             }
308             }
309 41         90 return;
310             }
311             ###############################################
312              
313              
314             sub verify_baginfo {
315 56     56 1 83 my ($self) = @_;
316 56         62 my %keys;
317 56         1192 my $info = $self->bag_info();
318 56         157 my $ret = 1;
319 56 100       112 if (defined $info) {
320 49         55 foreach my $entry (@{$self->bag_info()}) {
  49         956  
321 222         520 my ($key, $value) = %{$entry};
  222         547  
322 222 100       382 if ($self->is_baginfo_key_reserved($key)) {
323 173         535 $keys{ lc $key }++;
324             }
325             else {
326 49         134 $keys{ $key }++
327             }
328             }
329 49         141 foreach my $key (keys %keys) {
330 222 100       350 if ($self->is_baginfo_key_reserved_as_uniq($key)) {
331 129 50       331 if ($keys{$key} > 1) {
332 0         0 push @{$self->{errors}}, "Baginfo key '$key' exists $keys{$key}, but should be uniq!";
  0         0  
333 0         0 $ret = undef;
334             }
335             }
336             }
337             }
338             # check for payload oxum
339 56         135 my ($loaded_payloadoxum) = $self->get_baginfo_values_by_key('Payload-Oxum');
340 56 100       110 if (defined $loaded_payloadoxum) {
341 47         152 my ($octets, $streamcount) = $self->calc_payload_oxum();
342 47 100       168 if ("$octets.$streamcount" ne $loaded_payloadoxum) {
343 5         6 push @{$self->{errors}}, "Payload-Oxum differs, calculated $octets.$streamcount but $loaded_payloadoxum was expected by bag-info.txt";
  5         31  
344 5         10 $ret = undef;
345             }
346             } else {
347 9         30 push @{$self->{warnings}}, "Payload-Oxum was expected in bag-info.txt, but not found!"; # payload-oxum is recommended, but optional
  9         35  
348             }
349 56         188 return $ret;
350             }
351              
352             ###############################################
353              
354              
355             sub delete_baginfo_by_key {
356 3     3 1 1591 my ($self, $searchkey) = @_;
357 3         8 my $idx = $self->_find_baginfo_idx($searchkey);
358 3 100       10 if (defined $idx) {
359 2         3 splice @{$self->{bag_info}}, $idx, 1; # delete nth indexed entry
  2         4  
360             }
361 3         15 return 1;
362             }
363              
364             ###############################################
365              
366              
367             sub exists_baginfo_key {
368 2     2 1 5 my ($self, $searchkey) =@_;
369 2         7 return (defined $self->_find_baginfo_idx($searchkey));
370             }
371              
372             ###############################################
373              
374             sub _replace_baginfo_by_first_match {
375 53     53   935 my ($self, $searchkey, $newvalue) = @_;
376 53         117 my $idx = $self->_find_baginfo_idx( $searchkey);
377 52 100       112 if (defined $idx) {
378 13         50 $self->{bag_info}[$idx] = {$searchkey => $newvalue};
379 13         24 return $idx;
380             }
381 39         62 return;
382             }
383              
384             ###############################################
385              
386              
387             sub append_baginfo_by_key {
388 56     56 1 607 my ($self, $searchkey, $newvalue) = @_;
389 56 50       106 if (defined $searchkey) {
390 56 50       122 if (-1 < index($searchkey, ":")) { croak "key should not contain a colon! (searchkey='$searchkey')"; }
  0         0  
391 56 100       119 if ($self->is_baginfo_key_reserved_as_uniq($searchkey)) {
392 41 100       103 if (defined $self->get_baginfo_values_by_key($searchkey)) {
393             # hmm, search key is marked as uniq and still exists
394 5         14 return;
395             }
396             }
397 51         71 push @{$self->{bag_info}}, {$searchkey => $newvalue};
  51         576  
398             }
399 51         86 return 1;
400             }
401              
402             ###############################################
403              
404              
405             sub add_or_replace_baginfo_by_key {
406 67     67 1 800 my ($self, $searchkey, $newvalue) = @_;
407 67 50       164 if (defined $searchkey) {
408 67 100       178 if (-1 < index($searchkey, ":")) { croak "key should not contain a colon! (searchkey='$searchkey')"; }
  1         20  
409 66 100       164 if (defined $self->{bag_info}) {
410 50         121 my $idx = $self->_replace_baginfo_by_first_match( $searchkey, $newvalue);
411 50 100       132 if (defined $idx) { return $idx;}
  12         17  
412             }
413 54         129 $self->append_baginfo_by_key( $searchkey, $newvalue );
414 54         90 return -1;
415             }
416             }
417              
418             ###############################################
419              
420              
421             has 'forced_fixity_algorithm' => (
422             is => 'ro',
423             lazy => 1,
424             builder => '_build_forced_fixity_algorithm',
425             );
426              
427             sub has_forced_fixity_algorithm {
428 49     49 0 83 my $self = shift;
429 49         1024 return (defined $self->forced_fixity_algorithm() );
430             } # false if use_plugins used
431              
432             ###############################################
433              
434              
435             has 'manifest_files' => (
436             is => 'ro',
437             lazy => 1,
438             builder => '_build_manifest_files',
439             );
440              
441             ###############################################
442              
443              
444             has 'tagmanifest_files' => (
445             is => 'ro',
446             lazy => 1,
447             builder => '_build_tagmanifest_files',
448             );
449              
450             ###############################################
451              
452              
453             has 'payload_files' => ( # relatively to bagit base
454             is => 'ro',
455             lazy => 1,
456             builder => '_build_payload_files',
457             );
458              
459             ###############################################
460              
461              
462             has 'non_payload_files' => (
463             is=>'ro',
464             lazy => 1,
465             builder => '_build_non_payload_files',
466             );
467              
468             ###############################################
469              
470              
471             has 'plugins' => (
472             is=>'rw',
473             #isa=>'HashRef',
474             );
475              
476             ###############################################
477              
478              
479              
480             has 'manifests' => (
481             is => 'rw',
482             lazy => 1,
483             builder => '_build_manifests'
484             #isa=>'HashRef',
485             );
486              
487             ###############################################
488              
489              
490              
491             has 'algos' => (
492             is=>'rw',
493             #isa=>'HashRef',
494             );
495              
496             ###############################################
497              
498             sub _build_bag_path_arr {
499 0     0   0 my ($self) = @_;
500 0         0 my @split_path = File::Spec->splitdir($self->bag_path);
501 0         0 return @split_path;
502             }
503              
504             sub _build_payload_path_arr {
505 0     0   0 my ($self) = @_;
506 0         0 my @split_path = File::Spec->splitdir($self->payload_path);
507 0         0 return @split_path;
508             }
509              
510             sub _build_rel_payload_path {
511 0     0   0 my ($self) = @_;
512 0         0 my $rel_path = File::Spec->abs2rel( $self->payload_path, $self->bag_path ) ;
513 0         0 return $rel_path;
514             }
515              
516             sub _build_metadata_path_arr {
517 0     0   0 my ($self) = @_;
518 0         0 my @split_path = File::Spec->splitdir($self->metadata_path);
519 0         0 return @split_path;
520             }
521              
522             sub _build_rel_metadata_path {
523 0     0   0 my ($self) = @_;
524 0         0 my $rel_path = File::Spec->abs2rel( $self->metadata_path, $self->bag_path ) ;
525 0         0 return $rel_path;
526             }
527              
528             sub _build_checksum_algos {
529 19     19   1026 my($self) = @_;
530 19         28 my @checksums = keys %{ $self->manifests() };
  19         442  
531 19         238 return \@checksums;
532             }
533              
534             sub _build_manifest_files {
535 19     19   8474 my($self) = @_;
536 19         23 my @manifest_files;
537 19         22 foreach my $algo (@{$self->checksum_algos}) {
  19         365  
538 38         843 my $manifest_file = File::Spec->catfile($self->metadata_path, "manifest-$algo.txt");
539 38 100       946 if (-f $manifest_file) {
540 35         559 push @manifest_files, $manifest_file;
541             }
542             }
543 19         100 return \@manifest_files;
544             }
545              
546             sub _build_tagmanifest_files {
547 19     19   2466 my ($self) = @_;
548 19         42 my @tagmanifest_files;
549 19         24 foreach my $algo (@{$self->checksum_algos}) {
  19         327  
550 38         895 my $tagmanifest_file = File::Spec->catfile($self->metadata_path,"tagmanifest-$algo.txt");
551 38 100       1102 if (-f $tagmanifest_file) {
552 35         147 push @tagmanifest_files, $tagmanifest_file;
553             }
554             }
555 19         88 return \@tagmanifest_files;
556             }
557              
558             sub __handle_nonportable_local_entry {
559 382     382   500 my $self = shift;
560 382         436 my $local_entry = shift;
561 382         429 my $dir = shift;
562 382 100       1428 if ($local_entry !~ m/^[a-zA-Z0-9._-]+$/) {
563 4         15 my $local_entry_utf8 = decode("UTF-8", $local_entry);
564 4 100       234 if ((!$self->has_force_utf8)) {
565 2         9 my $hexdump = "0x" . unpack('H*', $local_entry);
566 2         9 $local_entry =~m/[^a-zA-Z0-9._-]/; # to find PREMATCH, needed nextline
567 2         5 my $prematch_position = $`;
568 2         309 carp "possible non portable pathname detected in $dir,\n",
569             "got path (hexdump)='$hexdump'(hex),\n",
570             "decoded path='$local_entry_utf8'\n",
571             " "." "x length($prematch_position)."^"."------- first non portable char\n";
572             }
573 4         86 $local_entry = $local_entry_utf8;
574             }
575 382         2504 return $local_entry;
576             }
577              
578              
579              
580             sub __file_find { # own implementation, because File::Find has problems with UTF8 encoded Paths under MSWin32
581             # finds recursively all files in given directory.
582             # if $excludedir is defined, the content will be excluded
583 95     95   2532 my ($self,$dir, $excludedir) = @_;
584 95 100       211 if (defined $excludedir) {
585 93         568 $excludedir = File::Spec->rel2abs( $excludedir);
586             }
587 95         152 my @file_paths;
588              
589             my $finder;
590             $finder = sub {
591 113     113   190 my ($current_dir) = @_; #absolute path
592 113         176 my @todo;
593             my @tmp_file_paths;
594 113         4431 opendir( my $dh, $current_dir);
595 113         4221 my @paths = File::Spec->no_upwards ( readdir $dh );
596 113         1431 closedir $dh;
597 113         325 foreach my $local_entry (@paths) {
598 382         924 my $path_entry = File::Spec->catdir($current_dir, $self->__handle_nonportable_local_entry($local_entry, $dir));
599 382 100       5886 if (-f $path_entry) {
    50          
600 328         1114 push @tmp_file_paths, $path_entry;
601             } elsif (-d $path_entry) {
602 54 100 100     305 next if ((defined $excludedir) && ($path_entry eq $excludedir));
603 18         59 push @todo, $path_entry;
604             } else {
605 0         0 croak "not a file nor a dir found '$path_entry'";
606             }
607             }
608 113         423 push @file_paths, sort @tmp_file_paths;
609 113         557 foreach my $subdir (sort @todo) {
610 18         101 &$finder($subdir);
611             }
612 95         566 };
613 95         1119 my $absolute = File::Spec->rel2abs( $dir );
614 95         241 &$finder($absolute);
615 95         174 @file_paths = map { File::Spec->abs2rel( $_, $dir)} @file_paths;
  328         15810  
616 95         348 return @file_paths;
617             }
618              
619             sub _build_payload_files{
620 57     57   614 my ($self) = @_;
621 57         874 my $payload_dir = $self->payload_path;
622 57         3756 my $reldir = File::Spec->abs2rel($payload_dir, $self->bag_path());
623 57         170 $reldir =~ s/^\.$//;
624             my @payload = map {
625 57 50       1281 $reldir eq "" ? $_ : File::Spec->catfile($reldir, $_)
  128         846  
626             } $self->__file_find($payload_dir, File::Spec->rel2abs($self->metadata_path));
627 57 50       346 return wantarray ? @payload : \@payload;
628             }
629              
630              
631             sub __build_read_bagit_txt {
632 117     117   154 my($self) = @_;
633 117         1912 my $bagit = $self->metadata_path;
634 117         1639 my $file = File::Spec->catfile($bagit, "bagit.txt");
635 117 100       4905 open(my $BAGIT, "<:encoding(UTF-8)", $file) or croak("Cannot read '$file': $!");
636 116         10736 my $version_string = <$BAGIT>;
637 116         1534 my $encoding_string = <$BAGIT>;
638 116         1490 close($BAGIT);
639 116 50       376 if (defined $version_string) {
640 116         772 $version_string =~ s/[\r\n]//;
641             }
642 116 100       292 if (defined $encoding_string) {
643 114         321 $encoding_string =~s/[\r\n]//;
644             }
645 116         661 return ($version_string, $encoding_string, $file);
646             }
647              
648             sub _build_bag_version {
649 61     61   2350 my($self) = @_;
650 61         139 my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
651 60 50       151 croak "Version line missed in '$file" unless defined $version_string;
652 60 100       368 if ($version_string =~ /^BagIt-Version: ([01]\.[0-9]+)$/) {
653 57         326 return $1;
654             } else {
655 3         30 $version_string =~ s/\r//;
656 3         10 $version_string =~ s/^\N{U+FEFF}//;
657 3         58 croak "Version string '$version_string' of '$file' is incorrect";
658             };
659             }
660              
661             sub _build_bag_encoding {
662 56     56   493 my($self) = @_;
663 56         111 my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
664 56 100       173 croak "Encoding line missed in '$file" unless defined $encoding_string;
665 55 50       115 croak "Encoding '$encoding_string' of '$file' not supported by current Archive::BagIt module!" unless ($encoding_string ne "UTF-8");
666 55         200 return $encoding_string;
667             }
668              
669             sub __sort_bag_info {
670             my @sorted = sort {
671 1     1   19 my %tmpa = %{$a};
  11         12  
  11         18  
672 11         12 my %tmpb = %{$b};
  11         21  
673 11         16 my ($ka, $va) = each %tmpa;
674 11         16 my ($kb, $vb) = each %tmpb;
675 11         13 my $kres = $ka cmp $kb;
676 11 100       17 if ($kres != 0) {
677 10         29 return $kres;
678             } else {
679 1         3 return $va cmp $vb;
680             }
681             } @_;
682 1         4 return @sorted;
683             }
684              
685             sub _parse_bag_info { # parses a bag-info textblob
686 757     757   584719 my ($self, $textblob) = @_;
687             # metadata elements are OPTIONAL and MAY be repeated. Because "bag-
688             # info.txt" is intended for human reading and editing, ordering MAY be
689             # significant and the ordering of metadata elements MUST be preserved.
690             #
691             # A metadata element MUST consist of a label, a colon ":", a single
692             # linear whitespace character (space or tab), and a value that is
693             # terminated with an LF, a CR, or a CRLF.
694             #
695             # The label MUST NOT contain a colon (:), LF, or CR. The label MAY
696             # contain linear whitespace characters but MUST NOT start or end with
697             # whitespace.
698             #
699             # It is RECOMMENDED that lines not exceed 79 characters in length.
700             # Long values MAY be continued onto the next line by inserting a LF,
701             # CR, or CRLF, and then indenting the next line with one or more linear
702             # white space characters (spaces or tabs). Except for linebreaks, such
703             # padding does not form part of the value.
704             #
705             # Implementations wishing to support previous BagIt versions MUST
706             # accept multiple linear whitespace characters before and after the
707             # colon when the bag version is earlier than 1.0; such whitespace does
708             # not form part of the label or value.
709             # find all labels
710 757         1019 my @labels;
711 757         5747 while ($textblob =~ s/^([^:\s]+)\s*:\s*//m) { # label if starts with chars not colon or whitespace followed by zero or more spaces, a colon, zero or more spaces
712             # label found
713 924         2275 my $label = $1;
714 924         1176 my $value='';
715 924 100       43402 if ($textblob =~ s/(.+?)(?=^\S)//ms) {
    50          
716             # value if rest string starts with chars not \r and/or \n until a non-whitespace after \r\n
717 167         348 $value = chomp_portable($1);
718             } elsif ($textblob =~ s/(.*)//s) {
719 757         1863 $value = chomp_portable($1);
720             }
721 924 50       1824 if (defined $label) {
722 924         4255 push @labels, { $label => $value };
723             }
724             }
725             # The RFC does not allow reordering:
726             #my @sorted = __sort_bag_info(@labels);
727             #return \@sorted;
728 757         1934 return \@labels;
729             }
730              
731             sub _build_bag_info {
732 60     60   2792 my ($self) = @_;
733 60         996 my $bagit = $self->metadata_path;
734 60         1038 my $file = File::Spec->catfile($bagit, "bag-info.txt");
735 60 100       1293 if (-e $file) {
736 42 50       1470 open(my $BAGINFO, "<:encoding(UTF-8)", $file) or croak("Cannot read $file: $!");
737 42         2747 my @lines;
738 42         998 while ( my $line = <$BAGINFO>) {
739 197         1234 push @lines, $line;
740             }
741 42         506 close($BAGINFO);
742 42         175 my $lines = join("", @lines);
743 42         137 return $self->_parse_bag_info($lines);
744             }
745             # bag-info.txt is optional
746 18         91 return;
747             }
748              
749             sub _build_non_payload_files {
750 35     35   623 my ($self) = @_;
751 35         547 my $non_payload_dir = $self->metadata_path();
752 35         1918 my $reldir = File::Spec->abs2rel($non_payload_dir, $self->bag_path());
753 35         166 $reldir =~ s/^\.$//;
754             my @non_payload = map {
755 35 50       741 $reldir eq "" ? $_ : File::Spec->catfile($reldir, $_)
  174         321  
756             } $self->__file_find($non_payload_dir, File::Spec->rel2abs($self->payload_path));
757 35 50       276 return wantarray ? @non_payload : \@non_payload;
758             }
759              
760             sub _build_forced_fixity_algorithm {
761 48     48   417 my ($self) = @_;
762 48 50       148 if ($self->use_plugins()) {
763 0         0 return;
764             } else {
765 48 100       734 if ($self->bag_version() >= 1.0) {
766 32         923 return Archive::BagIt::Plugin::Algorithm::SHA512->new(bagit => $self);
767             }
768             else {
769 16         479 return Archive::BagIt::Plugin::Algorithm::MD5->new(bagit => $self);
770             }
771             }
772             }
773              
774             ###############################################
775              
776              
777             sub load_plugins {
778 251     251 1 523 my ($self, @plugins) = @_;
779              
780             #p(@plugins);
781 251         501 my $loaded_plugins = $self->plugins;
782 251         395 @plugins = grep { not exists $loaded_plugins->{$_} } @plugins;
  334         959  
783              
784 251 50       572 return if @plugins == 0;
785 251         446 foreach my $plugin (@plugins) {
786 334 50       1208 load_class ($plugin) or croak ("Can't load $plugin");
787 334         26849 $plugin->new({bagit => $self});
788             }
789              
790 251         1243 return 1;
791             }
792              
793             ###############################################
794              
795              
796             sub load {
797 1     1 1 1418 my ($self) = @_;
798             # call trigger
799 1         4 $self->bag_path;
800 1         27 $self->bag_version;
801 1         25 $self->bag_encoding;
802 1         25 $self->bag_info;
803 1         21 $self->payload_path;
804 1         23 $self->manifest_files;
805 1         23 $self->checksum_algos;
806 1         23 $self->tagmanifest_files;
807 1         13 return 1;
808             }
809              
810             ###############################################
811              
812              
813             sub verify_bag {
814 61     61 1 32222 my ($self,$opts) = @_;
815             #removed the ability to pass in a bag in the parameters, but might want options
816             #like $return all errors rather than dying on first one
817 61         159 my $bagit = $self->bag_path;
818 61         1380 my $version = $self->bag_version(); # to call trigger
819 57         1286 my $encoding = $self->bag_encoding(); # to call trigger
820 56         178 my $baginfo = $self->verify_baginfo(); #to call trigger
821              
822 56         1301 my $fetch_file = File::Spec->catfile($self->metadata_path, "fetch.txt");
823 56         1624 my $payload_dir = $self->payload_path;
824 56         364 my $return_all_errors = $opts->{return_all_errors};
825              
826 56 100       1215 if (-f $fetch_file) {
827 7         104 croak("Fetching via file '$fetch_file' is not supported by current Archive::BagIt implementation")
828             }
829             # check forced fixity
830 49 50       177 if ($self->has_forced_fixity_algorithm()) {
831 49         1001 my $forced_fixity_alg = $self->forced_fixity_algorithm()->name();
832 49         1067 my $manifest_file = File::Spec->catfile($self->metadata_path, "manifest-$forced_fixity_alg.txt");
833 49 100       1555 croak("Manifest '$manifest_file' is not a regular file or does not exist for given bagit version '$version'") unless -f ($manifest_file);
834             }
835 48 50       560 croak("Payload-directory '$payload_dir' is not a directory or does not exist") unless -d ($payload_dir);
836              
837 48 50       193 unless ($version > .95) {
838 0         0 croak ("Bag Version $version is unsupported");
839             }
840              
841              
842 48         74 my @errors;
843              
844              
845             # check for manifests
846 48         57 foreach my $algorithm ( keys %{ $self->manifests }) {
  48         1090  
847 85         1864 my $res = $self->manifests->{$algorithm}->verify_manifest($self->payload_files, $return_all_errors);
848 67 50 66     298 if ((defined $res) && ($res ne "1")) { push @errors, $res; }
  0         0  
849             }
850             #check for tagmanifests
851 30         58 foreach my $algorithm ( keys %{ $self->manifests }) {
  30         649  
852 57         1232 my $res = $self->manifests->{$algorithm}->verify_tagmanifest($self->non_payload_files, $return_all_errors);
853 53 50 66     241 if ((defined $res) && ($res ne "1")) { push @errors, $res; }
  0         0  
854             }
855 26         52 push @{$self->{errors}}, @errors;
  26         72  
856 26         578 my $err = $self->errors();
857 26         224 my @err = @{ $err };
  26         60  
858 26 100       65 if (scalar( @err ) > 0) {
859 7         173 croak join("\n","bag verify for bagit version '$version' failed with invalid files.", @err);
860             }
861 19         95 return 1;
862             }
863              
864              
865             sub calc_payload_oxum {
866 79     79 1 123 my($self) = @_;
867 79         81 my @payload = @{$self->payload_files};
  79         1477  
868 79         320 my $octets=0;
869 79         98 my $streamcount = scalar @payload;
870 79         138 foreach my $local_name (@payload) {# local_name is relative to bagit base
871 190         1497 my $file = File::Spec->catfile($self->bag_path(), $local_name);
872 190 50       2345 if (-e $file) {
873 190         397 my $filesize = 0;
874 190 100       2138 $filesize = -s $file or carp "empty file $file detected";
875 190         491 $octets += $filesize;
876 0         0 } else { croak "file $file does not exist, $!"; }
877             }
878 79         303 return ($octets, $streamcount);
879             }
880              
881              
882             sub calc_bagsize {
883 16     16 1 42 my($self) = @_;
884 16         52 my ($octets,$streamcount) = $self->calc_payload_oxum();
885 16 50       54 if ($octets < 1024) { return "$octets B"; }
  16 0       88  
    0          
    0          
886 0         0 elsif ($octets < 1024*1024) {return sprintf("%0.1f kB", $octets/1024); }
887 0         0 elsif ($octets < 1024*1024*1024) {return sprintf "%0.1f MB", $octets/(1024*1024); }
888 0         0 elsif ($octets < 1024*1024*1024*1024) {return sprintf "%0.1f GB", $octets/(1024*1024*1024); }
889 0         0 else { return sprintf "%0.2f TB", $octets/(1024*1024*1024*1024); }
890             }
891              
892              
893             sub create_bagit {
894 16     16 1 27 my($self) = @_;
895 16         322 my $metadata_path = $self->metadata_path();
896 16         270 my $bagit_path = File::Spec->catfile( $metadata_path, "bagit.txt");
897 16 50       1263 open(my $BAGIT, ">:encoding(UTF-8)", $bagit_path) or croak("Can't open $bagit_path for writing: $!");
898 16         1318 print($BAGIT "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8");
899 16         1003 close($BAGIT);
900 16         103 return 1;
901             }
902              
903              
904             sub create_baginfo {
905 16     16 1 50 my($self) = @_; # because bag-info.txt allows multiple key-value-entries, hash is replaced
906 16         931 $self->add_or_replace_baginfo_by_key('Bagging-Date', POSIX::strftime("%Y-%m-%d", gmtime(time)));
907 16         50 $self->add_or_replace_baginfo_by_key('Bag-Software-Agent', 'Archive::BagIt ');
908 16         67 my ($octets, $streams) = $self->calc_payload_oxum();
909 16         89 $self->add_or_replace_baginfo_by_key('Payload-Oxum', "$octets.$streams");
910 16         55 $self->add_or_replace_baginfo_by_key('Bag-Size', $self->calc_bagsize());
911             # The RFC does not allow reordering:
912 16         352 my $metadata_path = $self->metadata_path();
913 16         258 my $bag_info_path = File::Spec->catfile( $metadata_path, "bag-info.txt");
914 16 50       1109 open(my $BAGINFO, ">:encoding(UTF-8)", $bag_info_path) or croak("Can't open $bag_info_path for writing: $!");
915 16         1110 foreach my $entry (@{ $self->bag_info() }) {
  16         395  
916 64         196 my %tmp = %{ $entry };
  64         176  
917 64         128 my ($key, $value) = %tmp;
918 64 50       146 if (-1 < index($key,":")) { carp "key should not contain a colon! (searchkey='$key')"; }
  0         0  
919 64         262 print($BAGINFO "$key: $value\n");
920             }
921 16         1030 close($BAGINFO);
922 16         104 return 1;
923             }
924              
925              
926             sub store {
927 16     16 1 33 my($self) = @_;
928 16         47 $self->create_bagit();
929 16         76 $self->create_baginfo();
930             # it is important to create all manifest files first, because tagmanifest should include all manifest-xxx.txt
931 16         21 foreach my $algorithm ( keys %{ $self->manifests }) {
  16         418  
932 32         802 $self->manifests->{$algorithm}->create_manifest();
933             }
934 16         44 foreach my $algorithm ( keys %{ $self->manifests }) {
  16         423  
935 32         847 $self->manifests->{$algorithm}->create_tagmanifest();
936             }
937             # retrigger builds
938 16         137 $self->{checksum_algos} = $self->_build_checksum_algos();
939 16         55 $self->{tagmanifest_files} = $self->_build_tagmanifest_files();
940 16         83 $self->{manifest_files} = $self->_build_manifest_files();
941 16         24 return 1;
942             }
943              
944              
945             sub init_metadata {
946 16     16 1 40 my ($class, $bag_path, $options) = @_;
947 16         56 $bag_path =~ s#/$##; # replace trailing slash
948 16 50       237 unless ( -d $bag_path) { croak ( "source bag directory '$bag_path' doesn't exist"); }
  0         0  
949 16         490 my $self = $class->new(bag_path=>$bag_path, %$options);
950 16 100       290 carp "no payload path" if ! -d $self->payload_path;
951 16 100       380 unless ( -d $self->payload_path) {
952 2         198 rename ($bag_path, $bag_path.".tmp");
953 2         121 mkdir ($bag_path);
954 2         66 rename ($bag_path.".tmp", $self->payload_path);
955             }
956 16 50       698 unless ( -d $self->metadata_path) {
957             #metadata path is not the root path for some reason
958 0         0 mkdir ($self->metadata_path);
959             }
960 16         85 $self->store();
961 16         50 return $self;
962             }
963              
964              
965             sub make_bag {
966 16     16 1 45731 my ($class, $bag_path, $options) = @_;
967 16         34 my $isa = ref $class;
968 16 50       52 if ($isa eq "Archive::BagIt") { # not a class, but an object!
969 0         0 croak "make_bag() only a class subroutine, not useable with objects. Try store() instead!\n";
970             }
971 16         50 my $self = $class->init_metadata($bag_path, $options);
972 16         58 return $self;
973             }
974              
975              
976              
977              
978              
979             __PACKAGE__->meta->make_immutable;
980              
981             1;
982              
983             __END__