| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Catmandu::Importer::BagIt; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.260'; |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Catmandu::Importer::BagIt - Package that imports BagIt data |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Catmandu::Importer::BagIt; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my $importer = Catmandu::Importer::BagIt->new( |
|
14
|
|
|
|
|
|
|
bags => "/my/bags/*" , |
|
15
|
|
|
|
|
|
|
); |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $importer = Catmandu::Importer::BagIt->new( |
|
18
|
|
|
|
|
|
|
bags => ["directory1","directory2"] , |
|
19
|
|
|
|
|
|
|
include_manifests => 0 , |
|
20
|
|
|
|
|
|
|
include_payloads => 0 , |
|
21
|
|
|
|
|
|
|
verify => 1 |
|
22
|
|
|
|
|
|
|
); |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $n = $importer->each(sub { |
|
25
|
|
|
|
|
|
|
my $hashref = $_[0]; |
|
26
|
|
|
|
|
|
|
# ... |
|
27
|
|
|
|
|
|
|
}); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
To convert BagIt directories into a JSON representation with the L<catmandu> command line client: |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Use a glob to find all directories in /my/path/ |
|
32
|
|
|
|
|
|
|
catmandu convert BagIt --bags '/my/path/*' --verify 1 |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 BagIt |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
The parsed BagIt record is a HASH containing the key '_id' containing the BagIt directory name |
|
37
|
|
|
|
|
|
|
and one or more fields: |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
{ |
|
40
|
|
|
|
|
|
|
'_id' => 'bags/demo01', |
|
41
|
|
|
|
|
|
|
'version' => '0.97', |
|
42
|
|
|
|
|
|
|
'tags' => { |
|
43
|
|
|
|
|
|
|
'Bagging-Date' => '2014-10-03', |
|
44
|
|
|
|
|
|
|
'Bag-Size' => '90.8 KB', |
|
45
|
|
|
|
|
|
|
'Payload-Oxum' => '92877.1' |
|
46
|
|
|
|
|
|
|
}, |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# If the verify option is true |
|
49
|
|
|
|
|
|
|
'is_valid' => 1, |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# If the include_payloads option is true |
|
52
|
|
|
|
|
|
|
'payload_files' => [ |
|
53
|
|
|
|
|
|
|
'data', |
|
54
|
|
|
|
|
|
|
'data/Catmandu-0.9204.tar.gz' |
|
55
|
|
|
|
|
|
|
], |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
'non_payload_files' => [], |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# If the include_manifests option is true |
|
60
|
|
|
|
|
|
|
'manifest' => { |
|
61
|
|
|
|
|
|
|
'data/Catmandu-0.9204.tar.gz' => 'c8accb44741272d63f6e0d72f34b0fde' |
|
62
|
|
|
|
|
|
|
}, |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
'tagmanifest' => { |
|
65
|
|
|
|
|
|
|
'manifest-md5.txt' => '48e8a074bfe09aa17aa2ca4086b48608', |
|
66
|
|
|
|
|
|
|
'bag-info.txt' => '74a18a1c9f491f7f2360cbd25bb2143e', |
|
67
|
|
|
|
|
|
|
'bagit.txt' => '9e5ad981e0d29adc278f6a294b8c2aca' |
|
68
|
|
|
|
|
|
|
}, |
|
69
|
|
|
|
|
|
|
}; |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 METHODS |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
This module inherits all methods of L<Catmandu::Importer> and by this |
|
74
|
|
|
|
|
|
|
L<Catmandu::Iterable>. |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 CONFIGURATION |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
In addition to the configuration provided by L<Catmandu::Importer> the importer can |
|
79
|
|
|
|
|
|
|
be configured with the following parameters: |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=over |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item bags |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Required. An array reference pointing to zero or more BagIt directories. Or, a string that can |
|
86
|
|
|
|
|
|
|
be used as a glob pointing to zero or more directories. |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=item include_manifests |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
If set to a true value, then all manifest files will be parsed and included into the BagIt record. |
|
91
|
|
|
|
|
|
|
Be aware, these checksums will be invalid as soon as you manipulate the BagIt record or files on disk. |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item include_payloads |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
If set to a true value, then all payload locations will be parsed and included in the BagIt record. |
|
96
|
|
|
|
|
|
|
Be aware, changing the payload sections will store new data on disk. |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
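=item verify

If set to a true value, then each bag is validated and the result is stored in the 'is_valid' field of the BagIt record (1 when the bag is valid, 0 otherwise).

=back |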
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
L<Catmandu>, |
|
103
|
|
|
|
|
|
|
L<Catmandu::Importer>, |
|
104
|
|
|
|
|
|
|
L<Archive::BagIt> |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 AUTHOR |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Patrick Hochstenbach <Patrick.Hochstenbach@UGent.be> |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
This software is copyright (c) 2014 by Patrick Hochstenbach. |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
115
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
|
118
|
|
|
|
|
|
|
|
|
119
|
2
|
|
|
2
|
|
251561
|
use namespace::clean; |
|
|
2
|
|
|
|
|
16598
|
|
|
|
2
|
|
|
|
|
12
|
|
|
120
|
2
|
|
|
2
|
|
1361
|
use Catmandu::Sane; |
|
|
2
|
|
|
|
|
357979
|
|
|
|
2
|
|
|
|
|
15
|
|
|
121
|
2
|
|
|
2
|
|
721
|
use Catmandu::Util qw(:is); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
586
|
|
|
122
|
2
|
|
|
2
|
|
1348
|
use Catmandu::BagIt; |
|
|
2
|
|
|
|
|
8
|
|
|
|
2
|
|
|
|
|
82
|
|
|
123
|
2
|
|
|
2
|
|
15
|
use Moo; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
13
|
|
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Consume the Catmandu importer role.
with 'Catmandu::Importer';

# bags: required. Either an array reference of directories or a string
# that is expanded with glob() when the generator is built.
has bags => (
    is       => 'ro',
    required => 1,
);

# Optional switches: read-only, and undefined unless the caller sets them.
for my $switch (qw(include_manifests include_payloads verify)) {
    has $switch => (
        is      => 'ro',
        default => sub { undef },
    );
}
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Build the record generator for this importer.
#
# The 'bags' option is resolved to a list of directories once, when the
# generator is created: an array reference is used as given, any other
# value is expanded with glob() and only the matching directories are kept.
#
# Returns a code reference that yields one parsed bag (the hash reference
# produced by read_bag) per call, and undef once the list is exhausted.
sub generator {
    my ($self) = @_;

    my $bags = $self->bags;
    my @bags = is_array_ref($bags)
        ? @{$bags}
        : grep { -d $_ } glob($bags);

    return sub {
        # Skip unreadable directories and bags that could not be loaded
        # instead of returning undef for them. undef is also what this
        # closure returns when the queue is empty, so handing it out for a
        # single bad entry would silently drop every bag queued behind it.
        while (defined(my $dir = shift @bags)) {
            next unless -r $dir;

            my $bag = $self->read_bag($dir);
            return $bag if defined $bag;
        }

        # Explicit undef: end-of-data marker for the caller.
        return undef;
    };
}
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Parse the BagIt directory $dir into a plain hash reference.
#
# The record always holds '_id' (the directory exactly as passed in) and
# 'version'. 'tags' gets one entry per name returned by list_info_tags.
# Depending on the importer options the record also holds:
#   verify            - 'is_valid' (1 or 0)
#   include_payloads  - 'payload_files' and 'non_payload_files'
#   include_manifests - 'tagmanifest' and 'manifest'
#
# Returns the record, or undef when Catmandu::BagIt->read yields no object.
sub read_bag {
    my ($self, $dir) = @_;

    my $bagit = Catmandu::BagIt->read($dir);

    # The generator tests the result of read_bag for definedness, so report
    # a bag that could not be loaded as undef instead of dying on the first
    # method call below.
    # NOTE(review): assumes read() signals failure by returning undef;
    # confirm against the Catmandu::BagIt version in use.
    return unless defined $bagit;

    my $item = {
        _id     => $dir,
        version => $bagit->version,
    };

    if ($self->verify) {
        $item->{is_valid} = $bagit->valid ? 1 : 0;
    }

    for my $tag ($bagit->list_info_tags) {
        # All values returned for a tag are concatenated without separator.
        $item->{tags}{$tag} = join '', $bagit->get_info($tag);
    }

    if ($self->include_payloads) {
        # Payload names are reported relative to the bag, under "data/".
        $item->{payload_files}
            = [ map { 'data/' . $_->filename } $bagit->list_files ];
        $item->{non_payload_files} = [ $bagit->list_tagsum ];
    }

    if ($self->include_manifests) {
        for my $file ($bagit->list_tagsum) {
            $item->{tagmanifest}{$file} = $bagit->get_tagsum($file);
        }

        for my $file ($bagit->list_checksum) {
            $item->{manifest}{"data/$file"} = $bagit->get_checksum($file);
        }
    }

    return $item;
}
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1; |