line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Importer::BagIt; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.250'; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Catmandu::Importer::BagIt - Package that imports BagIt data |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Catmandu::Importer::BagIt; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my $importer = Catmandu::Importer::BagIt->new( |
14
|
|
|
|
|
|
|
bags => "/my/bags/*" , |
15
|
|
|
|
|
|
|
); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $importer = Catmandu::Importer::BagIt->new( |
18
|
|
|
|
|
|
|
bags => ["directory1","directory2"] , |
19
|
|
|
|
|
|
|
include_manifests => 0 , |
20
|
|
|
|
|
|
|
include_payloads => 0 , |
21
|
|
|
|
|
|
|
verify => 1 |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $n = $importer->each(sub { |
25
|
|
|
|
|
|
|
my $hashref = $_[0]; |
26
|
|
|
|
|
|
|
# ... |
27
|
|
|
|
|
|
|
}); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
To convert BagIt directories into a JSON representation with the L<catmandu> command line client: |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Use a glob to find all directories in /my/path/ |
32
|
|
|
|
|
|
|
catmandu convert BagIt --bags '/my/path/*' --verify 1 |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 BagIt |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
The parsed BagIt record is a HASH containing the key '_id' containing the BagIt directory name |
37
|
|
|
|
|
|
|
and one or more fields: |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
{ |
40
|
|
|
|
|
|
|
'_id' => 'bags/demo01', |
41
|
|
|
|
|
|
|
'version' => '0.97', |
42
|
|
|
|
|
|
|
'tags' => { |
43
|
|
|
|
|
|
|
'Bagging-Date' => '2014-10-03', |
44
|
|
|
|
|
|
|
'Bag-Size' => '90.8 KB', |
45
|
|
|
|
|
|
|
'Payload-Oxum' => '92877.1' |
46
|
|
|
|
|
|
|
}, |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# If the verify option is true |
49
|
|
|
|
|
|
|
'is_valid' => 1, |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# If the include_payloads option is true |
52
|
|
|
|
|
|
|
'payload_files' => [ |
53
|
|
|
|
|
|
|
'data', |
54
|
|
|
|
|
|
|
'data/Catmandu-0.9204.tar.gz' |
55
|
|
|
|
|
|
|
], |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
'non_payload_files' => [], |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# If the include_manifests option is true |
60
|
|
|
|
|
|
|
'manifest' => { |
61
|
|
|
|
|
|
|
'data/Catmandu-0.9204.tar.gz' => 'c8accb44741272d63f6e0d72f34b0fde' |
62
|
|
|
|
|
|
|
}, |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
'tagmanifest' => { |
65
|
|
|
|
|
|
|
'manifest-md5.txt' => '48e8a074bfe09aa17aa2ca4086b48608', |
66
|
|
|
|
|
|
|
'bag-info.txt' => '74a18a1c9f491f7f2360cbd25bb2143e', |
67
|
|
|
|
|
|
|
'bagit.txt' => '9e5ad981e0d29adc278f6a294b8c2aca' |
68
|
|
|
|
|
|
|
}, |
69
|
|
|
|
|
|
|
}; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 METHODS |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
This module inherits all methods of L<Catmandu::Importer> and by this |
74
|
|
|
|
|
|
|
L<Catmandu::Iterable>. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 CONFIGURATION |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
In addition to the configuration provided by L<Catmandu::Importer> the importer can |
79
|
|
|
|
|
|
|
be configured with the following parameters: |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=over |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item bags |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Required. An array reference pointing to zero or more BagIt directories. Or, a string that can |
86
|
|
|
|
|
|
|
be used as a glob pointing to zero or more directories. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=item include_manifests |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
If set to a true value, then all manifest files will be parsed and included into the BagIt record. |
91
|
|
|
|
|
|
|
Be aware, these checksums will be invalid as soon as you manipulate the BagIt record or files on disk. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item include_payloads |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
If set to a true value, then all payload locations will be parsed and included in the BagIt record. |
96
|
|
|
|
|
|
|
Be aware, changing the payload sections will store new data on disk. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
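=item verify |
|
If set to a true value, then each bag will be validated and the result will be stored as 1 or 0 in the 'is_valid' field of the BagIt record. |
|
=back |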
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 SEE ALSO |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
L<Catmandu>, |
103
|
|
|
|
|
|
|
L<Catmandu::Importer>, |
104
|
|
|
|
|
|
|
L<Archive::BagIt> |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 AUTHOR |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Patrick Hochstenbach <Patrick.Hochstenbach@UGent.be> |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
This software is copyright (c) 2014 by Patrick Hochstenbach. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
115
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=cut |
118
|
|
|
|
|
|
|
|
119
|
2
|
|
|
2
|
|
239686
|
use namespace::clean; |
|
2
|
|
|
|
|
15781
|
|
|
2
|
|
|
|
|
13
|
|
120
|
2
|
|
|
2
|
|
1283
|
use Catmandu::Sane; |
|
2
|
|
|
|
|
352151
|
|
|
2
|
|
|
|
|
15
|
|
121
|
2
|
|
|
2
|
|
651
|
use Catmandu::Util qw(:is); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
546
|
|
122
|
2
|
|
|
2
|
|
1382
|
use Catmandu::BagIt; |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
84
|
|
123
|
2
|
|
|
2
|
|
14
|
use Moo; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
9
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Consume the Catmandu importer role.
with 'Catmandu::Importer';

# bags: mandatory. generator() accepts either an array reference of
# directories or a string that is expanded with glob().
has bags => (
    is       => 'ro',
    required => 1,
);

# Optional read-only switches, each defaulting to undef (i.e. switched off).
# read_bag() only tests them for truth:
#   include_manifests - add the 'manifest' and 'tagmanifest' checksums
#   include_payloads  - add 'payload_files' and 'non_payload_files'
#   verify            - add the 'is_valid' flag
has [qw(include_manifests include_payloads verify)] => (
    is      => 'ro',
    default => sub { undef },
);
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Build the iterator over all configured bags.
#
# The 'bags' attribute is resolved once, up front, into a queue of candidate
# directories: an array reference is used as given; any other value is
# expanded with glob() and only the matches that are directories are kept.
#
# Returns a code reference. Each call yields the record produced by
# read_bag() for the next usable directory, or undef when the queue is empty.
sub generator {
    my ($self) = @_;

    my $source = $self->bags;
    my @bags   = is_array_ref($source)
        ? @{$source}
        : grep { -d $_ } glob($source);

    return sub {
        # Skip entries that are undefined, unreadable or that read_bag()
        # could not turn into a record. Previously the first such entry made
        # this closure return undef, which silently dropped every bag still
        # waiting in the queue.
        # NOTE(review): relies on the Catmandu generator convention that an
        # undef return ends the iteration -- confirm.
        while (@bags) {
            my $dir = shift @bags;
            next unless defined $dir && -r $dir;

            my $bag = $self->read_bag($dir);
            return $bag if defined $bag;
        }

        return;
    };
}
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Read the BagIt directory $dir and convert it into a plain hash record.
#
# Parameters:
#   $dir - path of the bag; it is also stored as the record '_id'.
#
# Returns a hash reference with the keys:
#   _id, version        - always present
#   is_valid            - 1 or 0, only when the 'verify' option is true
#   tags                - bag-info tags, only when the bag has any
#   payload_files,
#   non_payload_files   - only when the 'include_payloads' option is true
#   tagmanifest,
#   manifest            - only when the 'include_manifests' option is true
#                         and the bag lists the corresponding checksums
# Returns undef (empty list in list context) when the bag could not be read.
sub read_bag {
    my ($self, $dir) = @_;

    my $bagit = Catmandu::BagIt->read($dir);

    # generator() is written to cope with an undefined result from this
    # method, so report an unreadable bag that way instead of dying on the
    # method calls below.
    # NOTE(review): assumes Catmandu::BagIt->read returns undef on failure --
    # confirm against its documentation.
    return unless defined $bagit;

    my $item = {
        _id     => $dir,
        version => $bagit->version,
    };

    if ($self->verify) {
        $item->{is_valid} = $bagit->valid ? 1 : 0;
    }

    for my $tag ($bagit->list_info_tags) {
        # A tag may carry several values; they are concatenated without a
        # separator into a single string.
        my @values = $bagit->get_info($tag);
        $item->{tags}->{$tag} = join "", @values;
    }

    if ($self->include_payloads) {
        # Payload names are reported relative to the bag root, hence the
        # 'data/' prefix.
        $item->{payload_files}
            = [ map { "data/" . $_->filename } $bagit->list_files ];
        $item->{non_payload_files} = [ $bagit->list_tagsum ];
    }

    if ($self->include_manifests) {
        # Checksums of the tag files.
        for my $file ($bagit->list_tagsum) {
            $item->{tagmanifest}->{$file} = $bagit->get_tagsum($file);
        }

        # Checksums of the payload files, keyed like 'payload_files'.
        for my $file ($bagit->list_checksum) {
            $item->{manifest}->{"data/$file"} = $bagit->get_checksum($file);
        }
    }

    return $item;
}
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1; |