| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Catmandu::Exporter::BagIt; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.250'; |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Catmandu::Exporter::BagIt - Package that exports data as BagIts |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Catmandu::Exporter::BagIt; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my $exporter = Catmandu::Exporter::BagIt->new( |
|
14
|
|
|
|
|
|
|
overwrite => 0 , |
|
15
|
|
|
|
|
|
|
); |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
$exporter->add($bagit_record); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
$exporter->commit; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 BagIt |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
The parsed BagIt record is a HASH containing the key '_id' containing the BagIt directory name |
|
24
|
|
|
|
|
|
|
and one or more fields: |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
{ |
|
27
|
|
|
|
|
|
|
'_id' => 'bags/demo01', |
|
28
|
|
|
|
|
|
|
'version' => '0.97', # Not required, all bags will be 0.97 |
|
29
|
|
|
|
|
|
|
'tags' => { |
|
30
|
|
|
|
|
|
|
'Bagging-Date' => '2014-10-03', # Not required, generated ... |
|
31
|
|
|
|
|
|
|
'Bag-Software-Agent' => 'FooBar', # Not required, generated ... |
|
32
|
|
|
|
|
|
|
'DC-Title' => 'My downloads' , |
|
33
|
|
|
|
|
|
|
'DC-Creator' => 'Bunny, Bugs' , |
|
34
|
|
|
|
|
|
|
}, |
|
35
|
|
|
|
|
|
|
}, |
|
36
|
|
|
|
|
|
|
'fetch' => [ |
|
37
|
|
|
|
|
|
|
{ 'http://server/download1.pdf' => 'data/my_download1.pdf' } , |
|
38
|
|
|
|
|
|
|
{ 'http://server2/download2.pdf' => 'data/my_download2.pdf' } , |
|
39
|
|
|
|
|
|
|
], |
|
40
|
|
|
|
|
|
|
}; |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
All URL's in the fetch array will be mirrored and added to the bag. All payload files should |
|
43
|
|
|
|
|
|
|
be put in the 'data' subdirectory as shown in the example above. |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
You can also add files from disk, using the "files" array: |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
{ |
|
48
|
|
|
|
|
|
|
'_id' => 'bags/demo01', |
|
49
|
|
|
|
|
|
|
'files' => [ |
|
50
|
|
|
|
|
|
|
{ '/tmp/download1.pdf' => 'data/my_download1.pdf' } , |
|
51
|
|
|
|
|
|
|
{ '/tmp/download2.pdf' => 'data/my_download2.pdf' } , |
|
52
|
|
|
|
|
|
|
], |
|
53
|
|
|
|
|
|
|
}; |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 METHODS |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
This module inherits all methods of L<Catmandu::Exporter>. |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 CONFIGURATION |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
In addition to the configuration provided by L<Catmandu::Exporter> the exporter can |
|
62
|
|
|
|
|
|
|
be configured with the following parameters: |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=over |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item ignore_existing |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Optional. Skip an item when the BagIt for it already exists. |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=item overwrite |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Optional. Throws an Catmandu::Error when the exporter tries to overwrite an existing directory. |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
L<Catmandu>, |
|
79
|
|
|
|
|
|
|
L<Catmandu::Exporter>, |
|
80
|
|
|
|
|
|
|
L<Archive::BagIt> |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Patrick Hochstenbach <Patrick.Hochstenbach@UGent.be> |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
This software is copyright (c) 2014 by Patrick Hochstenbach. |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
91
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=cut |
|
94
|
|
|
|
|
|
|
|
|
95
|
1
|
|
|
1
|
|
819
|
use namespace::clean; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
7
|
|
|
96
|
1
|
|
|
1
|
|
206
|
use Catmandu::Sane; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
7
|
|
|
97
|
1
|
|
|
1
|
|
275
|
use Catmandu::BagIt; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
35
|
|
|
98
|
1
|
|
|
1
|
|
7
|
use Path::Tiny; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
61
|
|
|
99
|
1
|
|
|
1
|
|
7
|
use File::Spec; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
22
|
|
|
100
|
1
|
|
|
1
|
|
5
|
use IO::File; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
161
|
|
|
101
|
1
|
|
|
1
|
|
489
|
use LWP::Simple; |
|
|
1
|
|
|
|
|
8289
|
|
|
|
1
|
|
|
|
|
6
|
|
|
102
|
1
|
|
|
1
|
|
359
|
use Moo; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
8
|
|
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
with 'Catmandu::Exporter'; |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
has user_agent => (is => 'ro'); |
|
107
|
|
|
|
|
|
|
has ignore_existing => (is => 'ro' , default => sub { 0 }); |
|
108
|
|
|
|
|
|
|
has overwrite => (is => 'ro' , default => sub { 0 }); |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub _mtime { |
|
111
|
0
|
|
|
0
|
|
|
my $file = $_[0]; |
|
112
|
0
|
|
|
|
|
|
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = stat($file); |
|
113
|
0
|
|
|
|
|
|
return $mtime; |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub add { |
|
117
|
|
|
|
|
|
|
my ($self, $data) = @_; |
|
118
|
|
|
|
|
|
|
my $directory = $data->{_id}; |
|
119
|
|
|
|
|
|
|
$directory =~ s{\/$}{}; |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
return 1 if -d $directory && $self->ignore_existing; |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Catmandu::Error->throw("$directory exists") if -d $directory && ! $self->overwrite; |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
my $bagit = defined($self->user_agent) ? |
|
126
|
|
|
|
|
|
|
Catmandu::BagIt->new(user_agent => $self->user_agent) : |
|
127
|
|
|
|
|
|
|
Catmandu::BagIt->new(); |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
if (exists $data->{tags}) { |
|
130
|
|
|
|
|
|
|
for my $tag (keys %{$data->{tags}}) { |
|
131
|
|
|
|
|
|
|
$bagit->add_info($tag,$data->{tags}->{$tag}); |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
if (exists $data->{fetch}) { |
|
136
|
|
|
|
|
|
|
for my $fetch (@{$data->{fetch}}) { |
|
137
|
|
|
|
|
|
|
my ($url) = keys %$fetch; |
|
138
|
|
|
|
|
|
|
my $file = $fetch->{$url}; |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
my $data_dir = File::Spec->catfile($directory,'data'); |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
path($data_dir)->mkpath unless -d $data_dir; |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
my $tmp = Path::Tiny->tempfile |
|
145
|
|
|
|
|
|
|
or Catmandu::Error->throw("Could not create temp file"); |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# For now using a simplistic mirror operation |
|
148
|
|
|
|
|
|
|
my $fname = $tmp->stringify; |
|
149
|
|
|
|
|
|
|
my $response = $bagit->user_agent->mirror($url,$fname); |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
unless ($response->is_success) { |
|
152
|
|
|
|
|
|
|
undef($tmp); |
|
153
|
|
|
|
|
|
|
Catmandu::Error->throw("failed to mirror $url to $fname : " . $response->status_line); |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
$file =~ s{^data/}{}; |
|
157
|
|
|
|
|
|
|
$bagit->add_file($file,IO::File->new($fname)); |
|
158
|
|
|
|
|
|
|
# close the bag to keep the number of open file handles to a minimum |
|
159
|
|
|
|
|
|
|
# only the files that are flagged 'dirty' will be written |
|
160
|
|
|
|
|
|
|
$bagit->write($directory, overwrite => 1); |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
undef($tmp); |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
if ( exists $data->{files} ) { |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
for my $file ( @{ $data->{files} } ) { |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
my($source) = keys %$file; |
|
170
|
|
|
|
|
|
|
my $destination = $file->{$source}; |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
-f $source or Catmandu::Error->throw("source file $source does not exist"); |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
my $data_dir = File::Spec->catfile( $directory, "data" ); |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
path($data_dir)->mkpath unless -d $data_dir; |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
my $destination_path = File::Spec->catfile( $directory, $destination ); |
|
179
|
|
|
|
|
|
|
my $destination_entry = $destination; |
|
180
|
|
|
|
|
|
|
$destination_entry =~ s{^data/}{}; |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
#only add when destination is either older, or does not exist yet |
|
183
|
|
|
|
|
|
|
if ( |
|
184
|
|
|
|
|
|
|
(-f $destination_path && _mtime($source) > _mtime($destination_path)) || |
|
185
|
|
|
|
|
|
|
!(-f $destination_path) |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
) { |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
$bagit->add_file($destination_entry, IO::File->new($source)); |
|
190
|
|
|
|
|
|
|
$bagit->write($directory, overwrite => 1); |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
1; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub commit { 1 } |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
1; |