line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Exporter::BagIt; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.250'; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Catmandu::Exporter::BagIt - Package that exports data as BagIts |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Catmandu::Exporter::BagIt; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my $exporter = Catmandu::Exporter::BagIt->new( |
14
|
|
|
|
|
|
|
overwrite => 0 , |
15
|
|
|
|
|
|
|
); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
$exporter->add($bagit_record); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
$exporter->commit; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 BagIt |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
The BagIt record to export is a HASH with the key '_id' holding the BagIt directory name |
24
|
|
|
|
|
|
|
and one or more fields: |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
{ |
27
|
|
|
|
|
|
|
'_id' => 'bags/demo01', |
28
|
|
|
|
|
|
|
'version' => '0.97', # Not required, all bags will be 0.97 |
29
|
|
|
|
|
|
|
'tags' => { |
30
|
|
|
|
|
|
|
'Bagging-Date' => '2014-10-03', # Not required, generated ... |
31
|
|
|
|
|
|
|
'Bag-Software-Agent' => 'FooBar', # Not required, generated ... |
32
|
|
|
|
|
|
|
'DC-Title' => 'My downloads' , |
33
|
|
|
|
|
|
|
'DC-Creator' => 'Bunny, Bugs' , |
34
|
|
|
|
|
|
|
}, |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
'fetch' => [ |
37
|
|
|
|
|
|
|
{ 'http://server/download1.pdf' => 'data/my_download1.pdf' } , |
38
|
|
|
|
|
|
|
{ 'http://server2/download2.pdf' => 'data/my_download2.pdf' } , |
39
|
|
|
|
|
|
|
], |
40
|
|
|
|
|
|
|
}; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
All URLs in the fetch array will be mirrored and added to the bag. All payload files should |
43
|
|
|
|
|
|
|
be put in the 'data' subdirectory as shown in the example above. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
You can also add files from disk, using the "files" array: |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
{ |
48
|
|
|
|
|
|
|
'_id' => 'bags/demo01', |
49
|
|
|
|
|
|
|
'files' => [ |
50
|
|
|
|
|
|
|
{ '/tmp/download1.pdf' => 'data/my_download1.pdf' } , |
51
|
|
|
|
|
|
|
{ '/tmp/download2.pdf' => 'data/my_download2.pdf' } , |
52
|
|
|
|
|
|
|
], |
53
|
|
|
|
|
|
|
}; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 METHODS |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
This module inherits all methods of L<Catmandu::Exporter>. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 CONFIGURATION |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
In addition to the configuration provided by L<Catmandu::Exporter> the exporter can |
62
|
|
|
|
|
|
|
be configured with the following parameters: |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=over |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item ignore_existing |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Optional. Skip an item when the BagIt for it already exists. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=item overwrite |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Optional. When true, an existing BagIt directory may be overwritten. When false (the default), a Catmandu::Error is thrown if the directory already exists. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 SEE ALSO |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
L<Catmandu>, |
79
|
|
|
|
|
|
|
L<Catmandu::Exporter>, |
80
|
|
|
|
|
|
|
L<Archive::BagIt> |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Patrick Hochstenbach <Patrick.Hochstenbach@UGent.be> |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
This software is copyright (c) 2014 by Patrick Hochstenbach. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
91
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=cut |
94
|
|
|
|
|
|
|
|
95
|
1
|
|
|
1
|
|
819
|
use namespace::clean; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
7
|
|
96
|
1
|
|
|
1
|
|
206
|
use Catmandu::Sane; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
97
|
1
|
|
|
1
|
|
275
|
use Catmandu::BagIt; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
98
|
1
|
|
|
1
|
|
7
|
use Path::Tiny; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
61
|
|
99
|
1
|
|
|
1
|
|
7
|
use File::Spec; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
100
|
1
|
|
|
1
|
|
5
|
use IO::File; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
161
|
|
101
|
1
|
|
|
1
|
|
489
|
use LWP::Simple; |
|
1
|
|
|
|
|
8289
|
|
|
1
|
|
|
|
|
6
|
|
102
|
1
|
|
|
1
|
|
359
|
use Moo; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Consume the generic Catmandu exporter role.
with 'Catmandu::Exporter';

# Optional user agent; when defined it is handed to Catmandu::BagIt->new.
has user_agent => (
    is => 'ro',
);

# When true, add() returns early for a bag directory that already exists.
has ignore_existing => (
    is      => 'ro',
    default => sub { 0 },
);

# When false (the default), add() throws if the bag directory already exists.
has overwrite => (
    is      => 'ro',
    default => sub { 0 },
);
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Return the last-modification time of the given path as reported by stat(),
# or undef when stat() yields nothing (e.g. the path does not exist).
sub _mtime {
    my ($path) = @_;

    # Element 9 of the stat() list is the mtime.
    my $modified = (stat($path))[9];

    return $modified;
}
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Export one record as a BagIt directory.
#
# Parameters:
#   $data - HASH ref with the keys:
#       _id   - target bag directory (a single trailing '/' is stripped)
#       tags  - optional HASH of tag => value pairs passed to add_info
#       fetch - optional ARRAY of { url => 'data/name' } hashes; every URL is
#               mirrored into a temporary file which is added to the bag
#       files - optional ARRAY of { '/local/path' => 'data/name' } hashes; a
#               file is added when its destination is missing or older than
#               the source
#
# Returns 1.
#
# Throws Catmandu::Error when the record has no _id, when the directory
# exists and neither ignore_existing nor overwrite is set, when a URL can
# not be mirrored, when a source file does not exist, or when a file can not
# be opened for reading.
sub add {
    my ($self, $data) = @_;

    my $directory = $data->{_id};

    # Without an _id there is no directory to write to; fail loudly instead
    # of running the substitution below on an undefined value.
    Catmandu::Error->throw("record has no _id : cannot determine the BagIt directory")
        unless defined($directory) && length($directory);

    $directory =~ s{\/$}{};

    if (-d $directory) {
        return 1 if $self->ignore_existing;
        Catmandu::Error->throw("$directory exists") unless $self->overwrite;
    }

    my $bagit = defined($self->user_agent)
        ? Catmandu::BagIt->new(user_agent => $self->user_agent)
        : Catmandu::BagIt->new();

    if (exists $data->{tags}) {
        for my $tag (keys %{$data->{tags}}) {
            $bagit->add_info($tag, $data->{tags}->{$tag});
        }
    }

    # Payload directory of the bag; only created once there is an entry
    # to store in it.
    my $data_dir = File::Spec->catdir($directory, 'data');

    # NOTE(review): write() is only reached from inside the loops below, so a
    # record with tags but without any fetch/files entry never produces a bag
    # on disk -- confirm whether that is intended.

    if (exists $data->{fetch}) {
        for my $fetch (@{$data->{fetch}}) {
            # Each entry is expected to be a single { url => file } pair.
            my ($url) = keys %$fetch;
            my $file  = $fetch->{$url};

            path($data_dir)->mkpath unless -d $data_dir;

            my $tmp = Path::Tiny->tempfile
                or Catmandu::Error->throw("Could not create temp file");

            # For now using a simplistic mirror operation
            # NOTE(review): if user_agent is an LWP::UserAgent, mirror() may
            # send If-Modified-Since based on the freshly created temp file
            # and receive a 304, which is not a success -- confirm.
            my $fname    = $tmp->stringify;
            my $response = $bagit->user_agent->mirror($url, $fname);

            unless ($response->is_success) {
                undef($tmp);
                Catmandu::Error->throw("failed to mirror $url to $fname : " . $response->status_line);
            }

            my $fh = IO::File->new($fname)
                or Catmandu::Error->throw("failed to open $fname for reading : $!");

            $file =~ s{^data/}{};
            $bagit->add_file($file, $fh);

            # close the bag to keep the number of open file handles to a minimum
            # only the files that are flagged 'dirty' will be written
            $bagit->write($directory, overwrite => 1);

            # Dropping the last reference removes the temporary file.
            undef($tmp);
        }
    }

    if (exists $data->{files}) {
        for my $file (@{ $data->{files} }) {
            # Each entry is expected to be a single { source => destination } pair.
            my ($source)    = keys %$file;
            my $destination = $file->{$source};

            -f $source or Catmandu::Error->throw("source file $source does not exist");

            path($data_dir)->mkpath unless -d $data_dir;

            my $destination_path  = File::Spec->catfile($directory, $destination);
            my $destination_entry = $destination;
            $destination_entry =~ s{^data/}{};

            # only add when destination is either older, or does not exist yet
            next if -f $destination_path
                 && _mtime($source) <= _mtime($destination_path);

            my $fh = IO::File->new($source)
                or Catmandu::Error->throw("failed to open $source for reading : $!");

            $bagit->add_file($destination_entry, $fh);
            $bagit->write($directory, overwrite => 1);
        }
    }

    return 1;
}
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# Nothing is left to flush here: add() writes each bag itself.
# Always returns 1.
sub commit {
    return 1;
}
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
1; |