line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# mt-aws-glacier - Amazon Glacier sync client |
2
|
|
|
|
|
|
|
# Copyright (C) 2012-2014 Victor Efimov |
3
|
|
|
|
|
|
|
# http://mt-aws.com (also http://vs-dev.com) vs@vs-dev.com |
4
|
|
|
|
|
|
|
# License: GPLv3 |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# This file is part of "mt-aws-glacier" |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# mt-aws-glacier is free software: you can redistribute it and/or modify |
9
|
|
|
|
|
|
|
# it under the terms of the GNU General Public License as published by |
10
|
|
|
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
11
|
|
|
|
|
|
|
# (at your option) any later version. |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
# mt-aws-glacier is distributed in the hope that it will be useful, |
14
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
15
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16
|
|
|
|
|
|
|
# GNU General Public License for more details. |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
19
|
|
|
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
package App::MtAws::MetaData; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $VERSION = '1.114_2'; |
24
|
|
|
|
|
|
|
|
25
|
24
|
|
|
24
|
|
68490
|
use strict; |
|
24
|
|
|
|
|
43
|
|
|
24
|
|
|
|
|
614
|
|
26
|
24
|
|
|
24
|
|
98
|
use warnings; |
|
24
|
|
|
|
|
33
|
|
|
24
|
|
|
|
|
554
|
|
27
|
24
|
|
|
24
|
|
88
|
use utf8; |
|
24
|
|
|
|
|
21
|
|
|
24
|
|
|
|
|
122
|
|
28
|
24
|
|
|
24
|
|
515
|
use Encode; |
|
24
|
|
|
|
|
29
|
|
|
24
|
|
|
|
|
1631
|
|
29
|
|
|
|
|
|
|
|
30
|
24
|
|
|
24
|
|
11450
|
use MIME::Base64; |
|
24
|
|
|
|
|
12674
|
|
|
24
|
|
|
|
|
1169
|
|
31
|
24
|
|
|
24
|
|
3926
|
use JSON::XS; |
|
24
|
|
|
|
|
26357
|
|
|
24
|
|
|
|
|
1035
|
|
32
|
24
|
|
|
24
|
|
3975
|
use POSIX; |
|
24
|
|
|
|
|
35009
|
|
|
24
|
|
|
|
|
158
|
|
33
|
24
|
|
|
24
|
|
44386
|
use Time::Local; |
|
24
|
|
|
|
|
39
|
|
|
24
|
|
|
|
|
1109
|
|
34
|
24
|
|
|
24
|
|
12169
|
use App::MtAws::DateTime; |
|
24
|
|
|
|
|
48
|
|
|
24
|
|
|
|
|
1137
|
|
35
|
|
|
|
|
|
|
|
36
|
24
|
|
|
24
|
|
124
|
use constant MAX_SIZE => 1024; |
|
24
|
|
|
|
|
30
|
|
|
24
|
|
|
|
|
1124
|
|
37
|
24
|
|
|
24
|
|
90
|
use constant META_JOB_TYPE_FULL => 'full'; |
|
24
|
|
|
|
|
27
|
|
|
24
|
|
|
|
|
899
|
|
38
|
|
|
|
|
|
|
|
39
|
24
|
|
|
24
|
|
86
|
use Exporter 'import'; |
|
24
|
|
|
|
|
26
|
|
|
24
|
|
|
|
|
22121
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
our @EXPORT = qw/meta_decode meta_job_decode meta_encode meta_job_encode META_JOB_TYPE_FULL/; |
42
|
|
|
|
|
|
|
=pod |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
MT-AWS-GLACIER metadata format ('x-amz-archive-description' field). |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
Function definitions: |
47
|
|
|
|
|
|
|
===================== |
48
|
|
|
|
|
|
|
base64url() input - byte sequence, output - byte sequence |
49
|
|
|
|
|
|
|
Is Base64 URL algorithm: http://en.wikipedia.org/wiki/Base64#URL_applications |
50
|
|
|
|
|
|
|
Basically it is Base64 with the '=' padding removed, the characters '+' and '/' replaced with '-' and '_' respectively, and no newlines. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
json_utf8() - input - Hash, output - byte sequence |
53
|
|
|
|
|
|
|
JSON string in UTF-8 representation. May contain unescaped UTF-8 characters. Will not contain linefeeds. Hash objects are unordered. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
latin1_to_utf8() - input - byte sequence, output - byte sequence |
56
|
|
|
|
|
|
|
Treats input data as Latin1 (ISO 8859-1) encoded sequence and converts it to UTF-8 sequence |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
iso8601() - input - time, output - character string |
59
|
|
|
|
|
|
|
ISO 8601 time in the following format: YYYYMMDDTHHMMSSZ. Only the UTC timezone is supported. Leap seconds are not supported. |
60
|
|
|
|
|
|
|
Supported year range is from 1000 to 9999 |
61
|
|
|
|
|
|
|
When encoding with iso8601(), mt-aws-glacier will not store leap seconds. When decoding from iso8601, leap seconds will be dropped. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
{'filename': FILENAME, 'mtime': iso8601(MTIME)} |
64
|
|
|
|
|
|
|
Hash with two keys: 'filename' and 'mtime'. Corresponds to JSON 'Object'. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Input data: |
67
|
|
|
|
|
|
|
===================== |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
FILENAME (character string) |
70
|
|
|
|
|
|
|
Is a relative filename (no leading slash). Filename is taken from file system and treated as a character sequence |
71
|
|
|
|
|
|
|
with known encoding. |
72
|
|
|
|
|
|
|
MTIME (time) |
73
|
|
|
|
|
|
|
is file last modification time with 1 second resolution. Can be below Y1970. |
74
|
|
|
|
|
|
|
Internal representation is epoch time, so it can be any valid epoch time (including negative values and zero). Supported |
75
|
|
|
|
|
|
|
range - from year 1000 to 9999 (inclusive) |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Version 'mt2' |
78
|
|
|
|
|
|
|
===================== |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
x-amz-archive-description = 'mt2' <space> base64url(json_utf8({'filename': FILENAME, 'mtime': iso8601(MTIME)})) |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Version 'mt1' |
83
|
|
|
|
|
|
|
===================== |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
x-amz-archive-description = 'mt1' <space> base64url(latin1_to_utf8(json_utf8({'filename': FILENAME, 'mtime': iso8601(MTIME)}))) |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
This format actually contains a bug - data is double encoded. However it does not affect data integrity. UTF-8 double encoded data can be |
88
|
|
|
|
|
|
|
perfectly decoded (see http://www.j3e.de/linux/convmv/man/) - that's why the bug went unnoticed for one month. |
89
|
|
|
|
|
|
|
This format was in use starting from version 0.80beta (2012-12-27) till 0.84beta (2013-01-28). |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
NOTES: |
92
|
|
|
|
|
|
|
===================== |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
1) This specification assumes that in our programming language we have two different types of Strings: Byte string (byte sequence) and Character strings. |
95
|
|
|
|
|
|
|
Byte string is sequence of octets. Character string is an internal representation of sequence of characters. Character strings cannot have encodings |
96
|
|
|
|
|
|
|
by definition - it's internal, encoding is known to language implementation. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Some programming languages (like Ruby) have a different model, where every string is a sequence of bytes with a known encoding (or no encoding at all). |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2) According to this spec, the same (FILENAME, MTIME) values can produce different x-amz-archive-description values, as a JSON hash is unordered. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
3) This specification explains how to _encode_ data (because it's a specification). However it's easy to |
103
|
|
|
|
|
|
|
understand how to decode it back. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
4) Path separator in filename is '/' |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my $meta_coder = ($JSON::XS::VERSION ge '1.4') ? |
110
|
|
|
|
|
|
|
JSON::XS->new->utf8->max_depth(1)->max_size(MAX_SIZE) : # some additional abuse-protection |
111
|
|
|
|
|
|
|
JSON::XS->new->utf8; # it's still protected by length checking below |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub meta_decode |
114
|
|
|
|
|
|
|
{ |
115
|
2478
|
|
|
2478
|
0
|
1042751
|
my ($str) = @_; |
116
|
2478
|
100
|
|
|
|
5210
|
return unless defined $str; # protect from undef $str |
117
|
|
|
|
|
|
|
|
118
|
2475
|
|
|
|
|
3853
|
my ($marker, $b64) = _split_meta($str); |
119
|
2475
|
100
|
|
|
|
4139
|
return unless defined $marker; |
120
|
2462
|
100
|
|
|
|
4340
|
if ($marker eq 'mt1') { |
|
|
100
|
|
|
|
|
|
121
|
2080
|
|
|
|
|
2799
|
return _decode_filename_and_mtime(_decode_json(_decode_utf8(_decode_b64($b64)))); |
122
|
|
|
|
|
|
|
} elsif ($marker eq 'mt2') { |
123
|
380
|
|
|
|
|
527
|
return _decode_filename_and_mtime(_decode_json(_decode_b64($b64))); |
124
|
|
|
|
|
|
|
} else { |
125
|
2
|
|
|
|
|
5
|
return; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub meta_job_decode |
130
|
|
|
|
|
|
|
{ |
131
|
18
|
|
|
18
|
0
|
1106
|
my ($str) = @_; |
132
|
18
|
100
|
|
|
|
39
|
return unless defined $str; # protect from undef $str |
133
|
|
|
|
|
|
|
|
134
|
16
|
|
|
|
|
30
|
my ($marker, $b64) = _split_meta($str); |
135
|
16
|
100
|
|
|
|
34
|
return unless defined $marker; |
136
|
15
|
100
|
|
|
|
26
|
if ($marker eq 'mtijob1') { |
137
|
13
|
|
|
|
|
23
|
_decode_jobs(_decode_json(_decode_b64($b64))); |
138
|
|
|
|
|
|
|
} else { |
139
|
2
|
|
|
|
|
8
|
return; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub _split_meta |
144
|
|
|
|
|
|
|
{ |
145
|
2491
|
|
|
2491
|
|
2424
|
my ($str) = @_; |
146
|
2491
|
|
|
|
|
5839
|
my ($marker, $b64) = split(' ', $str); # split will return empty list if string is empty or contains spaces only |
147
|
2491
|
100
|
100
|
|
|
10419
|
return if !defined $b64 || length($b64) > MAX_SIZE; |
148
|
2477
|
|
|
|
|
4828
|
return ($marker, $b64); |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub _decode_b64 |
152
|
|
|
|
|
|
|
{ |
153
|
2507
|
|
|
2507
|
|
9535
|
my ($str) = @_; |
154
|
2507
|
|
|
|
|
2592
|
return eval { |
155
|
2507
|
|
|
|
|
2876
|
$str =~ tr{-_}{+/}; |
156
|
2507
|
|
|
|
|
3356
|
my $padding_n = length($str) % 4; |
157
|
2507
|
100
|
|
|
|
4676
|
$str .= ('=' x (4 - $padding_n) ) if $padding_n; |
158
|
2507
|
|
|
|
|
8028
|
MIME::Base64::decode_base64($str); |
159
|
|
|
|
|
|
|
}; # undef if eval failed |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub _decode_utf8 |
163
|
|
|
|
|
|
|
{ |
164
|
2101
|
|
|
2101
|
|
3676
|
my ($str) = @_; |
165
|
2101
|
100
|
|
|
|
3017
|
return unless defined $str; |
166
|
2099
|
|
|
|
|
1737
|
return eval { |
167
|
2099
|
|
|
|
|
6686
|
decode("UTF-8", $str, Encode::DIE_ON_ERR|Encode::LEAVE_SRC) |
168
|
|
|
|
|
|
|
}; # undef if eval failed |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _decode_json |
172
|
|
|
|
|
|
|
{ |
173
|
2482
|
|
|
2482
|
|
72787
|
my ($str) = @_; |
174
|
2482
|
100
|
|
|
|
4254
|
return unless defined $str; |
175
|
2480
|
|
|
|
|
2324
|
eval { $meta_coder->decode($str) } |
|
2480
|
|
|
|
|
14123
|
|
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub _decode_filename_and_mtime |
179
|
|
|
|
|
|
|
{ |
180
|
2467
|
|
|
2467
|
|
2581
|
my ($h) = @_; |
181
|
2467
|
100
|
|
|
|
3894
|
return unless defined $h; |
182
|
2453
|
100
|
100
|
|
|
9138
|
return unless defined($h->{filename}) && defined($h->{mtime}); |
183
|
|
|
|
|
|
|
# TODO: is that good to return undef everytime something missing? Maybe return error in case signature etc |
184
|
|
|
|
|
|
|
# correct but time is broken - it's more robust. |
185
|
2447
|
100
|
|
|
|
5468
|
defined(my $mtime = iso8601_to_epoch($h->{mtime})) or return; |
186
|
2441
|
|
|
|
|
60314
|
return ($h->{filename}, $mtime); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub _decode_jobs |
190
|
|
|
|
|
|
|
{ |
191
|
13
|
|
|
13
|
|
15
|
my ($h) = @_; |
192
|
13
|
100
|
|
|
|
34
|
return unless defined $h; |
193
|
9
|
100
|
|
|
|
27
|
return unless defined($h->{type}); |
194
|
7
|
|
|
|
|
36
|
return ($h->{type}); |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub meta_encode |
198
|
|
|
|
|
|
|
{ |
199
|
420
|
|
|
420
|
0
|
135675
|
my ($relfilename, $mtime) = @_; |
200
|
420
|
100
|
100
|
|
|
1717
|
return unless defined($mtime) && defined($relfilename); |
201
|
418
|
100
|
|
|
|
698
|
defined(my $res = _encode_b64(_encode_json(_encode_filename_and_mtime($relfilename, $mtime)))) or return; |
202
|
417
|
|
|
|
|
1253
|
$res = "mt2 ".$res; |
203
|
417
|
100
|
|
|
|
842
|
return if length($res) > MAX_SIZE; |
204
|
415
|
|
|
|
|
917
|
return $res; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub meta_job_encode |
208
|
|
|
|
|
|
|
{ |
209
|
8
|
|
|
8
|
0
|
16278
|
my ($type) = @_; |
210
|
8
|
|
|
|
|
29
|
my $res = "mtijob1 "._encode_b64(_encode_json({ type => $type })); |
211
|
8
|
100
|
|
|
|
28
|
return if length($res) > MAX_SIZE; |
212
|
7
|
|
|
|
|
23
|
return $res; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub _encode_b64 |
216
|
|
|
|
|
|
|
{ |
217
|
462
|
|
|
462
|
|
3314
|
my ($str) = @_; |
218
|
462
|
100
|
|
|
|
760
|
return unless defined $str; |
219
|
461
|
|
|
|
|
1192
|
my $res = MIME::Base64::encode_base64($str,''); |
220
|
461
|
|
|
|
|
1621
|
$res =~ s/=+\z//; |
221
|
461
|
|
|
|
|
669
|
$res =~ tr{+/}{-_}; |
222
|
461
|
|
|
|
|
997
|
return $res; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub _encode_utf8 |
226
|
|
|
|
|
|
|
{ |
227
|
18
|
|
|
18
|
|
4242
|
my ($str) = @_; |
228
|
18
|
|
|
|
|
79
|
return encode("UTF-8",$str,Encode::DIE_ON_ERR|Encode::LEAVE_SRC); |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
sub _encode_filename_and_mtime |
232
|
|
|
|
|
|
|
{ |
233
|
426
|
|
|
426
|
|
4326
|
my ($relfilename, $mtime) = @_; |
234
|
426
|
100
|
|
|
|
899
|
defined(my $iso = epoch_to_iso8601($mtime)) or return; |
235
|
|
|
|
|
|
|
return { |
236
|
425
|
|
|
|
|
1772
|
mtime => $iso, |
237
|
|
|
|
|
|
|
filename => $relfilename |
238
|
|
|
|
|
|
|
}; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub _encode_json |
242
|
|
|
|
|
|
|
{ |
243
|
435
|
|
|
435
|
|
529
|
my ($h) = @_; |
244
|
435
|
100
|
|
|
|
706
|
return unless defined $h; |
245
|
434
|
|
|
|
|
2804
|
return $meta_coder->encode($h); |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
1; |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
__END__ |