File Coverage

lib/App/MtAws/MetaData.pm
Criterion Covered Total %
statement 102 102 100.0
branch 46 46 100.0
condition 9 9 100.0
subroutine 26 26 100.0
pod 0 4 0.0
total 183 187 97.8


line stmt bran cond sub pod time code
1             # mt-aws-glacier - Amazon Glacier sync client
2             # Copyright (C) 2012-2014 Victor Efimov
3             # http://mt-aws.com (also http://vs-dev.com) vs@vs-dev.com
4             # License: GPLv3
5             #
6             # This file is part of "mt-aws-glacier"
7             #
8             # mt-aws-glacier is free software: you can redistribute it and/or modify
9             # it under the terms of the GNU General Public License as published by
10             # the Free Software Foundation, either version 3 of the License, or
11             # (at your option) any later version.
12             #
13             # mt-aws-glacier is distributed in the hope that it will be useful,
14             # but WITHOUT ANY WARRANTY; without even the implied warranty of
15             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16             # GNU General Public License for more details.
17             #
18             # You should have received a copy of the GNU General Public License
19             # along with this program. If not, see <http://www.gnu.org/licenses/>.
20              
21             package App::MtAws::MetaData;
22              
23             our $VERSION = '1.114_2';
24              
25 24     24   68490 use strict;
  24         43  
  24         614  
26 24     24   98 use warnings;
  24         33  
  24         554  
27 24     24   88 use utf8;
  24         21  
  24         122  
28 24     24   515 use Encode;
  24         29  
  24         1631  
29              
30 24     24   11450 use MIME::Base64;
  24         12674  
  24         1169  
31 24     24   3926 use JSON::XS;
  24         26357  
  24         1035  
32 24     24   3975 use POSIX;
  24         35009  
  24         158  
33 24     24   44386 use Time::Local;
  24         39  
  24         1109  
34 24     24   12169 use App::MtAws::DateTime;
  24         48  
  24         1137  
35              
36 24     24   124 use constant MAX_SIZE => 1024;
  24         30  
  24         1124  
37 24     24   90 use constant META_JOB_TYPE_FULL => 'full';
  24         27  
  24         899  
38              
39 24     24   86 use Exporter 'import';
  24         26  
  24         22121  
40              
41             our @EXPORT = qw/meta_decode meta_job_decode meta_encode meta_job_encode META_JOB_TYPE_FULL/;
42             =pod
43              
44             MT-AWS-GLACIER metadata format ('x-amz-archive-description' field).
45              
46             Function definitions:
47             =====================
48             base64url() input - byte sequence, output - byte sequence
49             Is Base64 URL algorithm: http://en.wikipedia.org/wiki/Base64#URL_applications
50             basically it's base64 but with '=' padding removed, characters '+', '/' replaced with '-', '_' resp. and no new lines.
51              
52             json_utf8() - input - Hash, output - byte sequence
53             JSON string in UTF-8 representation. Can contain not-escaped UTF-8 characters. Will not contain linefeed. Hash objects are unordered.
54              
55             latin1_to_utf8() - input - byte sequence, output - byte sequence
56             Treats input data as Latin1 (ISO 8859-1) encoded sequence and converts it to UTF-8 sequence
57              
58             iso8601() - input - time, output - character string
59             ISO8601 time in the following format: YYYYMMDDTHHMMSSZ. Only UTC timezone. No leap seconds supported.
60             Supported year range is from 1000 to 9999
61             When encoding with iso8601() mt-aws-glacier will not store leap seconds. When decoding from iso8601 leap seconds will be dropped.
62              
63             {'filename': FILENAME, 'mtime': iso8601(MTIME)}
64             Hash with two keys: 'filename' and 'mtime'. Corresponds to JSON 'Object'.
65              
66             Input data:
67             =====================
68              
69             FILENAME (character string)
70             Is a relative filename (no leading slash). Filename is taken from file system and treated as a character sequence
71             with known encoding.
72             MTIME (time)
73             is the file's last modification time with 1 second resolution. Can be below Y1970.
74             Internal representation is epoch time, so it can be any valid epoch time (including negative values and zero). Supported
75             range - from year 1000 to 9999 (inclusive)
76              
77             Version 'mt2'
78             =====================
79              
80             x-amz-archive-description = 'mt2' <space> base64url(json_utf8({'filename': FILENAME, 'mtime': iso8601(MTIME)}))
81              
82             Version 'mt1'
83             =====================
84              
85             x-amz-archive-description = 'mt1' <space> base64url(latin1_to_utf8(json_utf8({'filename': FILENAME, 'mtime': iso8601(MTIME)})))
86              
87             This format actually contains a bug - data is double encoded. However it does not affect data integrity. UTF-8 double encoded data can be
88             perfectly decoded (see http://www.j3e.de/linux/convmv/man/) - that's why the bug went unnoticed for one month.
89             This format was in use starting from version 0.80beta (2012-12-27) till 0.84beta (2013-01-28).
90              
91             NOTES:
92             =====================
93              
94             1) This specification assumes that in our programming language we have two different types of Strings: Byte string (byte sequence) and Character strings.
95             Byte string is sequence of octets. Character string is an internal representation of sequence of characters. Character strings cannot have encodings
96             by definition - it's internal, encoding is known to language implementation.
97              
98             Some programming languages (like Ruby) have a different model, in which every string is a sequence of bytes with a known encoding (or no encoding at all).
99              
100             2) According to this spec, the same (FILENAME, MTIME) values can produce different x-amz-archive-description values, as a JSON hash is unordered.
101              
102             3) This specification explains how to _encode_ data (because it's a specification). However it's easy to
103             understand how to decode it back.
104              
105             4) Path separator in filename is '/'
106              
107             =cut
108              
109             my $meta_coder = ($JSON::XS::VERSION ge '1.4') ?
110             JSON::XS->new->utf8->max_depth(1)->max_size(MAX_SIZE) : # some additional abuse-protection
111             JSON::XS->new->utf8; # it's still protected by length checking below
112              
113             sub meta_decode
114             {
115 2478     2478 0 1042751 my ($str) = @_;
116 2478 100       5210 return unless defined $str; # protect from undef $str
117              
118 2475         3853 my ($marker, $b64) = _split_meta($str);
119 2475 100       4139 return unless defined $marker;
120 2462 100       4340 if ($marker eq 'mt1') {
    100          
121 2080         2799 return _decode_filename_and_mtime(_decode_json(_decode_utf8(_decode_b64($b64))));
122             } elsif ($marker eq 'mt2') {
123 380         527 return _decode_filename_and_mtime(_decode_json(_decode_b64($b64)));
124             } else {
125 2         5 return;
126             }
127             }
128              
129             sub meta_job_decode
130             {
131 18     18 0 1106 my ($str) = @_;
132 18 100       39 return unless defined $str; # protect from undef $str
133              
134 16         30 my ($marker, $b64) = _split_meta($str);
135 16 100       34 return unless defined $marker;
136 15 100       26 if ($marker eq 'mtijob1') {
137 13         23 _decode_jobs(_decode_json(_decode_b64($b64)));
138             } else {
139 2         8 return;
140             }
141             }
142              
143             sub _split_meta
144             {
145 2491     2491   2424 my ($str) = @_;
146 2491         5839 my ($marker, $b64) = split(' ', $str); # split will return empty list if string is empty or contains spaces only
147 2491 100 100     10419 return if !defined $b64 || length($b64) > MAX_SIZE;
148 2477         4828 return ($marker, $b64);
149             }
150              
151             sub _decode_b64
152             {
153 2507     2507   9535 my ($str) = @_;
154 2507         2592 return eval {
155 2507         2876 $str =~ tr{-_}{+/};
156 2507         3356 my $padding_n = length($str) % 4;
157 2507 100       4676 $str .= ('=' x (4 - $padding_n) ) if $padding_n;
158 2507         8028 MIME::Base64::decode_base64($str);
159             }; # undef if eval failed
160             }
161              
162             sub _decode_utf8
163             {
164 2101     2101   3676 my ($str) = @_;
165 2101 100       3017 return unless defined $str;
166 2099         1737 return eval {
167 2099         6686 decode("UTF-8", $str, Encode::DIE_ON_ERR|Encode::LEAVE_SRC)
168             }; # undef if eval failed
169             }
170              
171             sub _decode_json
172             {
173 2482     2482   72787 my ($str) = @_;
174 2482 100       4254 return unless defined $str;
175 2480         2324 eval { $meta_coder->decode($str) }
  2480         14123  
176             }
177              
178             sub _decode_filename_and_mtime
179             {
180 2467     2467   2581 my ($h) = @_;
181 2467 100       3894 return unless defined $h;
182 2453 100 100     9138 return unless defined($h->{filename}) && defined($h->{mtime});
183             # TODO: is that good to return undef everytime something missing? Maybe return error in case signature etc
184             # correct but time is broken - it's more robust.
185 2447 100       5468 defined(my $mtime = iso8601_to_epoch($h->{mtime})) or return;
186 2441         60314 return ($h->{filename}, $mtime);
187             }
188              
189             sub _decode_jobs
190             {
191 13     13   15 my ($h) = @_;
192 13 100       34 return unless defined $h;
193 9 100       27 return unless defined($h->{type});
194 7         36 return ($h->{type});
195             }
196              
197             sub meta_encode
198             {
199 420     420 0 135675 my ($relfilename, $mtime) = @_;
200 420 100 100     1717 return unless defined($mtime) && defined($relfilename);
201 418 100       698 defined(my $res = _encode_b64(_encode_json(_encode_filename_and_mtime($relfilename, $mtime)))) or return;
202 417         1253 $res = "mt2 ".$res;
203 417 100       842 return if length($res) > MAX_SIZE;
204 415         917 return $res;
205             }
206              
207             sub meta_job_encode
208             {
209 8     8 0 16278 my ($type) = @_;
210 8         29 my $res = "mtijob1 "._encode_b64(_encode_json({ type => $type }));
211 8 100       28 return if length($res) > MAX_SIZE;
212 7         23 return $res;
213             }
214              
215             sub _encode_b64
216             {
217 462     462   3314 my ($str) = @_;
218 462 100       760 return unless defined $str;
219 461         1192 my $res = MIME::Base64::encode_base64($str,'');
220 461         1621 $res =~ s/=+\z//;
221 461         669 $res =~ tr{+/}{-_};
222 461         997 return $res;
223             }
224              
225             sub _encode_utf8
226             {
227 18     18   4242 my ($str) = @_;
228 18         79 return encode("UTF-8",$str,Encode::DIE_ON_ERR|Encode::LEAVE_SRC);
229             }
230              
231             sub _encode_filename_and_mtime
232             {
233 426     426   4326 my ($relfilename, $mtime) = @_;
234 426 100       899 defined(my $iso = epoch_to_iso8601($mtime)) or return;
235             return {
236 425         1772 mtime => $iso,
237             filename => $relfilename
238             };
239             }
240              
241             sub _encode_json
242             {
243 435     435   529 my ($h) = @_;
244 435 100       706 return unless defined $h;
245 434         2804 return $meta_coder->encode($h);
246             }
247              
248             1;
249              
250             __END__