File Coverage

lib/App/MtAws/Journal.pm
Criterion Covered Total %
statement 220 222 99.1
branch 148 172 86.0
condition 32 41 78.0
subroutine 33 33 100.0
pod 0 11 0.0
total 433 479 90.4


line stmt bran cond sub pod time code
1             # mt-aws-glacier - Amazon Glacier sync client
2             # Copyright (C) 2012-2014 Victor Efimov
3             # http://mt-aws.com (also http://vs-dev.com) vs@vs-dev.com
4             # License: GPLv3
5             #
6             # This file is part of "mt-aws-glacier"
7             #
8             # mt-aws-glacier is free software: you can redistribute it and/or modify
9             # it under the terms of the GNU General Public License as published by
10             # the Free Software Foundation, either version 3 of the License, or
11             # (at your option) any later version.
12             #
13             # mt-aws-glacier is distributed in the hope that it will be useful,
14             # but WITHOUT ANY WARRANTY; without even the implied warranty of
15             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16             # GNU General Public License for more details.
17             #
18             # You should have received a copy of the GNU General Public License
19             # along with this program. If not, see <http://www.gnu.org/licenses/>.
20              
21             package App::MtAws::Journal;
22              
23             our $VERSION = '1.114_2';
24              
25 23     23   785239 use strict;
  23         44  
  23         698  
26 23     23   101 use warnings;
  23         39  
  23         691  
27 23     23   116 use utf8;
  23         35  
  23         152  
28              
29              
30 23     23   553 use File::Find;
  23         35  
  23         1408  
31 23     23   115 use File::Spec 3.12;
  23         576  
  23         696  
32 23     23   114 use Encode;
  23         34  
  23         1766  
33 23     23   120 use Carp;
  23         36  
  23         1404  
34 23     23   127 use IO::Handle;
  23         40  
  23         829  
35 23     23   129 use App::MtAws::Utils;
  23         50  
  23         3231  
36 23     23   128 use App::MtAws::Exceptions;
  23         33  
  23         1464  
37 23     23   122 use App::MtAws::Filter;
  23         32  
  23         913  
38 23     23   11788 use App::MtAws::FileVersions;
  23         46  
  23         73557  
39              
40             sub new
41             {
42 1467     1467 0 2638738 my ($class, %args) = @_;
43              
44 1467         5189 my %checkargs = %args;
45             exists $checkargs{$_} && delete $checkargs{$_}
46 1467   66     16820 for qw/root_dir journal_file journal_encoding output_version leaf_optimization use_active_retrievals filter follow/;
47 1467 50       3076 confess "Unknown argument(s) to Journal constructor: ".join(', ', keys %checkargs) if %checkargs; # TODO: test
48              
49 1467         1994 my $self = \%args;
50 1467         2328 bless $self, $class;
51              
52 1467   100     8533 $self->{journal_encoding} ||= 'UTF-8';
53              
54 1467 100       3166 if (defined $self->{root_dir}) {
55             # copied from File::Spec::catfile
56 1444         10560 $self->{canon_root_dir} = File::Spec->catdir($self->{root_dir});
57 1444 50       6008 $self->{canon_root_dir} .= "/" unless substr($self->{canon_root_dir},-1) eq "/";
58             }
59              
60 1467 50       3084 defined($self->{journal_file}) || confess;
61 1467         2433 $self->{journal_h} = {};
62 1467         2233 $self->{archive_h} = {};
63              
64 1467         2133 $self->{used_versions} = {};
65 1467 100       5745 $self->{output_version} = 'B' unless defined($self->{output_version});
66 1467         1974 $self->{last_supported_version} = 'C';
67 1467         3578 $self->{first_unsupported_version} = chr(ord($self->{last_supported_version})+1);
68              
69 1467         4211 return $self;
70             }
71              
72             #
73             # Reading journal
74             #
75              
76             # sub read_journal
77              
78             sub read_journal
79             {
80 191     191 0 3850 my ($self, %args) = @_;
81 191 100       739 confess unless defined $args{should_exist};
82 189 50       541 confess unless length($self->{journal_file});
83 189         361 $self->{last_read_time} = time();
84 189 50       413 $self->{active_retrievals} = {} if $self->{use_active_retrievals};
85              
86 189         714 my $binary_filename = binaryfilename $self->{journal_file};
87 189 100 100     10762 if ($args{should_exist} && !-e $binary_filename) {
    100          
88 1         76 confess;
89             } elsif (-e $binary_filename) {
90             open_file(my $F, $self->{journal_file}, file_encoding => $self->{journal_encoding}, mode => '<') or
91             die exception journal_open_error => "Unable to open journal file %string filename% for reading, errno=%errno%",
92 163 50       827 filename => $self->{journal_file}, 'ERRNO';
93 161         246 my $lineno = 0;
94 161         3017 while (!eof($F)) {
95 1333 50       8493 defined( my $line = <$F> ) or confess;
96 1333         1998 ++$lineno;
97 1333 100       9292 $line =~ s/\r?\n$// or
98             die exception journal_format_error => "Invalid format of journal, line %lineno% not fully written", lineno => $lineno;
99 1328         2693 $self->process_line($line, $lineno);
100             }
101 156 50       1867 close $F or confess;
102             }
103 181         609 $self->_index_archives_as_files();
104 181         547 return;
105             }
106              
107             sub open_for_write
108             {
109 16     16 0 63 my ($self) = @_;
110             open_file($self->{append_file}, $self->{journal_file}, mode => '>>', file_encoding => $self->{journal_encoding}) or
111             die exception journal_open_error => "Unable to open journal file %string filename% for writing, errno=%errno%",
112 16 50       85 filename => $self->{journal_file}, 'ERRNO';
113 16         111 $self->{append_file}->autoflush();
114             }
115              
116             sub close_for_write
117             {
118 14     14 0 284 my ($self) = @_;
119 14 100       150 $self->{append_file} or confess;
120 13 50       155 close $self->{append_file} or confess;
121             }
122              
123             sub process_line
124             {
125 2357     2357 0 108655 my ($self, $line, $lineno) = @_;
126 2357         5995 try_drop_utf8_flag $line;
127 2357         16357 my ($ver, $time, $archive_id, $size, $mtime, $treehash, $relfilename, $job_id);
128             # TODO: replace \S and \s, make tests for this
129              
130             # Journal version 'A', 'B', 'C'
131             # 'B' and 'C' two way compatible
132             # 'A' is not compatible, but share some common code
133 2357 100 100     29960 if (($ver, $time, $archive_id, $size, $mtime, $treehash, $relfilename) =
    100          
    100          
    100          
    100          
    100          
    100          
    100          
134             $line =~ /^([ABC])\t([0-9]{1,20})\tCREATED\t(\S+)\t([0-9]{1,20})\t([+-]?[0-9]{1,20}|NONE)\t(\S+)\t(.*?)$/) {
135 1146 50       2722 confess "invalid filename" unless is_relative_filename($relfilename);
136              
137             # here goes difference between 'A' and 'B','C'
138 1146 100       2176 if ($ver eq 'A') {
139 106 100       420 confess if $mtime eq 'NONE'; # this is not supported by format 'A'
140              
141             # version 'A' produce records with mtime set even when there is no mtime in Amazon metadata
142             # (this is possible when archive uploaded by 3rd party program, or mtglacier <= v0.7)
143             # we detect this as $archive_id eq $relfilename - this is practical impossible
144             # unless such record was created by download-inventory command
145 105 100       257 $mtime = undef if ($archive_id eq $relfilename);
146             } else {
147 1040 100       1866 $mtime = undef if $mtime eq 'NONE';
148             }
149              
150              
151 1145 100       7843 $self->_add_archive({
152             relfilename => $relfilename,
153             time => $time+0, # numify
154             archive_id => $archive_id,
155             size => $size+0, # numify
156             mtime => defined($mtime) ? $mtime + 0 : undef,
157             treehash => $treehash,
158             });
159 1145 100       6372 $self->{used_versions}->{$ver} = 1 unless $self->{used_versions}->{$ver};
160             } elsif (($ver, $time, $archive_id, $relfilename) = $line =~ /^([ABC])\t([0-9]{1,20})\tDELETED\t(\S+)\t(.*?)$/) {
161 279         913 $self->_delete_archive($archive_id, $relfilename);
162 279 100       2140 $self->{used_versions}->{$ver} = 1 unless $self->{used_versions}->{$ver};
163             } elsif (($ver, $time, $archive_id, $job_id) = $line =~ /^([ABC])\t([0-9]{1,20})\tRETRIEVE_JOB\t(\S+)\t(.*?)$/) {
164 165         705 $self->_retrieve_job($time+0, $archive_id, $job_id);
165 165 50       1630 $self->{used_versions}->{$ver} = 1 unless $self->{used_versions}->{$ver};
166              
167             # Journal version '0'
168              
169             } elsif (($time, $archive_id, $size, $treehash, $relfilename) =
170             $line =~ /^([0-9]{1,20}) CREATED (\S+) ([0-9]{1,20}) (\S+) (.*?)$/) {
171 278 50       737 confess "invalid filename" unless is_relative_filename($relfilename);
172 278         1779 $self->_add_archive({
173             relfilename => $relfilename,
174             time => $time+0,
175             mtime => undef,
176             archive_id => $archive_id,
177             size => $size+0,
178             treehash => $treehash,
179             });
180 278 100       1466 $self->{used_versions}->{0} = 1 unless $self->{used_versions}->{0};
181             } elsif (($archive_id, $relfilename) = $line =~ /^[0-9]{1,20}\s+DELETED\s+(\S+)\s+(.*?)$/) { # TODO: delete file, parse time too!
182 92         362 $self->_delete_archive($archive_id, $relfilename);
183 92 100       872 $self->{used_versions}->{0} = 1 unless $self->{used_versions}->{0};
184             } elsif (($time, $archive_id) = $line =~ /^([0-9]{1,20})\s+RETRIEVE_JOB\s+(\S+)$/) {
185 68         348 $self->_retrieve_job($time+0, $archive_id);
186 68 50       736 $self->{used_versions}->{0} = 1 unless $self->{used_versions}->{0};
187             } elsif ( ($line =~ /^([0-9]{1,20}) /) || ($line =~ /^[A-$self->{last_supported_version}]\t/) ) {
188 256         664 die exception journal_format_error_broken => "Invalid format of journal, line %lineno% is broken: %line%",
189             lineno => $lineno, line => hex_dump_string($line);
190             } elsif ( ($line =~ /^[$self->{first_unsupported_version}-Z]\t/) ) {
191 23         56 die exception journal_format_error_future => "Invalid format of journal, line %lineno% is from future version of mtglacier",
192             lineno => $lineno;
193             } else {
194 50         173 die exception journal_format_error_unknown => "Invalid format of journal, line %lineno% is in unknown format: %line%",
195             lineno => $lineno, line => hex_dump_string($line);
196             }
197             }
198              
199             sub _add_archive
200             {
201 1536     1536   2615 my ($self, $args) = @_;
202 1536 100       2900 if ($self->check_filenames($args->{relfilename})) {
203 1393 100       4378 confess "duplicate entry" if $self->{archive_h}{$args->{archive_id}};
204 1392         3594 $self->{archive_h}{$args->{archive_id}} = $args;
205             }
206             }
207              
208             sub _delete_archive
209             {
210 147     147   5205 my ($self, $archive_id, $relfilename) = @_;
211 147 100 100     920 confess unless defined $archive_id && defined $relfilename;
212              
213 145 100       427 if ($self->{archive_h}{$archive_id}) {
214 142         589 delete $self->{archive_h}{$archive_id}; # archive_id found, deleting it without checking filename filter
215             } else {
216 3 100       10 confess "archive $archive_id not found in archive_h" # not found - confess, unless it's excluded by filter
217             if $self->check_filenames($relfilename); # TODO: put it to backlog, process later?
218             }
219             }
220              
221             sub _add_filename
222             {
223 1064     1064   8782 my ($self, $args) = @_;
224 1064         1080 my $relfilename = $args->{relfilename};
225 1064 100       1473 if ($self->{journal_h}{$relfilename}) {
226 20 100       69 if (ref $self->{journal_h}{$relfilename} eq ref {}) {
227 13         76 my $v = App::MtAws::FileVersions->new();
228 13         50 $v->add($self->{journal_h}{$relfilename});
229 13         27 $v->add($args);
230 13         33 $self->{journal_h}{$relfilename} = $v;
231             } else {
232 7         24 $self->{journal_h}{$relfilename}->add($args);
233             }
234             } else {
235 1044         2471 $self->{journal_h}{$relfilename} = $args
236             }
237             }
238              
239             sub _index_archives_as_files
240             {
241 428     428   8435 my ($self) = @_;
242 428         490 $self->_add_filename($_) for (values %{$self->{archive_h}});
  428         2137  
243             }
244              
245             sub _retrieve_job
246             {
247 24     24   170 my ($self, $time, $archive_id, $job_id) = @_;
248 24 100 100     103 if ($self->{use_active_retrievals} && $self->{last_read_time} - $time < 24*60*60) { # data is available for appx. 24+4 hours. but we assume 24 hours
249 21         18 my $r = $self->{active_retrievals};
250 21 100 100     73 if (!$r->{$archive_id} || $r->{$archive_id}->{time} < $time ) {
251 18         71 $self->{active_retrievals}->{$archive_id} = { time => $time, job_id => $job_id };
252             }
253             }
254             }
255              
256             sub latest
257             {
258 139     139 0 159 my ($self, $relfilename) = @_;
259 139 100       507 my $e = $self->{journal_h}{$relfilename} or confess "$relfilename not found in journal";
260 138 100       483 (ref $e eq ref {}) ? $e : $e->latest();
261             }
262              
263             #
264             # Wrting journal
265             #
266              
267             sub add_entry
268             {
269 47     47 0 946 my ($self, $e) = @_;
270              
271 47 50       108 confess unless $self->{output_version} eq 'B';
272              
273             # TODO: time should be ascending?
274              
275 47 100       84 if ($e->{type} eq 'CREATED') {
    100          
    50          
276             #" CREATED $archive_id $data->{filesize} $data->{final_hash} $data->{relfilename}"
277 45   33     250 defined( $e->{$_} ) || confess "bad $_" for (qw/time archive_id size treehash relfilename/);
278 45 50       109 confess "invalid filename" unless is_relative_filename($e->{relfilename});
279 45 100       93 my $mtime = defined($e->{mtime}) ? $e->{mtime} : 'NONE';
280 45         197 $self->_write_line("B\t$e->{time}\tCREATED\t$e->{archive_id}\t$e->{size}\t$mtime\t$e->{treehash}\t$e->{relfilename}");
281             } elsif ($e->{type} eq 'DELETED') {
282             # DELETED $data->{archive_id} $data->{relfilename}
283 1   33     7 defined( $e->{$_} ) || confess "bad $_" for (qw/archive_id relfilename/);
284 1 50       3 confess "invalid filename" unless is_relative_filename($e->{relfilename});
285 1         6 $self->_write_line("B\t$e->{time}\tDELETED\t$e->{archive_id}\t$e->{relfilename}");
286             } elsif ($e->{type} eq 'RETRIEVE_JOB') {
287             # RETRIEVE_JOB $data->{archive_id}
288 1   33     6 defined( $e->{$_} ) || confess "bad $_" for (qw/archive_id job_id/);
289 1         5 $self->_write_line("B\t$e->{time}\tRETRIEVE_JOB\t$e->{archive_id}\t$e->{job_id}");
290             } else {
291 0         0 confess "Unexpected else";
292             }
293             }
294              
295             sub _write_line
296             {
297 47     47   173 my ($self, $line) = @_;
298 47 100       234 confess unless $self->{append_file};
299 46 50       38 confess unless print { $self->{append_file} } $line."\n";
  46         1231  
300             # TODO: fsync()
301             }
302              
303             #
304             # Reading file listing
305             #
306              
307              
308             sub read_files
309             {
310 240     240 0 10672 my ($self, $mode, $max_number_of_files) = @_;
311              
312 240         889 my %checkmode = %$mode;
313 240   66     1922 defined $checkmode{$_} && delete $checkmode{$_} for qw/new existing missing/;
314 240 50       675 confess "Unknown mode: ".join(';', keys %checkmode) if %checkmode;
315              
316 240 50       771 confess unless defined($self->{root_dir});
317              
318 240 100       753 my %missing = $mode->{'missing'} ? %{$self->{journal_h}} : ();
  23         88  
319              
320 240         1164 $self->{listing} = { new => [], existing => [], missing => [] };
321 240         380 my $i = 0;
322             # TODO: find better workaround than "-s"
323 240         370 $File::Find::prune = 0;
324 240         539 $File::Find::dont_use_nlink = !$self->{leaf_optimization};
325              
326             File::Find::find({ wanted => sub {
327 2760 100   2760   34354 if ($self->_listing_exceeed_max_number_of_files($max_number_of_files)) {
328 3         3 $File::Find::prune = 1;
329 3         3 return;
330             }
331              
332 2757 50       5499 if (++$i % 1000 == 0) {
333 0         0 print "Found $i local files\n";
334             }
335              
336             # note that this exception is probably thrown even if a directory below transfer root contains invalid chars
337 2757 100       7711 die exception(invalid_chars_filename => "Not allowed characters in filename: %filename%", filename => hex_dump_string($_))
338             if /[\r\n\t]/;
339              
340 2754 100       31718 if (-d) {
341 1042         1737 my $dir = character_filename($_);
342 1042         2023 $dir =~ s!/$!!; # make sure there is no trailing slash. just in case.
343 1042         2690 my $reldir = abs2rel($dir, $self->{root_dir}, allow_rel_base => 1);
344 1042 100 100     69486 if ($self->{filter} && $reldir ne '.') {
345 600         2831 my ($match, $matchsubdirs) = $self->{filter}->check_dir($reldir."/");
346 600 100 66     36479 if (!$match && $matchsubdirs) {
347 120         1172 $File::Find::prune = 1;
348             }
349             }
350             } else {
351             # file can be not existing here (i.e. dangling symlink)
352 1712         3205 my $filename = character_filename(my $binaryfilename = $_);
353 1711         4547 my $orig_relfilename = abs2rel($filename, $self->{root_dir}, allow_rel_base => 1);
354 1711 100       44001 if ($self->check_filenames($orig_relfilename)) {
355 1443 100       2549 if ($self->_is_file_exists($binaryfilename)) {
356 1413         2029 my $relfilename;
357 1413 50       3276 confess "Invalid filename: ".hex_dump_string($orig_relfilename)
358             unless defined($relfilename = sanity_relative_filename($orig_relfilename));
359 1413 100       2937 if (my $use_mode = $self->_can_read_filename_for_mode($orig_relfilename, $mode)) {
360 1120         995 push @{$self->{listing}{$use_mode}}, { relfilename => $relfilename }; # TODO: we can reduce memory usage even more. we don't need hash here probably??
  1120         4112  
361             }
362 1413 100       32431 delete $missing{$relfilename} if ($mode->{missing});
363             }
364             }
365             }
366 240 100       3195 }, no_chdir => 1, $self->{follow} ? (follow => 1, follow_skip => 2) : () }, (binaryfilename($self->{root_dir})));
367              
368 236 100 100     4522 if ($mode->{missing} && !$self->_listing_exceeed_max_number_of_files($max_number_of_files)) {
369 22         82 for (keys %missing) {
370 46 100       175 unless ($self->_is_file_exists(binaryfilename $self->absfilename($_))) {
371 42         721 push @{$self->{listing}{missing}}, { relfilename => $_ };
  42         108  
372 42 100       63 last if $self->_listing_exceeed_max_number_of_files($max_number_of_files);
373             }
374             }
375             }
376             }
377              
378             sub _listing_exceeed_max_number_of_files
379             {
380 2825     2825   3114 my ($self, $max_number_of_files) = @_;
381             ($max_number_of_files && (
382             (
383 98         145 (scalar @{$self->{listing}{new}}) +
384 98         154 (scalar @{$self->{listing}{existing}}) +
385 2825 100       8370 (scalar @{$self->{listing}{missing}})
  98         461  
386             ) >= $max_number_of_files)
387             );
388             }
389              
390             sub character_filename
391             {
392 2754     2754 0 3252 my ($binaryfilename) = @_;
393 2754         2119 my $filename;
394 2754         5891 my $enc = get_filename_encoding();
395             die exception invalid_octets_filename => "Invalid octets in filename, does not map to desired encoding %string enc%: %filename%",
396             enc => $enc, filename => hex_dump_string($binaryfilename),
397 2754 100       3278 unless (defined($filename = eval { decode($enc, $binaryfilename, Encode::DIE_ON_ERR|Encode::LEAVE_SRC) }));
  2754         10335  
398 2753         79040 $filename;
399             }
400              
401             sub _is_file_exists
402             {
403 1278     1278   1755 my ($self, $filename) = @_;
404 1278 100       38443 (-f $filename) && (-s $filename);
405             }
406              
407             sub absfilename
408             {
409 1221     1221 0 2285090 my ($self, $relfilename) = @_;
410 1221 50       3579 confess unless defined($self->{canon_root_dir});
411              
412             # Originally it was: File::Spec->rel2abs($relfilename, $self->{root_dir});
413              
414             # TODO: maybe add File::Spec->canonpath() and fix absfilename_correct=./dirA/file3 ?
415 1221         8201 $self->{canon_root_dir}.$relfilename;
416             }
417              
418              
419             sub _can_read_filename_for_mode
420             {
421 1414     1414   1861 my ($self, $relfilename, $mode) = @_;
422              
423 1414 100       2865 if (defined($self->{journal_h}->{$relfilename})) {
424 521 100       1102 if ($mode->{existing}) {
    100          
425 279         755 return 'existing';
426             } elsif ($mode->{new}) { # AND not $mode->{existing}
427 235         4625 print "Skip $relfilename\n";
428 235         841 return 0;
429             } else {
430 7         17 return 0;
431             }
432             } else {
433 893 100       1466 if ($mode->{new}) {
    100          
434 845         2147 return 'new';
435             } elsif ($mode->{existing}) { # AND not $mode->{new}
436 39         1016 print "Not exists $relfilename\n";
437 39         157 return 0;
438             } else {
439 9         21 return 0;
440             }
441             }
442             }
443              
444             sub check_filenames
445             {
446 3250     3250 0 3677 my $self = shift;
447 3250 100       14073 !$self->{filter} || $self->{filter}->check_filenames(@_)
448             }
449              
450             1;