line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# mt-aws-glacier - Amazon Glacier sync client |
2
|
|
|
|
|
|
|
# Copyright (C) 2012-2014 Victor Efimov |
3
|
|
|
|
|
|
|
# http://mt-aws.com (also http://vs-dev.com) vs@vs-dev.com |
4
|
|
|
|
|
|
|
# License: GPLv3 |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# This file is part of "mt-aws-glacier" |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# mt-aws-glacier is free software: you can redistribute it and/or modify |
9
|
|
|
|
|
|
|
# it under the terms of the GNU General Public License as published by |
10
|
|
|
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
11
|
|
|
|
|
|
|
# (at your option) any later version. |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
# mt-aws-glacier is distributed in the hope that it will be useful, |
14
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
15
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16
|
|
|
|
|
|
|
# GNU General Public License for more details. |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
19
|
|
|
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=pod |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
(I) |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
filter, include, exclude options allow you to construct a list of RULES to select only certain files for the operation. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
(II) |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
--filter |
31
|
|
|
|
|
|
|
Adds one or several RULES to the list of rules. |
32
|
|
|
|
|
|
|
One filter value can contain multiple rules, it has same effect as multiple filter values with one RULE each. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
--filter='RULE1 RULE2' --filter 'RULE3' |
35
|
|
|
|
|
|
|
is same as |
36
|
|
|
|
|
|
|
--filter 'RULE1 RULE2 RULE3' |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
RULES: [+-]PATTERN [+-]PATTERN ... |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
RULES should be a sequence of PATTERNS, prepended with '+' or '-' and separated by a spaces. |
41
|
|
|
|
|
|
|
There can be a space between '+'/'-' and PATTERN. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
'+' means INCLUDE PATTERN, '-' means EXCLUDE PATTERN |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Note: If RULES contain spaces or wildcards, you must quote it |
46
|
|
|
|
|
|
|
Note: although, PATTERN can contain spaces, you cannot use if, because RULES separated by a space(s). |
47
|
|
|
|
|
|
|
Note: PATTERN can be empty |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
--include=PATTERN |
51
|
|
|
|
|
|
|
Adds an INCLUDE PATTERN to list of rules |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
--exclude=PATTERN |
54
|
|
|
|
|
|
|
Adds an EXCLUDE PATTERN to list of rules |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
Note: You can use spaces in PATTERNs here |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
(III) |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
PATTERN: |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
1) if the pattern starts with a / then it is anchored to a particular spot in the hierarchy of files, otherwise it is matched against the final |
63
|
|
|
|
|
|
|
component of the filename. |
64
|
|
|
|
|
|
|
2) if the pattern ends with a / then it will only match a directory and all files/subdirectories inside this directory. It won't match regular file. |
65
|
|
|
|
|
|
|
Note that if directory is empty, it won't be synchronized to Amazon Glacier, as it does not support directories |
66
|
|
|
|
|
|
|
3) if pattern does not end with a '/', it won't match directory (directories are not supported by Amazon Glacier, so it has no sense to match a directory |
67
|
|
|
|
|
|
|
without subdirectories). However if, in future versions we find a way to store empty directories in glacier, this behaviour could change. |
68
|
|
|
|
|
|
|
4) Wildcard '*' matches any path component, but it stops at slashes. |
69
|
|
|
|
|
|
|
5) Wildcard '**' matches anything, including slashes. |
70
|
|
|
|
|
|
|
6) When wildcard '**' meant to be a separated path component (i.e. surrounded with slashes/beginning of line/end of line), it matches 0 or more subdirectories |
71
|
|
|
|
|
|
|
7) Wildcard '?' matches any character except a slash (/). |
72
|
|
|
|
|
|
|
8) if the pattern contains a / (not counting a trailing /) then it is matched against the full pathname, including any leading directories. |
73
|
|
|
|
|
|
|
Otherwise it is matched only against the final component of the filename. |
74
|
|
|
|
|
|
|
9) if PATTERN is empty, it matches anything. |
75
|
|
|
|
|
|
|
10) If PATTERN is started with '!' it only match when rest of pattern (i.e. without '!') does not match. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
(IV) |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
How rules are processed: |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
1) A filename is checked agains all rules in the list. Once filename match PATTERN, file is included or excluded depending of what kind of PATTERN matched. |
82
|
|
|
|
|
|
|
No other rules checked after first match. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
2) When traverse directory tree, unlike Rsync, if a directory (and all subdirectories) match exclude pattern, process is not stopped. So |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
--filter '+/tmp/data/a/b/c -/tmp/data -' will work (it will match /tmp/data/a/b/c) |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
3) In some cases, to reduce disk IO, directory traversal into excluded directory can be stopped. |
89
|
|
|
|
|
|
|
This only can happen when mtgalcier absolutely sure that it won't break (2) behaviour. |
90
|
|
|
|
|
|
|
It's guaraneed that traversal stop only in case when |
91
|
|
|
|
|
|
|
a) directory match EXCLUDE rule without '!' prefix, ending with '/' or '**', or empty rule |
92
|
|
|
|
|
|
|
"dir/" |
93
|
|
|
|
|
|
|
"/some/dir/" |
94
|
|
|
|
|
|
|
"prefix** |
95
|
|
|
|
|
|
|
"/some/dir/prefix** |
96
|
|
|
|
|
|
|
b) AND there is no INCLUDE rules before this exclude RULE |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
4) When we process both local files and Journal filelist (sync, restore commands), rule applied to BOTH sides. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=cut |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
package App::MtAws::Filter; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
our $VERSION = '1.114_2'; |
105
|
|
|
|
|
|
|
|
106
|
113
|
|
|
113
|
|
588
|
use strict; |
|
113
|
|
|
|
|
178
|
|
|
113
|
|
|
|
|
3721
|
|
107
|
113
|
|
|
113
|
|
544
|
use warnings; |
|
113
|
|
|
|
|
184
|
|
|
113
|
|
|
|
|
3662
|
|
108
|
113
|
|
|
113
|
|
518
|
use utf8; |
|
113
|
|
|
|
|
180
|
|
|
113
|
|
|
|
|
795
|
|
109
|
113
|
|
|
113
|
|
2457
|
use Carp; |
|
113
|
|
|
|
|
181
|
|
|
113
|
|
|
|
|
7588
|
|
110
|
|
|
|
|
|
|
|
111
|
113
|
|
|
113
|
|
573
|
use Exporter 'import'; |
|
113
|
|
|
|
|
179
|
|
|
113
|
|
|
|
|
118476
|
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub new |
115
|
|
|
|
|
|
|
{ |
116
|
472
|
|
|
472
|
0
|
703686
|
my ($class, %args) = @_; |
117
|
472
|
|
|
|
|
742
|
my $self = \%args; |
118
|
472
|
|
|
|
|
702
|
bless $self, $class; |
119
|
|
|
|
|
|
|
|
120
|
472
|
|
|
|
|
1256
|
$self->_init_substitutions( |
121
|
|
|
|
|
|
|
"\Q**\E" => '.*', |
122
|
|
|
|
|
|
|
"\Q/**/\E" => '(/|/.*/)', |
123
|
|
|
|
|
|
|
"\Q*\E" => '[^/]*', |
124
|
|
|
|
|
|
|
"\Q?\E" => '[^/]' |
125
|
|
|
|
|
|
|
); |
126
|
|
|
|
|
|
|
|
127
|
472
|
|
|
|
|
856
|
return $self; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub check_filenames |
131
|
|
|
|
|
|
|
{ |
132
|
1909
|
|
|
1909
|
0
|
2773
|
my $self = shift; |
133
|
|
|
|
|
|
|
map { |
134
|
1909
|
|
|
|
|
2220
|
my ($res, $subdir) = $self->check_dir($_); |
|
1937
|
|
|
|
|
3020
|
|
135
|
1937
|
100
|
|
|
|
16509
|
$res ? $_ : (); |
136
|
|
|
|
|
|
|
} @_; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub check_dir |
140
|
|
|
|
|
|
|
{ |
141
|
2551
|
|
|
2551
|
0
|
5886
|
my ($self, $dir) = @_; |
142
|
2551
|
|
|
|
|
2392
|
my $res = 1; # default action - include! |
143
|
2551
|
|
|
|
|
2155
|
my $match_subdirs = undef; |
144
|
2551
|
|
|
|
|
1977
|
for my $filter (@{$self->{filters}}) { |
|
2551
|
|
|
|
|
4720
|
|
145
|
6614
|
100
|
|
|
|
11746
|
$match_subdirs = 0 if ($filter->{action} eq '+'); # match_subdirs true only when we exclude this filename and we can to exclude all subdirs |
146
|
6614
|
100
|
|
|
|
35460
|
if ($filter->{notmatch} ? ("/$dir" !~ $filter->{re}) : ("/$dir" =~ $filter->{re})) { |
|
|
100
|
|
|
|
|
|
147
|
2546
|
|
|
|
|
3384
|
$res = !!($filter->{action} eq '+'); |
148
|
2546
|
100
|
|
|
|
4901
|
$match_subdirs = $filter->{match_subdirs} unless defined $match_subdirs; |
149
|
2546
|
|
|
|
|
2919
|
last; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
} |
152
|
2551
|
|
|
|
|
6217
|
return $res, $match_subdirs; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub parse_filters |
156
|
|
|
|
|
|
|
{ |
157
|
146
|
|
|
146
|
0
|
384
|
my $self = shift; |
158
|
146
|
|
|
|
|
455
|
my @patterns = $self->_filters_to_pattern(@_); |
159
|
146
|
100
|
|
|
|
371
|
return unless @patterns; |
160
|
131
|
|
|
|
|
397
|
my @res = $self->_patterns_to_regexp(@patterns); |
161
|
131
|
|
|
|
|
167
|
push @{$self->{filters}}, @res; |
|
131
|
|
|
|
|
677
|
|
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub parse_include |
165
|
|
|
|
|
|
|
{ |
166
|
32
|
|
|
32
|
0
|
46
|
my $self = shift; |
167
|
32
|
|
|
|
|
128
|
my @res = $self->_patterns_to_regexp({ pattern => shift(), action => '+'}); |
168
|
32
|
|
|
|
|
68
|
push @{$self->{filters}}, @res; |
|
32
|
|
|
|
|
123
|
|
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub parse_exclude |
172
|
|
|
|
|
|
|
{ |
173
|
37
|
|
|
37
|
0
|
55
|
my $self = shift; |
174
|
37
|
|
|
|
|
163
|
my @res = $self->_patterns_to_regexp({ pattern => shift(), action => '-'}); |
175
|
37
|
|
|
|
|
79
|
push @{$self->{filters}}, @res; |
|
37
|
|
|
|
|
154
|
|
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub _filters_to_pattern |
179
|
|
|
|
|
|
|
{ |
180
|
384
|
|
|
384
|
|
45740
|
my $self = shift; |
181
|
|
|
|
|
|
|
map { # for each +/-PATTERN |
182
|
|
|
|
|
|
|
# this will return arrayref with two elements: first + or -, second: the PATTERN |
183
|
795
|
50
|
|
|
|
2787
|
/^\s*([+-])\s*(\S*)\s*$/ or confess "[$_]"; |
184
|
795
|
|
|
|
|
3498
|
{ action => $1, pattern => $2 } |
185
|
|
|
|
|
|
|
} map { # for each of filter arguments |
186
|
384
|
|
|
|
|
1456
|
my @parsed = /\G(\s*[+-]\s*\S*\s*)/g; |
|
481
|
|
|
|
|
2968
|
|
187
|
481
|
100
|
|
|
|
1398
|
$self->{error} = $_, return unless @parsed; # regexp does not match |
188
|
474
|
100
|
|
|
|
1544
|
$self->{error} = $', return if length($') > 0; # not all of the string parsed |
189
|
462
|
|
|
|
|
1167
|
@parsed; # we can return multiple +/-PATTERNS for each filter argument |
190
|
|
|
|
|
|
|
} @_; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub _init_substitutions |
194
|
|
|
|
|
|
|
{ |
195
|
474
|
|
|
474
|
|
3248
|
my $self = shift; |
196
|
|
|
|
|
|
|
|
197
|
474
|
|
|
|
|
1579
|
my %subst = @_; # we treat args as hash |
198
|
|
|
|
|
|
|
|
199
|
474
|
|
|
|
|
457
|
my (@all); |
200
|
474
|
|
|
|
|
1842
|
while (my ($k, undef) = splice @_, 0, 2) { push @all, $k }; # but now we treat args as array |
|
1891
|
|
|
|
|
3766
|
|
201
|
|
|
|
|
|
|
|
202
|
474
|
|
|
|
|
814
|
$self->{all_re} = '('.join('|', map { quotemeta } @all ).')'; |
|
1891
|
|
|
|
|
3923
|
|
203
|
474
|
|
|
|
|
1323
|
$self->{subst} = \%subst; |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub _pattern_to_regexp |
207
|
|
|
|
|
|
|
{ |
208
|
410
|
|
|
410
|
|
540
|
my ($self, $pattern) = @_; |
209
|
410
|
|
|
|
|
644
|
my $notmatch = ($pattern =~ /^!/); |
210
|
410
|
100
|
|
|
|
826
|
$pattern =~ s/^!// if $notmatch; # TODO: optimize |
211
|
410
|
50
|
|
|
|
685
|
confess unless defined $pattern; |
212
|
410
|
100
|
|
|
|
1300
|
return match_subdirs => !$notmatch, re => qr/.*/, notmatch => $notmatch unless length($pattern); |
213
|
|
|
|
|
|
|
|
214
|
340
|
|
|
|
|
443
|
my $re = quotemeta $pattern; |
215
|
340
|
|
|
|
|
2154
|
$re =~ s!$self->{all_re}!$self->{subst}->{$&}!ge; |
|
242
|
|
|
|
|
1027
|
|
216
|
340
|
100
|
|
|
|
1138
|
$re = ($pattern =~ m!(/.)!) ? "^/?$re" : "(^|/)$re"; |
217
|
340
|
100
|
|
|
|
921
|
$re .= '$' unless ($pattern =~ m!/$!); |
218
|
340
|
|
100
|
|
|
10883
|
return match_subdirs => $pattern =~ m!(^|/|\*\*)$! && !$notmatch, re => qr/$re/, notmatch => $notmatch; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub _patterns_to_regexp |
222
|
|
|
|
|
|
|
{ |
223
|
277
|
|
|
277
|
|
523
|
my $self = shift; |
224
|
|
|
|
|
|
|
# of course order of regexps is important |
225
|
|
|
|
|
|
|
# how regexps works: |
226
|
|
|
|
|
|
|
# http://perldoc.perl.org/perlretut.html#Grouping-things-and-hierarchical-matching |
227
|
|
|
|
|
|
|
map { |
228
|
277
|
|
|
|
|
405
|
{ (%$_, $self->_pattern_to_regexp($_->{pattern})) }; |
|
410
|
|
|
|
|
1336
|
|
229
|
|
|
|
|
|
|
} @_; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
1; |