line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MogileFS::Worker::Fsck; |
2
|
|
|
|
|
|
|
|
3
|
21
|
|
|
21
|
|
130
|
use strict; |
|
21
|
|
|
|
|
36
|
|
|
21
|
|
|
|
|
538
|
|
4
|
21
|
|
|
21
|
|
85
|
use base 'MogileFS::Worker'; |
|
21
|
|
|
|
|
35
|
|
|
21
|
|
|
|
|
2169
|
|
5
|
|
|
|
|
|
|
use fields ( |
6
|
21
|
|
|
|
|
89
|
'opt_nostat', # bool: do we trust mogstoreds? skipping size stats? |
7
|
|
|
|
|
|
|
'opt_checksum', # (class|off|MD5) checksum mode |
8
|
21
|
|
|
21
|
|
109
|
); |
|
21
|
|
|
|
|
32
|
|
9
|
21
|
|
|
21
|
|
1183
|
use MogileFS::Util qw(every error debug); |
|
21
|
|
|
|
|
44
|
|
|
21
|
|
|
|
|
900
|
|
10
|
21
|
|
|
21
|
|
97
|
use MogileFS::Config; |
|
21
|
|
|
|
|
34
|
|
|
21
|
|
|
|
|
1741
|
|
11
|
21
|
|
|
21
|
|
118
|
use MogileFS::Server; |
|
21
|
|
|
|
|
29
|
|
|
21
|
|
|
|
|
382
|
|
12
|
21
|
|
|
21
|
|
78
|
use List::Util (); |
|
21
|
|
|
|
|
38
|
|
|
21
|
|
|
|
|
378
|
|
13
|
21
|
|
|
21
|
|
97
|
use Time::HiRes (); |
|
21
|
|
|
|
|
29
|
|
|
21
|
|
|
|
|
469
|
|
14
|
|
|
|
|
|
|
|
15
|
21
|
|
|
21
|
|
88
|
use constant SUCCESS => 0; |
|
21
|
|
|
|
|
32
|
|
|
21
|
|
|
|
|
932
|
|
16
|
21
|
|
|
21
|
|
113
|
use constant TEMPORARY => 1; |
|
21
|
|
|
|
|
32
|
|
|
21
|
|
|
|
|
829
|
|
17
|
21
|
|
|
21
|
|
105
|
use constant PERMANENT => 2; |
|
21
|
|
|
|
|
48
|
|
|
21
|
|
|
|
|
884
|
|
18
|
21
|
|
|
21
|
|
101
|
use constant REPLICATE => 3; |
|
21
|
|
|
|
|
27
|
|
|
21
|
|
|
|
|
975
|
|
19
|
|
|
|
|
|
|
|
20
|
21
|
|
|
21
|
|
99
|
use constant EV_NO_PATHS => "NOPA"; |
|
21
|
|
|
|
|
37
|
|
|
21
|
|
|
|
|
806
|
|
21
|
21
|
|
|
21
|
|
93
|
use constant EV_POLICY_VIOLATION => "POVI"; |
|
21
|
|
|
|
|
39
|
|
|
21
|
|
|
|
|
787
|
|
22
|
21
|
|
|
21
|
|
116
|
use constant EV_FILE_MISSING => "MISS"; |
|
21
|
|
|
|
|
30
|
|
|
21
|
|
|
|
|
840
|
|
23
|
21
|
|
|
21
|
|
109
|
use constant EV_BAD_LENGTH => "BLEN"; |
|
21
|
|
|
|
|
32
|
|
|
21
|
|
|
|
|
768
|
|
24
|
21
|
|
|
21
|
|
93
|
use constant EV_CANT_FIX => "GONE"; |
|
21
|
|
|
|
|
33
|
|
|
21
|
|
|
|
|
804
|
|
25
|
21
|
|
|
21
|
|
95
|
use constant EV_START_SEARCH => "SRCH"; |
|
21
|
|
|
|
|
33
|
|
|
21
|
|
|
|
|
880
|
|
26
|
21
|
|
|
21
|
|
99
|
use constant EV_FOUND_FID => "FOND"; |
|
21
|
|
|
|
|
35
|
|
|
21
|
|
|
|
|
808
|
|
27
|
21
|
|
|
21
|
|
93
|
use constant EV_RE_REPLICATE => "REPL"; |
|
21
|
|
|
|
|
30
|
|
|
21
|
|
|
|
|
865
|
|
28
|
21
|
|
|
21
|
|
105
|
use constant EV_BAD_COUNT => "BCNT"; |
|
21
|
|
|
|
|
28
|
|
|
21
|
|
|
|
|
825
|
|
29
|
21
|
|
|
21
|
|
101
|
use constant EV_BAD_CHECKSUM => "BSUM"; |
|
21
|
|
|
|
|
40
|
|
|
21
|
|
|
|
|
953
|
|
30
|
21
|
|
|
21
|
|
108
|
use constant EV_NO_CHECKSUM => "NSUM"; |
|
21
|
|
|
|
|
30
|
|
|
21
|
|
|
|
|
832
|
|
31
|
21
|
|
|
21
|
|
105
|
use constant EV_MULTI_CHECKSUM => "MSUM"; |
|
21
|
|
|
|
|
29
|
|
|
21
|
|
|
|
|
783
|
|
32
|
21
|
|
|
21
|
|
93
|
use constant EV_BAD_HASHTYPE => "BALG"; |
|
21
|
|
|
|
|
29
|
|
|
21
|
|
|
|
|
712
|
|
33
|
|
|
|
|
|
|
|
34
|
21
|
|
|
21
|
|
104
|
use POSIX (); |
|
21
|
|
|
|
|
43
|
|
|
21
|
|
|
|
|
6748
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $nowish; # approximate unixtime, updated once per loop. |
37
|
|
|
|
|
|
|
|
38
|
0
|
|
|
0
|
0
|
|
sub watchdog_timeout { 120 } |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub work { |
41
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# this can be CPU-intensive. let's nice ourselves down. |
44
|
0
|
|
|
|
|
|
POSIX::nice(10); |
45
|
|
|
|
|
|
|
|
46
|
0
|
|
|
|
|
|
my $sto = Mgd::get_store(); |
47
|
0
|
|
|
|
|
|
my $max_checked = 0; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
every(2.0, sub { |
50
|
0
|
|
|
0
|
|
|
my $sleep_set = shift; |
51
|
0
|
|
|
|
|
|
$nowish = time(); |
52
|
0
|
|
|
|
|
|
local $Mgd::nowish = $nowish; |
53
|
|
|
|
|
|
|
|
54
|
0
|
|
|
|
|
|
my $queue_todo = $self->queue_todo('fsck'); |
55
|
|
|
|
|
|
|
# This counts the same as a $self->still_alive; |
56
|
0
|
|
|
|
|
|
$self->send_to_parent('worker_bored 50 fsck'); |
57
|
0
|
0
|
|
|
|
|
return unless @{$queue_todo}; |
|
0
|
|
|
|
|
|
|
58
|
0
|
0
|
|
|
|
|
return unless $self->validate_dbh; |
59
|
|
|
|
|
|
|
|
60
|
0
|
|
|
|
|
|
my @fids = (); |
61
|
0
|
|
|
|
|
|
while (my $todo = shift @{$queue_todo}) { |
|
0
|
|
|
|
|
|
|
62
|
0
|
|
|
|
|
|
my $fid = MogileFS::FID->new($todo->{fid}); |
63
|
0
|
0
|
|
|
|
|
if ($fid->exists) { |
64
|
0
|
|
|
|
|
|
push(@fids, $fid); |
65
|
|
|
|
|
|
|
} else { |
66
|
|
|
|
|
|
|
# FID stopped existing before being checked. |
67
|
0
|
|
|
|
|
|
$sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
} |
70
|
0
|
0
|
|
|
|
|
return unless @fids; |
71
|
|
|
|
|
|
|
|
72
|
0
|
|
0
|
|
|
|
$self->{opt_nostat} = MogileFS::Config->server_setting('fsck_opt_policy_only') || 0; |
73
|
0
|
|
|
|
|
|
my $alg = MogileFS::Config->server_setting_cached("fsck_checksum"); |
74
|
0
|
0
|
0
|
|
|
|
if (defined($alg) && $alg eq "off") { |
75
|
0
|
|
|
|
|
|
$self->{opt_checksum} = "off"; |
76
|
|
|
|
|
|
|
} else { |
77
|
0
|
0
|
|
|
|
|
$self->{opt_checksum} = MogileFS::Checksum->valid_alg($alg) ? $alg : 0; |
78
|
|
|
|
|
|
|
} |
79
|
0
|
|
|
|
|
|
MogileFS::FID->mass_load_devids(@fids); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# don't sleep in loop, next round, since we found stuff to work on |
82
|
|
|
|
|
|
|
# this round... |
83
|
0
|
|
|
|
|
|
$sleep_set->(0); |
84
|
|
|
|
|
|
|
|
85
|
0
|
|
|
|
|
|
my $new_max; |
86
|
0
|
|
|
|
|
|
my $hit_problem = 0; |
87
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
|
foreach my $fid (@fids) { |
89
|
0
|
0
|
|
|
|
|
if (!$self->check_fid($fid)) { |
90
|
|
|
|
|
|
|
# some connectivity problem... retry this fid later. |
91
|
|
|
|
|
|
|
# (don't dequeue it) |
92
|
0
|
|
|
|
|
|
$self->still_alive; |
93
|
0
|
|
|
|
|
|
next; |
94
|
|
|
|
|
|
|
} |
95
|
0
|
|
|
|
|
|
$sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); |
96
|
|
|
|
|
|
|
} |
97
|
0
|
|
|
|
|
|
}); |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# given a $fid (MogileFS::FID, with pre-populated ->devids data) |
101
|
|
|
|
|
|
|
# return 0 if reachability problems. |
102
|
|
|
|
|
|
|
# return 1 if fid was checked (regardless of there being problems or not) |
103
|
|
|
|
|
|
|
# if no problems, no action. |
104
|
|
|
|
|
|
|
# if problems, log & enqueue fixes |
105
|
21
|
|
|
21
|
|
129
|
use constant STALLED => 0; |
|
21
|
|
|
|
|
32
|
|
|
21
|
|
|
|
|
902
|
|
106
|
21
|
|
|
21
|
|
102
|
use constant HANDLED => 1; |
|
21
|
|
|
|
|
34
|
|
|
21
|
|
|
|
|
13830
|
|
107
|
|
|
|
|
|
|
sub check_fid { |
108
|
0
|
|
|
0
|
0
|
|
my ($self, $fid) = @_; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
my $fix = sub { |
111
|
0
|
|
|
0
|
|
|
my ($reason, $recheck) = @_; |
112
|
0
|
|
|
|
|
|
my $fixed; |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# we cached devids without locking for the fast path, |
115
|
|
|
|
|
|
|
# ensure we get an up-to-date list in the slow path. |
116
|
0
|
|
|
|
|
|
$fid->forget_cached_devids; |
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
my $sto = Mgd::get_store(); |
119
|
0
|
0
|
|
|
|
|
unless ($sto->should_begin_replicating_fidid($fid->id)) { |
120
|
0
|
|
|
|
|
|
error("Fsck stalled for fid $fid: failed to acquire lock"); |
121
|
0
|
|
|
|
|
|
return STALLED; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
0
|
0
|
|
|
|
|
unless ($fid->exists) { |
125
|
|
|
|
|
|
|
# FID stopped existing while doing (or waiting on) |
126
|
|
|
|
|
|
|
# the fast check, give up on this fid |
127
|
0
|
|
|
|
|
|
$sto->note_done_replicating($fid->id); |
128
|
0
|
|
|
|
|
|
return HANDLED; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# we may have a lockless check which failed, retry the check |
132
|
|
|
|
|
|
|
# with the lock and see if it succeeds here: |
133
|
0
|
0
|
|
|
|
|
if ($recheck) { |
134
|
0
|
|
|
|
|
|
$fixed = $recheck->(); |
135
|
0
|
0
|
|
|
|
|
if (!$fixed) { |
136
|
0
|
|
|
|
|
|
$fid->fsck_log($reason); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
0
|
|
0
|
|
|
|
$fixed ||= eval { $self->fix_fid($fid) }; |
|
0
|
|
|
|
|
|
|
141
|
0
|
|
|
|
|
|
my $err = $@; |
142
|
0
|
|
|
|
|
|
$sto->note_done_replicating($fid->id); |
143
|
0
|
0
|
|
|
|
|
if (! defined $fixed) { |
144
|
0
|
|
|
|
|
|
error("Fsck stalled for fid $fid: $err"); |
145
|
0
|
|
|
|
|
|
return STALLED; |
146
|
|
|
|
|
|
|
} |
147
|
0
|
0
|
|
|
|
|
$fid->fsck_log(EV_CANT_FIX) if ! $fixed; |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# that might've all taken awhile, let's update our approximate time |
150
|
0
|
|
|
|
|
|
$nowish = $self->still_alive; |
151
|
0
|
|
|
|
|
|
return HANDLED; |
152
|
0
|
|
|
|
|
|
}; |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# first obvious fucked-up case: no devids even presumed to exist. |
155
|
0
|
0
|
|
|
|
|
unless ($fid->devids) { |
156
|
|
|
|
|
|
|
# weird, recheck with a lock and then log it if it fails |
157
|
|
|
|
|
|
|
# and attempt a fix (which will do a search over all |
158
|
|
|
|
|
|
|
# devices as a last-ditch effort to locate it) |
159
|
0
|
|
|
0
|
|
|
return $fix->(EV_NO_PATHS, sub { $fid->devids }); |
|
0
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# first, see if the assumed devids meet the replication policy for |
163
|
|
|
|
|
|
|
# the fid's class. |
164
|
0
|
0
|
|
|
|
|
unless ($fid->devids_meet_policy) { |
165
|
|
|
|
|
|
|
# recheck for policy violation under a lock, logging the violation |
166
|
|
|
|
|
|
|
# if we failed. |
167
|
0
|
|
|
0
|
|
|
return $fix->(EV_POLICY_VIOLATION, sub { $fid->devids_meet_policy }); |
|
0
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# This is a simple fixup case |
171
|
|
|
|
|
|
|
# If we got here, we already know we have no policy violation and |
172
|
|
|
|
|
|
|
# don't need to call $fix->() to just fix a devcount |
173
|
0
|
|
|
|
|
|
$self->maybe_fix_devcount($fid); |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# missing checksum row |
176
|
0
|
0
|
0
|
|
|
|
if ($fid->class->hashtype && ! $fid->checksum) { |
177
|
0
|
|
|
|
|
|
return $fix->(); |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# in the fast case, do nothing else (don't check if assumed file |
181
|
|
|
|
|
|
|
# locations are actually there). in the fast case, all we do is |
182
|
|
|
|
|
|
|
# check the replication policy, which is already done, so finish. |
183
|
0
|
0
|
|
|
|
|
return HANDLED if $self->{opt_nostat}; |
184
|
|
|
|
|
|
|
|
185
|
0
|
0
|
0
|
|
|
|
if ($self->{opt_checksum} && $self->{opt_checksum} ne "off") { |
186
|
0
|
|
|
|
|
|
return $fix->(); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# stat each device to see if it's still there. on first problem, |
190
|
|
|
|
|
|
|
# stop and go into the slow(er) fix function. |
191
|
0
|
|
|
|
|
|
my $err; |
192
|
|
|
|
|
|
|
my $rv = $self->parallel_check_sizes([ $fid->devfids ], sub { |
193
|
0
|
|
|
0
|
|
|
my ($dfid, $disk_size) = @_; |
194
|
0
|
0
|
|
|
|
|
if (! defined $disk_size) { |
195
|
0
|
|
|
|
|
|
my $dev = $dfid->device; |
196
|
|
|
|
|
|
|
# We end up checking is_perm_dead twice, but that's the way the |
197
|
|
|
|
|
|
|
# flow goes... |
198
|
0
|
0
|
|
|
|
|
if ($dev->dstate->is_perm_dead) { |
199
|
0
|
|
|
|
|
|
$err = "needfix"; |
200
|
0
|
|
|
|
|
|
return 0; |
201
|
|
|
|
|
|
|
} |
202
|
0
|
|
|
|
|
|
error("Connectivity problem reaching device " . $dev->id . " on host " . $dev->host->ip . "\n"); |
203
|
0
|
|
|
|
|
|
$err = "stalled"; |
204
|
0
|
|
|
|
|
|
return 0; |
205
|
|
|
|
|
|
|
} |
206
|
0
|
0
|
|
|
|
|
return 1 if $disk_size == $fid->length; |
207
|
0
|
|
|
|
|
|
$err = "needfix"; |
208
|
|
|
|
|
|
|
# Note: not doing fsck_log, as fix_fid will log status for each device. |
209
|
0
|
|
|
|
|
|
return 0; |
210
|
0
|
|
|
|
|
|
}); |
211
|
|
|
|
|
|
|
|
212
|
0
|
0
|
|
|
|
|
if ($rv) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
213
|
0
|
0
|
0
|
|
|
|
return ($fid->class->hashtype && !($self->{opt_checksum} && $self->{opt_checksum} eq "off")) |
214
|
|
|
|
|
|
|
? $fix->() : HANDLED; |
215
|
|
|
|
|
|
|
} elsif ($err eq "stalled") { |
216
|
0
|
|
|
|
|
|
return STALLED; |
217
|
|
|
|
|
|
|
} elsif ($err eq "needfix") { |
218
|
0
|
|
|
|
|
|
return $fix->(); |
219
|
|
|
|
|
|
|
} else { |
220
|
0
|
|
|
|
|
|
die "Unknown error checking fid sizes in parallel.\n"; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# returns true if all size checks succeeded, false otherwise |
225
|
|
|
|
|
|
|
sub parallel_check_sizes { |
226
|
0
|
|
|
0
|
0
|
|
my ($self, $dflist, $cb) = @_; |
227
|
0
|
|
|
|
|
|
my $expect = scalar @$dflist; |
228
|
0
|
|
|
|
|
|
my ($good, $done) = (0, 0); |
229
|
|
|
|
|
|
|
|
230
|
0
|
|
|
|
|
|
foreach my $df (@$dflist) { |
231
|
|
|
|
|
|
|
$df->size_on_disk(sub { |
232
|
0
|
|
|
0
|
|
|
my ($size) = @_; |
233
|
0
|
|
|
|
|
|
$done++; |
234
|
0
|
0
|
|
|
|
|
if ($cb->($df, $size)) { |
235
|
0
|
|
|
|
|
|
$good++; |
236
|
|
|
|
|
|
|
} else { |
237
|
|
|
|
|
|
|
# use another timer to force PostLoopCallback to run |
238
|
0
|
|
|
|
|
|
Danga::Socket->AddTimer(0, sub { $self->still_alive }); |
|
0
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
} |
240
|
0
|
|
|
|
|
|
}); |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
0
|
|
|
0
|
|
|
Danga::Socket->SetPostLoopCallback(sub { $done != $expect }); |
|
0
|
|
|
|
|
|
|
244
|
0
|
|
|
|
|
|
Danga::Socket->EventLoop; |
245
|
|
|
|
|
|
|
|
246
|
0
|
|
|
|
|
|
return $good == $expect; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# this is the slow path. if something above in check_fid finds |
250
|
|
|
|
|
|
|
# something amiss in any way, we went the slow path on a fid and try |
251
|
|
|
|
|
|
|
# really hard to fix the situation. |
252
|
|
|
|
|
|
|
# |
253
|
|
|
|
|
|
|
# return true if situation handled, 0 if nothing could be done. |
254
|
|
|
|
|
|
|
# die on errors (like connectivity problems). |
255
|
21
|
|
|
21
|
|
131
|
use constant CANT_FIX => 0; |
|
21
|
|
|
|
|
41
|
|
|
21
|
|
|
|
|
25868
|
|
256
|
|
|
|
|
|
|
sub fix_fid { |
257
|
0
|
|
|
0
|
0
|
|
my ($self, $fid) = @_; |
258
|
0
|
|
|
|
|
|
debug(sprintf("Fixing FID %d", $fid->id)); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# make devfid objects from the devids that this fid is on, |
261
|
0
|
|
|
|
|
|
my @dfids = map { MogileFS::DevFID->new($_, $fid) } $fid->devids; |
|
0
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# track all known good copies (dev objects), as well as all bad |
264
|
|
|
|
|
|
|
# copies (places it should've been, but isn't) |
265
|
0
|
|
|
|
|
|
my @good_devs; |
266
|
|
|
|
|
|
|
my @bad_devs; |
267
|
0
|
|
|
|
|
|
my %already_checked; # devid -> 1. |
268
|
0
|
|
0
|
|
|
|
my $alg = $fid->class->hashname || $self->{opt_checksum}; |
269
|
0
|
|
|
|
|
|
my $checksums = {}; |
270
|
0
|
|
|
0
|
|
|
my $ping_cb = sub { $self->still_alive }; |
|
0
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
my $check_dfids = sub { |
273
|
0
|
|
|
0
|
|
|
my $is_desperate_mode = shift; |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# stat all devices. |
276
|
0
|
|
|
|
|
|
foreach my $dfid (@dfids) { |
277
|
0
|
|
|
|
|
|
my $dev = $dfid->device; |
278
|
0
|
0
|
|
|
|
|
next if $already_checked{$dev->id}++; |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# Got a dead link, but reaper hasn't cleared it yet? |
281
|
0
|
0
|
|
|
|
|
if ($dev->dstate->is_perm_dead) { |
282
|
0
|
|
|
|
|
|
push @bad_devs, $dev; |
283
|
0
|
|
|
|
|
|
next; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
|
my $disk_size = $dfid->size_on_disk; |
287
|
0
|
0
|
|
|
|
|
die "dev " . $dev->id . " unreachable" unless defined $disk_size; |
288
|
|
|
|
|
|
|
|
289
|
0
|
0
|
|
|
|
|
if ($disk_size == $fid->length) { |
290
|
0
|
0
|
0
|
|
|
|
if ($alg && $alg ne "off") { |
291
|
0
|
|
|
|
|
|
my $digest = $self->checksum_on_disk($dfid, $alg, $ping_cb); |
292
|
0
|
0
|
|
|
|
|
unless (defined $digest) { |
293
|
0
|
|
|
|
|
|
die "dev " . $dev->id . " unreachable"; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
# DELETE could've hit right after size check |
297
|
0
|
0
|
|
|
|
|
if ($digest eq "-1") { |
298
|
0
|
0
|
|
|
|
|
unless ($is_desperate_mode) { |
299
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_FILE_MISSING, $dev); |
300
|
|
|
|
|
|
|
} |
301
|
0
|
|
|
|
|
|
push @bad_devs, $dfid->device; |
302
|
0
|
|
|
|
|
|
next; |
303
|
|
|
|
|
|
|
} |
304
|
0
|
|
0
|
|
|
|
push @{$checksums->{$digest} ||= []}, $dfid->device; |
|
0
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
0
|
|
|
|
|
|
push @good_devs, $dfid->device; |
308
|
|
|
|
|
|
|
# if we were doing a desperate search, one is enough, we can stop now! |
309
|
0
|
0
|
|
|
|
|
return if $is_desperate_mode; |
310
|
0
|
|
|
|
|
|
next; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# don't log in desperate mode, as we'd have "file missing!" log entries |
314
|
|
|
|
|
|
|
# for every device in the normal case, which is expected. |
315
|
0
|
0
|
|
|
|
|
unless ($is_desperate_mode) { |
316
|
0
|
0
|
|
|
|
|
if ($disk_size == -1) { |
317
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_FILE_MISSING, $dev); |
318
|
|
|
|
|
|
|
} else { |
319
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_LENGTH, $dev); |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
|
push @bad_devs, $dfid->device; |
324
|
|
|
|
|
|
|
} |
325
|
0
|
|
|
|
|
|
}; |
326
|
|
|
|
|
|
|
|
327
|
0
|
|
|
|
|
|
$check_dfids->(); |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# if we didn't find it anywhere, let's go do an exhaustive search over |
330
|
|
|
|
|
|
|
# all devices, looking for it... |
331
|
0
|
0
|
|
|
|
|
unless (@good_devs) { |
332
|
|
|
|
|
|
|
# replace @dfids with list of all (alive) devices. dups will be ignored by |
333
|
|
|
|
|
|
|
# check_dfids |
334
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_START_SEARCH); |
335
|
|
|
|
|
|
|
@dfids = List::Util::shuffle( |
336
|
0
|
|
|
|
|
|
map { MogileFS::DevFID->new($_, $fid) } |
337
|
0
|
|
|
|
|
|
grep { $_->dstate->should_fsck_search_on } |
|
0
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
Mgd::device_factory()->get_all |
339
|
|
|
|
|
|
|
); |
340
|
0
|
|
|
|
|
|
$check_dfids->("desperate"); |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# still can't fix it? |
343
|
0
|
0
|
|
|
|
|
unless (@good_devs) { |
344
|
0
|
|
|
|
|
|
$self->forget_bad_devs($fid, @bad_devs); |
345
|
0
|
|
|
|
|
|
$fid->update_devcount; |
346
|
0
|
|
|
|
|
|
return CANT_FIX; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
# wow, we actually found it! |
350
|
0
|
|
|
|
|
|
$fid->note_on_device($good_devs[0]); # at least one good one. |
351
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_FOUND_FID); |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# fall through to check policy (which will most likely be |
354
|
|
|
|
|
|
|
# wrong, with only one file_on record...) and re-replicate |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
0
|
|
|
|
|
|
$self->forget_bad_devs($fid, @bad_devs); |
358
|
|
|
|
|
|
|
# in case the devcount or similar was fixed. |
359
|
0
|
|
|
|
|
|
$fid->want_reload; |
360
|
|
|
|
|
|
|
|
361
|
0
|
0
|
0
|
|
|
|
$self->fix_checksums($fid, $alg, $checksums) if $alg && $alg ne "off"; |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Note: this will reload devids, if they called 'note_on_device' |
364
|
|
|
|
|
|
|
# or 'forget_about_device' |
365
|
0
|
0
|
|
|
|
|
unless ($fid->devids_meet_policy) { |
366
|
0
|
|
|
|
|
|
$fid->enqueue_for_replication(in => 1); |
367
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_RE_REPLICATE); |
368
|
0
|
|
|
|
|
|
return HANDLED; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# Clean up the device count if it's wrong |
372
|
0
|
|
|
|
|
|
$self->maybe_fix_devcount($fid); |
373
|
|
|
|
|
|
|
|
374
|
0
|
|
|
|
|
|
return HANDLED; |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
sub forget_file_on_with_bad_checksums { |
378
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $checksums) = @_; |
379
|
0
|
|
|
|
|
|
foreach my $bdevs (values %$checksums) { |
380
|
0
|
|
|
|
|
|
foreach my $bdev (@$bdevs) { |
381
|
0
|
|
|
|
|
|
error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $bdev->id); |
382
|
0
|
|
|
|
|
|
$fid->forget_about_device($bdev); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# returns -1 on missing, |
388
|
|
|
|
|
|
|
# undef on connectivity error, |
389
|
|
|
|
|
|
|
# else checksum of file on disk (after HTTP GET or mogstored read) |
390
|
|
|
|
|
|
|
sub checksum_on_disk { |
391
|
0
|
|
|
0
|
0
|
|
my ($self, $dfid, $alg, $ping_cb) = @_; |
392
|
0
|
|
|
|
|
|
return $dfid->checksum_on_disk($alg, $ping_cb, "fsck"); |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub bad_checksums_errmsg { |
396
|
0
|
|
|
0
|
0
|
|
my ($self, $alg, $checksums) = @_; |
397
|
0
|
|
|
|
|
|
my @err; |
398
|
|
|
|
|
|
|
|
399
|
0
|
|
|
|
|
|
foreach my $checksum (keys %$checksums) { |
400
|
0
|
|
|
|
|
|
my $bdevs = join(",", map { $_->id } @{$checksums->{$checksum}}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
401
|
0
|
|
|
|
|
|
$checksum = unpack("H*", $checksum); |
402
|
0
|
|
|
|
|
|
push @err, "$alg:$checksum on devids=[$bdevs]" |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
0
|
|
|
|
|
|
return join('; ', @err); |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
# we don't now what checksum the file is supposed to be, but some |
409
|
|
|
|
|
|
|
# of the devices had checksums that didn't match the other(s). |
410
|
|
|
|
|
|
|
sub auto_checksums_bad { |
411
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $checksums) = @_; |
412
|
0
|
|
|
|
|
|
my $alg = $self->{opt_checksum}; |
413
|
0
|
|
|
|
|
|
my $err = $self->bad_checksums_errmsg($alg, $checksums); |
414
|
|
|
|
|
|
|
|
415
|
0
|
|
|
|
|
|
error("$fid has multiple checksums: $err"); |
416
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_MULTI_CHECKSUM); |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
sub all_checksums_bad { |
420
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $checksums) = @_; |
421
|
0
|
0
|
|
|
|
|
my $alg = $fid->class->hashname or return; # class could've changed |
422
|
0
|
|
|
|
|
|
my $cur_checksum = $fid->checksum; |
423
|
0
|
|
|
|
|
|
my $err = $self->bad_checksums_errmsg($alg, $checksums); |
424
|
0
|
0
|
|
|
|
|
my $cur = $cur_checksum ? "Expected: $cur_checksum" |
425
|
|
|
|
|
|
|
: "No known valid checksum"; |
426
|
0
|
|
|
|
|
|
error("all checksums bad: $err. $cur"); |
427
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_CHECKSUM); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub fix_checksums { |
431
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $alg, $checksums) = @_; |
432
|
0
|
|
|
|
|
|
my $cur_checksum = $fid->checksum; |
433
|
0
|
|
|
|
|
|
my @all_checksums = keys(%$checksums); |
434
|
|
|
|
|
|
|
|
435
|
0
|
0
|
|
|
|
|
if (scalar(@all_checksums) == 1) { # all checksums match, good! |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
436
|
0
|
|
|
|
|
|
my $disk_checksum = $all_checksums[0]; |
437
|
0
|
0
|
|
|
|
|
if ($cur_checksum) { |
438
|
0
|
0
|
|
|
|
|
if ($cur_checksum->{checksum} ne $disk_checksum) { |
439
|
0
|
|
|
|
|
|
my $expect = $cur_checksum->info; |
440
|
0
|
|
|
|
|
|
my $actual = "$alg:" . unpack("H*", $disk_checksum); |
441
|
0
|
|
|
|
|
|
error("$cur_checksum does not match disk: $actual"); |
442
|
0
|
0
|
|
|
|
|
if ($alg ne $cur_checksum->hashname) { |
443
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_HASHTYPE); |
444
|
|
|
|
|
|
|
} else { |
445
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_CHECKSUM); |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
} else { # fresh row to checksum |
449
|
0
|
|
|
|
|
|
my $hashtype = $fid->class->hashtype; |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# we store this in the database |
452
|
0
|
0
|
|
|
|
|
if ($hashtype) { |
453
|
0
|
|
|
|
|
|
my %row = ( |
454
|
|
|
|
|
|
|
fid => $fid->id, |
455
|
|
|
|
|
|
|
checksum => $disk_checksum, |
456
|
|
|
|
|
|
|
hashtype => $hashtype, |
457
|
|
|
|
|
|
|
); |
458
|
0
|
|
|
|
|
|
my $new_checksum = MogileFS::Checksum->new(\%row); |
459
|
0
|
|
|
|
|
|
debug("creating new checksum=$new_checksum"); |
460
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_NO_CHECKSUM); |
461
|
0
|
|
|
|
|
|
$new_checksum->save; |
462
|
|
|
|
|
|
|
} else { |
463
|
0
|
|
|
|
|
|
my $hex_checksum = unpack("H*", $disk_checksum); |
464
|
0
|
|
|
|
|
|
my $alg = $self->{opt_checksum}; |
465
|
0
|
|
|
|
|
|
debug("fsck_checksum=auto good: $fid $alg:$hex_checksum"); |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
} elsif ($cur_checksum) { |
469
|
0
|
|
|
|
|
|
my $good = delete($checksums->{$cur_checksum->{checksum}}); |
470
|
0
|
0
|
0
|
|
|
|
if ($good && (scalar(@$good) > 0)) { |
471
|
0
|
|
|
|
|
|
$self->forget_file_on_with_bad_checksums($fid, $checksums); |
472
|
|
|
|
|
|
|
# will fail $fid->devids_meet_policy and re-replicate |
473
|
|
|
|
|
|
|
} else { |
474
|
0
|
|
|
|
|
|
$self->all_checksums_bad($fid, $checksums); |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
} elsif ($self->{opt_checksum}) { |
477
|
0
|
|
|
|
|
|
$self->auto_checksums_bad($fid, $checksums); |
478
|
|
|
|
|
|
|
} else { |
479
|
0
|
|
|
|
|
|
$self->all_checksums_bad($fid, $checksums); |
480
|
|
|
|
|
|
|
} |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# remove the file_on mappings for devices that were bogus/missing. |
484
|
|
|
|
|
|
|
sub forget_bad_devs { |
485
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, @bad_devs) = @_; |
486
|
0
|
|
|
|
|
|
foreach my $bdev (@bad_devs) { |
487
|
0
|
|
|
|
|
|
error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $bdev->id); |
488
|
0
|
|
|
|
|
|
$fid->forget_about_device($bdev); |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub maybe_fix_devcount { |
493
|
|
|
|
|
|
|
# don't even log BCNT errors if skip_devcount is enabled |
494
|
0
|
0
|
|
0
|
0
|
|
return if MogileFS::Config->server_setting_cached('skip_devcount'); |
495
|
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
|
my ($self, $fid) = @_; |
497
|
0
|
0
|
|
|
|
|
return if scalar($fid->devids) == $fid->devcount; |
498
|
|
|
|
|
|
|
# log a bad count |
499
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_COUNT); |
500
|
0
|
|
|
|
|
|
$fid->update_devcount(); |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
1; |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
# Local Variables: |
506
|
|
|
|
|
|
|
# mode: perl |
507
|
|
|
|
|
|
|
# c-basic-indent: 4 |
508
|
|
|
|
|
|
|
# indent-tabs-mode: nil |
509
|
|
|
|
|
|
|
# End: |