line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MogileFS::Worker::Fsck; |
2
|
|
|
|
|
|
|
|
3
|
21
|
|
|
21
|
|
168
|
use strict; |
|
21
|
|
|
|
|
46
|
|
|
21
|
|
|
|
|
1025
|
|
4
|
21
|
|
|
21
|
|
144
|
use base 'MogileFS::Worker'; |
|
21
|
|
|
|
|
46
|
|
|
21
|
|
|
|
|
3652
|
|
5
|
|
|
|
|
|
|
use fields ( |
6
|
21
|
|
|
|
|
202
|
'opt_nostat', # bool: do we trust mogstoreds? skipping size stats? |
7
|
|
|
|
|
|
|
'opt_checksum', # (class|off|MD5) checksum mode |
8
|
21
|
|
|
21
|
|
133
|
); |
|
21
|
|
|
|
|
52
|
|
9
|
21
|
|
|
21
|
|
1495
|
use MogileFS::Util qw(every error debug); |
|
21
|
|
|
|
|
43
|
|
|
21
|
|
|
|
|
1885
|
|
10
|
21
|
|
|
21
|
|
133
|
use MogileFS::Config; |
|
21
|
|
|
|
|
72
|
|
|
21
|
|
|
|
|
3976
|
|
11
|
21
|
|
|
21
|
|
141
|
use MogileFS::Server; |
|
21
|
|
|
|
|
62
|
|
|
21
|
|
|
|
|
514
|
|
12
|
21
|
|
|
21
|
|
138
|
use List::Util (); |
|
21
|
|
|
|
|
63
|
|
|
21
|
|
|
|
|
391
|
|
13
|
21
|
|
|
21
|
|
114
|
use Time::HiRes (); |
|
21
|
|
|
|
|
45
|
|
|
21
|
|
|
|
|
529
|
|
14
|
|
|
|
|
|
|
|
15
|
21
|
|
|
21
|
|
123
|
use constant SUCCESS => 0; |
|
21
|
|
|
|
|
47
|
|
|
21
|
|
|
|
|
1455
|
|
16
|
21
|
|
|
21
|
|
118
|
use constant TEMPORARY => 1; |
|
21
|
|
|
|
|
58
|
|
|
21
|
|
|
|
|
961
|
|
17
|
21
|
|
|
21
|
|
111
|
use constant PERMANENT => 2; |
|
21
|
|
|
|
|
45
|
|
|
21
|
|
|
|
|
978
|
|
18
|
21
|
|
|
21
|
|
119
|
use constant REPLICATE => 3; |
|
21
|
|
|
|
|
83
|
|
|
21
|
|
|
|
|
1145
|
|
19
|
|
|
|
|
|
|
|
20
|
21
|
|
|
21
|
|
208
|
use constant EV_NO_PATHS => "NOPA"; |
|
21
|
|
|
|
|
54
|
|
|
21
|
|
|
|
|
1195
|
|
21
|
21
|
|
|
21
|
|
123
|
use constant EV_POLICY_VIOLATION => "POVI"; |
|
21
|
|
|
|
|
41
|
|
|
21
|
|
|
|
|
944
|
|
22
|
21
|
|
|
21
|
|
128
|
use constant EV_FILE_MISSING => "MISS"; |
|
21
|
|
|
|
|
56
|
|
|
21
|
|
|
|
|
1021
|
|
23
|
21
|
|
|
21
|
|
120
|
use constant EV_BAD_LENGTH => "BLEN"; |
|
21
|
|
|
|
|
1136
|
|
|
21
|
|
|
|
|
1002
|
|
24
|
21
|
|
|
21
|
|
126
|
use constant EV_CANT_FIX => "GONE"; |
|
21
|
|
|
|
|
41
|
|
|
21
|
|
|
|
|
917
|
|
25
|
21
|
|
|
21
|
|
110
|
use constant EV_START_SEARCH => "SRCH"; |
|
21
|
|
|
|
|
41
|
|
|
21
|
|
|
|
|
1000
|
|
26
|
21
|
|
|
21
|
|
116
|
use constant EV_FOUND_FID => "FOND"; |
|
21
|
|
|
|
|
42
|
|
|
21
|
|
|
|
|
934
|
|
27
|
21
|
|
|
21
|
|
114
|
use constant EV_RE_REPLICATE => "REPL"; |
|
21
|
|
|
|
|
51
|
|
|
21
|
|
|
|
|
1224
|
|
28
|
21
|
|
|
21
|
|
135
|
use constant EV_BAD_COUNT => "BCNT"; |
|
21
|
|
|
|
|
269
|
|
|
21
|
|
|
|
|
1069
|
|
29
|
21
|
|
|
21
|
|
126
|
use constant EV_BAD_CHECKSUM => "BSUM"; |
|
21
|
|
|
|
|
46
|
|
|
21
|
|
|
|
|
1253
|
|
30
|
21
|
|
|
21
|
|
141
|
use constant EV_NO_CHECKSUM => "NSUM"; |
|
21
|
|
|
|
|
55
|
|
|
21
|
|
|
|
|
953
|
|
31
|
21
|
|
|
21
|
|
115
|
use constant EV_MULTI_CHECKSUM => "MSUM"; |
|
21
|
|
|
|
|
49
|
|
|
21
|
|
|
|
|
1004
|
|
32
|
21
|
|
|
21
|
|
230
|
use constant EV_BAD_HASHTYPE => "BALG"; |
|
21
|
|
|
|
|
45
|
|
|
21
|
|
|
|
|
906
|
|
33
|
|
|
|
|
|
|
|
34
|
21
|
|
|
21
|
|
124
|
use POSIX (); |
|
21
|
|
|
|
|
42
|
|
|
21
|
|
|
|
|
12381
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $nowish; # approximate unixtime, updated once per loop. |
37
|
|
|
|
|
|
|
|
38
|
0
|
|
|
0
|
0
|
|
sub watchdog_timeout { 120 } |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub work { |
41
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# this can be CPU-intensive. let's nice ourselves down. |
44
|
0
|
|
|
|
|
|
POSIX::nice(10); |
45
|
|
|
|
|
|
|
|
46
|
0
|
|
|
|
|
|
my $sto = Mgd::get_store(); |
47
|
0
|
|
|
|
|
|
my $max_checked = 0; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
every(2.0, sub { |
50
|
0
|
|
|
0
|
|
|
my $sleep_set = shift; |
51
|
0
|
|
|
|
|
|
$nowish = time(); |
52
|
0
|
|
|
|
|
|
local $Mgd::nowish = $nowish; |
53
|
|
|
|
|
|
|
|
54
|
0
|
|
|
|
|
|
my $queue_todo = $self->queue_todo('fsck'); |
55
|
|
|
|
|
|
|
# This counts the same as a $self->still_alive; |
56
|
0
|
|
|
|
|
|
$self->send_to_parent('worker_bored 50 fsck'); |
57
|
0
|
0
|
|
|
|
|
return unless @{$queue_todo}; |
|
0
|
|
|
|
|
|
|
58
|
0
|
0
|
|
|
|
|
return unless $self->validate_dbh; |
59
|
|
|
|
|
|
|
|
60
|
0
|
|
|
|
|
|
my @fids = (); |
61
|
0
|
|
|
|
|
|
while (my $todo = shift @{$queue_todo}) { |
|
0
|
|
|
|
|
|
|
62
|
0
|
|
|
|
|
|
my $fid = MogileFS::FID->new($todo->{fid}); |
63
|
0
|
0
|
|
|
|
|
if ($fid->exists) { |
64
|
0
|
|
|
|
|
|
push(@fids, $fid); |
65
|
|
|
|
|
|
|
} else { |
66
|
|
|
|
|
|
|
# FID stopped existing before being checked. |
67
|
0
|
|
|
|
|
|
$sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
} |
70
|
0
|
0
|
|
|
|
|
return unless @fids; |
71
|
|
|
|
|
|
|
|
72
|
0
|
|
0
|
|
|
|
$self->{opt_nostat} = MogileFS::Config->server_setting('fsck_opt_policy_only') || 0; |
73
|
0
|
|
|
|
|
|
my $alg = MogileFS::Config->server_setting_cached("fsck_checksum"); |
74
|
0
|
0
|
0
|
|
|
|
if (defined($alg) && $alg eq "off") { |
75
|
0
|
|
|
|
|
|
$self->{opt_checksum} = "off"; |
76
|
|
|
|
|
|
|
} else { |
77
|
0
|
0
|
|
|
|
|
$self->{opt_checksum} = MogileFS::Checksum->valid_alg($alg) ? $alg : 0; |
78
|
|
|
|
|
|
|
} |
79
|
0
|
|
|
|
|
|
MogileFS::FID->mass_load_devids(@fids); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# don't sleep in loop, next round, since we found stuff to work on |
82
|
|
|
|
|
|
|
# this round... |
83
|
0
|
|
|
|
|
|
$sleep_set->(0); |
84
|
|
|
|
|
|
|
|
85
|
0
|
|
|
|
|
|
my $new_max; |
86
|
0
|
|
|
|
|
|
my $hit_problem = 0; |
87
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
|
foreach my $fid (@fids) { |
89
|
0
|
0
|
|
|
|
|
if (!$self->check_fid($fid)) { |
90
|
|
|
|
|
|
|
# some connectivity problem... retry this fid later. |
91
|
|
|
|
|
|
|
# (don't dequeue it) |
92
|
0
|
|
|
|
|
|
$self->still_alive; |
93
|
0
|
|
|
|
|
|
next; |
94
|
|
|
|
|
|
|
} |
95
|
0
|
|
|
|
|
|
$sto->delete_fid_from_file_to_queue($fid->id, FSCK_QUEUE); |
96
|
|
|
|
|
|
|
} |
97
|
0
|
|
|
|
|
|
}); |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# given a $fid (MogileFS::FID, with pre-populated ->devids data) |
101
|
|
|
|
|
|
|
# return 0 if reachability problems. |
102
|
|
|
|
|
|
|
# return 1 if fid was checked (regardless of there being problems or not) |
103
|
|
|
|
|
|
|
# if no problems, no action. |
104
|
|
|
|
|
|
|
# if problems, log & enqueue fixes |
105
|
21
|
|
|
21
|
|
134
|
use constant STALLED => 0; |
|
21
|
|
|
|
|
45
|
|
|
21
|
|
|
|
|
1091
|
|
106
|
21
|
|
|
21
|
|
120
|
use constant HANDLED => 1; |
|
21
|
|
|
|
|
52
|
|
|
21
|
|
|
|
|
19390
|
|
107
|
|
|
|
|
|
|
sub check_fid { |
108
|
0
|
|
|
0
|
0
|
|
my ($self, $fid) = @_; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
my $fix = sub { |
111
|
|
|
|
|
|
|
# we cached devids without locking for the fast path, |
112
|
|
|
|
|
|
|
# ensure we get an up-to-date list in the slow path. |
113
|
0
|
|
|
0
|
|
|
$fid->forget_cached_devids; |
114
|
|
|
|
|
|
|
|
115
|
0
|
|
|
|
|
|
my $sto = Mgd::get_store(); |
116
|
0
|
0
|
|
|
|
|
unless ($sto->should_begin_replicating_fidid($fid->id)) { |
117
|
0
|
|
|
|
|
|
error("Fsck stalled for fid $fid: failed to acquire lock"); |
118
|
0
|
|
|
|
|
|
return STALLED; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
0
|
0
|
|
|
|
|
unless ($fid->exists) { |
122
|
|
|
|
|
|
|
# FID stopped existing while doing (or waiting on) |
123
|
|
|
|
|
|
|
# the fast check, give up on this fid |
124
|
0
|
|
|
|
|
|
$sto->note_done_replicating($fid->id); |
125
|
0
|
|
|
|
|
|
return HANDLED; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
|
my $fixed = eval { $self->fix_fid($fid) }; |
|
0
|
|
|
|
|
|
|
129
|
0
|
|
|
|
|
|
my $err = $@; |
130
|
0
|
|
|
|
|
|
$sto->note_done_replicating($fid->id); |
131
|
0
|
0
|
|
|
|
|
if (! defined $fixed) { |
132
|
0
|
|
|
|
|
|
error("Fsck stalled for fid $fid: $err"); |
133
|
0
|
|
|
|
|
|
return STALLED; |
134
|
|
|
|
|
|
|
} |
135
|
0
|
0
|
|
|
|
|
$fid->fsck_log(EV_CANT_FIX) if ! $fixed; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# that might've all taken awhile, let's update our approximate time |
138
|
0
|
|
|
|
|
|
$nowish = $self->still_alive; |
139
|
0
|
|
|
|
|
|
return HANDLED; |
140
|
0
|
|
|
|
|
|
}; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# first obvious fucked-up case: no devids even presumed to exist. |
143
|
0
|
0
|
|
|
|
|
unless ($fid->devids) { |
144
|
|
|
|
|
|
|
# first, log this weird condition. |
145
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_NO_PATHS); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# weird, schedule a fix (which will do a search over all |
148
|
|
|
|
|
|
|
# devices as a last-ditch effort to locate it) |
149
|
0
|
|
|
|
|
|
return $fix->(); |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# first, see if the assumed devids meet the replication policy for |
153
|
|
|
|
|
|
|
# the fid's class. |
154
|
0
|
0
|
|
|
|
|
unless ($fid->devids_meet_policy) { |
155
|
|
|
|
|
|
|
# log a policy violation |
156
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_POLICY_VIOLATION); |
157
|
0
|
|
|
|
|
|
return $fix->(); |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# This is a simple fixup case |
161
|
|
|
|
|
|
|
# If we got here, we already know we have no policy violation and |
162
|
|
|
|
|
|
|
# don't need to call $fix->() to just fix a devcount |
163
|
0
|
|
|
|
|
|
$self->maybe_fix_devcount($fid); |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# missing checksum row |
166
|
0
|
0
|
0
|
|
|
|
if ($fid->class->hashtype && ! $fid->checksum) { |
167
|
0
|
|
|
|
|
|
return $fix->(); |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# in the fast case, do nothing else (don't check if assumed file |
171
|
|
|
|
|
|
|
# locations are actually there). in the fast case, all we do is |
172
|
|
|
|
|
|
|
# check the replication policy, which is already done, so finish. |
173
|
0
|
0
|
|
|
|
|
return HANDLED if $self->{opt_nostat}; |
174
|
|
|
|
|
|
|
|
175
|
0
|
0
|
0
|
|
|
|
if ($self->{opt_checksum} && $self->{opt_checksum} ne "off") { |
176
|
0
|
|
|
|
|
|
return $fix->(); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# stat each device to see if it's still there. on first problem, |
180
|
|
|
|
|
|
|
# stop and go into the slow(er) fix function. |
181
|
0
|
|
|
|
|
|
my $err; |
182
|
|
|
|
|
|
|
my $rv = $self->parallel_check_sizes([ $fid->devfids ], sub { |
183
|
0
|
|
|
0
|
|
|
my ($dfid, $disk_size) = @_; |
184
|
0
|
0
|
|
|
|
|
if (! defined $disk_size) { |
185
|
0
|
|
|
|
|
|
my $dev = $dfid->device; |
186
|
|
|
|
|
|
|
# We end up checking is_perm_dead twice, but that's the way the |
187
|
|
|
|
|
|
|
# flow goes... |
188
|
0
|
0
|
|
|
|
|
if ($dev->dstate->is_perm_dead) { |
189
|
0
|
|
|
|
|
|
$err = "needfix"; |
190
|
0
|
|
|
|
|
|
return 0; |
191
|
|
|
|
|
|
|
} |
192
|
0
|
|
|
|
|
|
error("Connectivity problem reaching device " . $dev->id . " on host " . $dev->host->ip . "\n"); |
193
|
0
|
|
|
|
|
|
$err = "stalled"; |
194
|
0
|
|
|
|
|
|
return 0; |
195
|
|
|
|
|
|
|
} |
196
|
0
|
0
|
|
|
|
|
return 1 if $disk_size == $fid->length; |
197
|
0
|
|
|
|
|
|
$err = "needfix"; |
198
|
|
|
|
|
|
|
# Note: not doing fsck_log, as fix_fid will log status for each device. |
199
|
0
|
|
|
|
|
|
return 0; |
200
|
0
|
|
|
|
|
|
}); |
201
|
|
|
|
|
|
|
|
202
|
0
|
0
|
|
|
|
|
if ($rv) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
203
|
0
|
0
|
0
|
|
|
|
return ($fid->class->hashtype && !($self->{opt_checksum} && $self->{opt_checksum} eq "off")) |
204
|
|
|
|
|
|
|
? $fix->() : HANDLED; |
205
|
|
|
|
|
|
|
} elsif ($err eq "stalled") { |
206
|
0
|
|
|
|
|
|
return STALLED; |
207
|
|
|
|
|
|
|
} elsif ($err eq "needfix") { |
208
|
0
|
|
|
|
|
|
return $fix->(); |
209
|
|
|
|
|
|
|
} else { |
210
|
0
|
|
|
|
|
|
die "Unknown error checking fid sizes in parallel.\n"; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# returns true if all size checks succeeded, false otherwise |
215
|
|
|
|
|
|
|
sub parallel_check_sizes { |
216
|
0
|
|
|
0
|
0
|
|
my ($self, $dflist, $cb) = @_; |
217
|
0
|
|
|
|
|
|
my $expect = scalar @$dflist; |
218
|
0
|
|
|
|
|
|
my ($good, $done) = (0, 0); |
219
|
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
|
foreach my $df (@$dflist) { |
221
|
|
|
|
|
|
|
$df->size_on_disk(sub { |
222
|
0
|
|
|
0
|
|
|
my ($size) = @_; |
223
|
0
|
|
|
|
|
|
$done++; |
224
|
0
|
0
|
|
|
|
|
$good++ if $cb->($df, $size); |
225
|
0
|
|
|
|
|
|
}); |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
0
|
|
|
0
|
|
|
Danga::Socket->SetPostLoopCallback(sub { $done != $expect }); |
|
0
|
|
|
|
|
|
|
229
|
0
|
|
|
|
|
|
Danga::Socket->EventLoop; |
230
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
|
return $good == $expect; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# this is the slow path. if something above in check_fid finds |
235
|
|
|
|
|
|
|
# something amiss in any way, we take the slow path on a fid and try |
236
|
|
|
|
|
|
|
# really hard to fix the situation. |
237
|
|
|
|
|
|
|
# |
238
|
|
|
|
|
|
|
# return true if situation handled, 0 if nothing could be done. |
239
|
|
|
|
|
|
|
# die on errors (like connectivity problems). |
240
|
21
|
|
|
21
|
|
132
|
use constant CANT_FIX => 0; |
|
21
|
|
|
|
|
45
|
|
|
21
|
|
|
|
|
61596
|
|
241
|
|
|
|
|
|
|
sub fix_fid { |
242
|
0
|
|
|
0
|
0
|
|
my ($self, $fid) = @_; |
243
|
0
|
|
|
|
|
|
debug(sprintf("Fixing FID %d", $fid->id)); |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# make devfid objects from the devids that this fid is on, |
246
|
0
|
|
|
|
|
|
my @dfids = map { MogileFS::DevFID->new($_, $fid) } $fid->devids; |
|
0
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# track all known good copies (dev objects), as well as all bad |
249
|
|
|
|
|
|
|
# copies (places it should've been, but isn't) |
250
|
0
|
|
|
|
|
|
my @good_devs; |
251
|
|
|
|
|
|
|
my @bad_devs; |
252
|
0
|
|
|
|
|
|
my %already_checked; # devid -> 1. |
253
|
0
|
|
0
|
|
|
|
my $alg = $fid->class->hashname || $self->{opt_checksum}; |
254
|
0
|
|
|
|
|
|
my $checksums = {}; |
255
|
0
|
|
|
0
|
|
|
my $ping_cb = sub { $self->still_alive }; |
|
0
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
my $check_dfids = sub { |
258
|
0
|
|
|
0
|
|
|
my $is_desperate_mode = shift; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# stat all devices. |
261
|
0
|
|
|
|
|
|
foreach my $dfid (@dfids) { |
262
|
0
|
|
|
|
|
|
my $dev = $dfid->device; |
263
|
0
|
0
|
|
|
|
|
next if $already_checked{$dev->id}++; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# Got a dead link, but reaper hasn't cleared it yet? |
266
|
0
|
0
|
|
|
|
|
if ($dev->dstate->is_perm_dead) { |
267
|
0
|
|
|
|
|
|
push @bad_devs, $dev; |
268
|
0
|
|
|
|
|
|
next; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
0
|
|
|
|
|
|
my $disk_size = $dfid->size_on_disk; |
272
|
0
|
0
|
|
|
|
|
die "dev " . $dev->id . " unreachable" unless defined $disk_size; |
273
|
|
|
|
|
|
|
|
274
|
0
|
0
|
|
|
|
|
if ($disk_size == $fid->length) { |
275
|
0
|
0
|
0
|
|
|
|
if ($alg && $alg ne "off") { |
276
|
0
|
|
|
|
|
|
my $digest = $self->checksum_on_disk($dfid, $alg, $ping_cb); |
277
|
0
|
0
|
|
|
|
|
unless (defined $digest) { |
278
|
0
|
|
|
|
|
|
die "dev " . $dev->id . " unreachable"; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
# DELETE could've hit right after size check |
282
|
0
|
0
|
|
|
|
|
if ($digest eq "-1") { |
283
|
0
|
0
|
|
|
|
|
unless ($is_desperate_mode) { |
284
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_FILE_MISSING, $dev); |
285
|
|
|
|
|
|
|
} |
286
|
0
|
|
|
|
|
|
push @bad_devs, $dfid->device; |
287
|
0
|
|
|
|
|
|
next; |
288
|
|
|
|
|
|
|
} |
289
|
0
|
|
0
|
|
|
|
push @{$checksums->{$digest} ||= []}, $dfid->device; |
|
0
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
0
|
|
|
|
|
|
push @good_devs, $dfid->device; |
293
|
|
|
|
|
|
|
# if we were doing a desperate search, one is enough, we can stop now! |
294
|
0
|
0
|
|
|
|
|
return if $is_desperate_mode; |
295
|
0
|
|
|
|
|
|
next; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# don't log in desperate mode, as we'd have "file missing!" log entries |
299
|
|
|
|
|
|
|
# for every device in the normal case, which is expected. |
300
|
0
|
0
|
|
|
|
|
unless ($is_desperate_mode) { |
301
|
0
|
0
|
|
|
|
|
if ($disk_size == -1) { |
302
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_FILE_MISSING, $dev); |
303
|
|
|
|
|
|
|
} else { |
304
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_LENGTH, $dev); |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
0
|
|
|
|
|
|
push @bad_devs, $dfid->device; |
309
|
|
|
|
|
|
|
} |
310
|
0
|
|
|
|
|
|
}; |
311
|
|
|
|
|
|
|
|
312
|
0
|
|
|
|
|
|
$check_dfids->(); |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# if we didn't find it anywhere, let's go do an exhaustive search over |
315
|
|
|
|
|
|
|
# all devices, looking for it... |
316
|
0
|
0
|
|
|
|
|
unless (@good_devs) { |
317
|
|
|
|
|
|
|
# replace @dfids with list of all (alive) devices. dups will be ignored by |
318
|
|
|
|
|
|
|
# check_dfids |
319
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_START_SEARCH); |
320
|
0
|
|
|
|
|
|
@dfids = List::Util::shuffle( |
321
|
0
|
|
|
|
|
|
map { MogileFS::DevFID->new($_, $fid) } |
322
|
0
|
|
|
|
|
|
grep { $_->dstate->should_fsck_search_on } |
323
|
|
|
|
|
|
|
Mgd::device_factory()->get_all |
324
|
|
|
|
|
|
|
); |
325
|
0
|
|
|
|
|
|
$check_dfids->("desperate"); |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# still can't fix it? |
328
|
0
|
0
|
|
|
|
|
unless (@good_devs) { |
329
|
0
|
|
|
|
|
|
$self->forget_bad_devs($fid, @bad_devs); |
330
|
0
|
|
|
|
|
|
$fid->update_devcount; |
331
|
0
|
|
|
|
|
|
return CANT_FIX; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# wow, we actually found it! |
335
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_FOUND_FID); |
336
|
0
|
|
|
|
|
|
$fid->note_on_device($good_devs[0]); # at least one good one. |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# fall through to check policy (which will most likely be |
339
|
|
|
|
|
|
|
# wrong, with only one file_on record...) and re-replicate |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
0
|
|
|
|
|
|
$self->forget_bad_devs($fid, @bad_devs); |
343
|
|
|
|
|
|
|
# in case the devcount or similar was fixed. |
344
|
0
|
|
|
|
|
|
$fid->want_reload; |
345
|
|
|
|
|
|
|
|
346
|
0
|
0
|
0
|
|
|
|
$self->fix_checksums($fid, $alg, $checksums) if $alg && $alg ne "off"; |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# Note: this will reload devids, if they called 'note_on_device' |
349
|
|
|
|
|
|
|
# or 'forget_about_device' |
350
|
0
|
0
|
|
|
|
|
unless ($fid->devids_meet_policy) { |
351
|
0
|
|
|
|
|
|
$fid->enqueue_for_replication(in => 1); |
352
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_RE_REPLICATE); |
353
|
0
|
|
|
|
|
|
return HANDLED; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# Clean up the device count if it's wrong |
357
|
0
|
|
|
|
|
|
$self->maybe_fix_devcount($fid); |
358
|
|
|
|
|
|
|
|
359
|
0
|
|
|
|
|
|
return HANDLED; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
sub forget_file_on_with_bad_checksums { |
363
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $checksums) = @_; |
364
|
0
|
|
|
|
|
|
foreach my $bdevs (values %$checksums) { |
365
|
0
|
|
|
|
|
|
foreach my $bdev (@$bdevs) { |
366
|
0
|
|
|
|
|
|
error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $bdev->id); |
367
|
0
|
|
|
|
|
|
$fid->forget_about_device($bdev); |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# returns -1 on missing, |
373
|
|
|
|
|
|
|
# undef on connectivity error, |
374
|
|
|
|
|
|
|
# else checksum of file on disk (after HTTP GET or mogstored read) |
375
|
|
|
|
|
|
|
sub checksum_on_disk { |
376
|
0
|
|
|
0
|
0
|
|
my ($self, $dfid, $alg, $ping_cb) = @_; |
377
|
0
|
|
|
|
|
|
return $dfid->checksum_on_disk($alg, $ping_cb, "fsck"); |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub bad_checksums_errmsg { |
381
|
0
|
|
|
0
|
0
|
|
my ($self, $alg, $checksums) = @_; |
382
|
0
|
|
|
|
|
|
my @err; |
383
|
|
|
|
|
|
|
|
384
|
0
|
|
|
|
|
|
foreach my $checksum (keys %$checksums) { |
385
|
0
|
|
|
|
|
|
my $bdevs = join(",", map { $_->id } @{$checksums->{$checksum}}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
$checksum = unpack("H*", $checksum); |
387
|
0
|
|
|
|
|
|
push @err, "$alg:$checksum on devids=[$bdevs]" |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
0
|
|
|
|
|
|
return join('; ', @err); |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# we don't know what checksum the file is supposed to be, but some |
394
|
|
|
|
|
|
|
# of the devices had checksums that didn't match the other(s). |
395
|
|
|
|
|
|
|
sub auto_checksums_bad { |
396
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $checksums) = @_; |
397
|
0
|
|
|
|
|
|
my $alg = $self->{opt_checksum}; |
398
|
0
|
|
|
|
|
|
my $err = $self->bad_checksums_errmsg($alg, $checksums); |
399
|
|
|
|
|
|
|
|
400
|
0
|
|
|
|
|
|
error("$fid has multiple checksums: $err"); |
401
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_MULTI_CHECKSUM); |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
sub all_checksums_bad { |
405
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $checksums) = @_; |
406
|
0
|
0
|
|
|
|
|
my $alg = $fid->class->hashname or return; # class could've changed |
407
|
0
|
|
|
|
|
|
my $cur_checksum = $fid->checksum; |
408
|
0
|
|
|
|
|
|
my $err = $self->bad_checksums_errmsg($alg, $checksums); |
409
|
0
|
0
|
|
|
|
|
my $cur = $cur_checksum ? "Expected: $cur_checksum" |
410
|
|
|
|
|
|
|
: "No known valid checksum"; |
411
|
0
|
|
|
|
|
|
error("all checksums bad: $err. $cur"); |
412
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_CHECKSUM); |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
sub fix_checksums { |
416
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, $alg, $checksums) = @_; |
417
|
0
|
|
|
|
|
|
my $cur_checksum = $fid->checksum; |
418
|
0
|
|
|
|
|
|
my @all_checksums = keys(%$checksums); |
419
|
|
|
|
|
|
|
|
420
|
0
|
0
|
|
|
|
|
if (scalar(@all_checksums) == 1) { # all checksums match, good! |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
421
|
0
|
|
|
|
|
|
my $disk_checksum = $all_checksums[0]; |
422
|
0
|
0
|
|
|
|
|
if ($cur_checksum) { |
423
|
0
|
0
|
|
|
|
|
if ($cur_checksum->{checksum} ne $disk_checksum) { |
424
|
0
|
|
|
|
|
|
my $expect = $cur_checksum->info; |
425
|
0
|
|
|
|
|
|
my $actual = "$alg:" . unpack("H*", $disk_checksum); |
426
|
0
|
|
|
|
|
|
error("$cur_checksum does not match disk: $actual"); |
427
|
0
|
0
|
|
|
|
|
if ($alg ne $cur_checksum->hashname) { |
428
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_HASHTYPE); |
429
|
|
|
|
|
|
|
} else { |
430
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_CHECKSUM); |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
} else { # fresh row to checksum |
434
|
0
|
|
|
|
|
|
my $hashtype = $fid->class->hashtype; |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# we store this in the database |
437
|
0
|
0
|
|
|
|
|
if ($hashtype) { |
438
|
0
|
|
|
|
|
|
my %row = ( |
439
|
|
|
|
|
|
|
fid => $fid->id, |
440
|
|
|
|
|
|
|
checksum => $disk_checksum, |
441
|
|
|
|
|
|
|
hashtype => $hashtype, |
442
|
|
|
|
|
|
|
); |
443
|
0
|
|
|
|
|
|
my $new_checksum = MogileFS::Checksum->new(\%row); |
444
|
0
|
|
|
|
|
|
debug("creating new checksum=$new_checksum"); |
445
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_NO_CHECKSUM); |
446
|
0
|
|
|
|
|
|
$new_checksum->save; |
447
|
|
|
|
|
|
|
} else { |
448
|
0
|
|
|
|
|
|
my $hex_checksum = unpack("H*", $disk_checksum); |
449
|
0
|
|
|
|
|
|
my $alg = $self->{opt_checksum}; |
450
|
0
|
|
|
|
|
|
debug("fsck_checksum=auto good: $fid $alg:$hex_checksum"); |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
} elsif ($cur_checksum) { |
454
|
0
|
|
|
|
|
|
my $good = delete($checksums->{$cur_checksum->{checksum}}); |
455
|
0
|
0
|
0
|
|
|
|
if ($good && (scalar(@$good) > 0)) { |
456
|
0
|
|
|
|
|
|
$self->forget_file_on_with_bad_checksums($fid, $checksums); |
457
|
|
|
|
|
|
|
# will fail $fid->devids_meet_policy and re-replicate |
458
|
|
|
|
|
|
|
} else { |
459
|
0
|
|
|
|
|
|
$self->all_checksums_bad($fid, $checksums); |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} elsif ($self->{opt_checksum}) { |
462
|
0
|
|
|
|
|
|
$self->auto_checksums_bad($fid, $checksums); |
463
|
|
|
|
|
|
|
} else { |
464
|
0
|
|
|
|
|
|
$self->all_checksums_bad($fid, $checksums); |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
# remove the file_on mappings for devices that were bogus/missing. |
469
|
|
|
|
|
|
|
sub forget_bad_devs { |
470
|
0
|
|
|
0
|
0
|
|
my ($self, $fid, @bad_devs) = @_; |
471
|
0
|
|
|
|
|
|
foreach my $bdev (@bad_devs) { |
472
|
0
|
|
|
|
|
|
error("removing file_on mapping for fid=" . $fid->id . ", dev=" . $bdev->id); |
473
|
0
|
|
|
|
|
|
$fid->forget_about_device($bdev); |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
sub maybe_fix_devcount { |
478
|
|
|
|
|
|
|
# don't even log BCNT errors if skip_devcount is enabled |
479
|
0
|
0
|
|
0
|
0
|
|
return if MogileFS::Config->server_setting_cached('skip_devcount'); |
480
|
|
|
|
|
|
|
|
481
|
0
|
|
|
|
|
|
my ($self, $fid) = @_; |
482
|
0
|
0
|
|
|
|
|
return if scalar($fid->devids) == $fid->devcount; |
483
|
|
|
|
|
|
|
# log a bad count |
484
|
0
|
|
|
|
|
|
$fid->fsck_log(EV_BAD_COUNT); |
485
|
0
|
|
|
|
|
|
$fid->update_devcount(); |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
1; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# Local Variables: |
491
|
|
|
|
|
|
|
# mode: perl |
492
|
|
|
|
|
|
|
# c-basic-indent: 4 |
493
|
|
|
|
|
|
|
# indent-tabs-mode: nil |
494
|
|
|
|
|
|
|
# End: |