line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Net::OpenSSH::Parallel; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.12'; |
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
51240
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
45
|
|
6
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
7
|
1
|
|
|
1
|
|
5
|
use Carp qw(croak carp); |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
87
|
|
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
2231
|
use Net::OpenSSH; |
|
1
|
|
|
|
|
49146
|
|
|
1
|
|
|
|
|
53
|
|
10
|
1
|
|
|
1
|
|
748
|
use Net::OpenSSH::Parallel::Constants qw(:error :on_error); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
203
|
|
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
1
|
|
5
|
use POSIX qw(WNOHANG); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
8
|
|
13
|
1
|
|
|
1
|
|
1049
|
use Time::HiRes qw(time); |
|
1
|
|
|
|
|
1980
|
|
|
1
|
|
|
|
|
5
|
|
14
|
1
|
|
|
1
|
|
207
|
use Scalar::Util qw(dualvar); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7602
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $debug; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Constructor.
#
# Accepted options (all optional):
#   workers       - stored as max_workers; _at_ready suspends hosts when
#                   the number of ready/running/connecting hosts exceeds it
#   connections   - stored as max_conns; _at_ready frees a connection
#                   before opening a new one once num_conns reaches it
#   reconnections - object-wide default looked up by _at_error
#   on_error      - object-wide default error policy looked up by _at_error
#
# When only "connections" is given, "workers" defaults to the same value.
# Croaks if connections < workers or if any unknown option is passed.
# Returns the new blessed object.
sub new {
    my ($class, %opts) = @_;
    my $max_workers   = delete $opts{workers};
    my $max_conns     = delete $opts{connections};
    my $reconnections = delete $opts{reconnections};
    my $on_error      = delete $opts{on_error};

    if ($max_conns) {
        if ($max_workers) {
            $max_conns < $max_workers and
                croak "connections ($max_conns) < workers ($max_workers)";
        }
        else {
            $max_workers = $max_conns;
        }
    }

    # Anything still left in %opts was not recognised above.
    %opts and croak "unknown option(s): " . join(", ", sort keys %opts);

    my $self = { joins => {},
                 hosts => {},
                 host_by_pid => {},
                 ssh_master_by_pid => {},
                 # label sets, one per host state
                 in_state => {
                              connecting => {},
                              ready => {},
                              running => {},
                              done => {},
                              waiting => {},
                              suspended => {},
                              join_failed => {},
                             },
                 # labels of hosts holding an SSH connection while in one
                 # of these states (maintained by _set_host_state)
                 connected => { suspended => {},
                                waiting => {},
                                join_failed => {},
                              },
                 max_workers => $max_workers,
                 max_conns => $max_conns,
                 num_conns => 0,
                 reconnections => $reconnections,
                 on_error => $on_error,
               };
    bless $self, $class;
    $self;
}
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Bit assigned to every debug channel; $debug is a mask of these bits.
my %debug_channel = (api => 1, state => 2, select => 4, at => 8,
                     action => 16, join => 32, workers => 64,
                     connect => 128, conns => 256, error => 512);

# _debug($channel => @message_parts)
#
# Prints the message parts (undefined parts are printed as empty
# strings) to STDERR, prefixed with the seconds elapsed since program
# start ($^T), when the bit for $channel is set in $debug.
# Dies on an unknown channel name.
sub _debug {
    my $channel = shift;
    my $bit = $debug_channel{$channel}
        or die "internal error: bad debug channel $channel";
    # "|| 0" avoids an uninitialized-value warning if _debug is called
    # directly while $debug is unset.
    if (($debug || 0) & $bit) {
        print STDERR sprintf("%6.3f", (time - $^T)), "| ",
            join('', map { defined($_) ? $_ : '' } @_), "\n";
    }
}
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# add_host($label, %opts) or add_host($label, $host, %opts)
#
# Registers a host under $label. With an odd number of arguments after
# the label, the first of them is taken as the "host" option. "host"
# defaults to the label and "batch_mode" to 1. The "on_error" and
# "reconnections" options are kept in the host record; all remaining
# options are stored under "opts" (later handed to Net::OpenSSH->new by
# _at_connect). The new host starts in state "done" with an empty queue.
# Croaks if the label contains one of , * ! ( ) < > / { }.
sub add_host {
    my ($self, $label, @args) = @_;
    if ($label =~ /([,*!()<>\/{}])/) {
        croak "invalid char '$1' in host label";
    }

    my %opts = (@args % 2) ? (host => @args) : @args;
    defined $opts{host}       or $opts{host}       = $label;
    defined $opts{batch_mode} or $opts{batch_mode} = 1;

    # These two are per-host settings, not connection options.
    my $on_error      = delete $opts{on_error};
    my $reconnections = delete $opts{reconnections};

    my $host = {
        label         => $label,
        workers       => 1,
        opts          => \%opts,
        ssh           => undef,
        state         => 'done',
        queue         => [],
        on_error      => $on_error,
        reconnections => $reconnections,
    };

    $self->{hosts}{$label} = $host;
    $debug and _debug(api => "[$label] added ($host)");
    $self->{in_state}{done}{$label} = 1;
    $debug and _debug(state => "[$label] state set to done");
}
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# _set_host_state($label, $state)
#
# Moves host $label from its current state set to $state, keeping
# $self->{in_state} and $self->{connected} in sync. "connected" only
# tracks hosts in the suspended, waiting and join_failed states that
# currently hold an SSH object. Dies if the host is not registered in
# the set of its current state.
sub _set_host_state {
    my ($self, $label, $state) = @_;
    my $host = $self->{hosts}{$label};
    my $old = $host->{state};
    my %tracks_connection = map { $_ => 1 } qw(suspended waiting join_failed);

    unless (delete $self->{in_state}{$old}{$label}) {
        die "internal error: host $label is in state $old but not in such queue";
    }
    if ($tracks_connection{$old}) {
        delete $self->{connected}{$old}{$label};
    }

    $host->{state} = $state;
    $self->{in_state}{$state}{$label} = 1;
    $debug and _debug(state => "[$label] state changed $old --> $state");

    if ($host->{ssh} and $tracks_connection{$state}) {
        $self->{connected}{$state}{$label} = 1;
        $debug and _debug(state => "[$label] host is connected");
    }
}
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# Compiled regular expressions, keyed by the selector part they came from.
my %sel2re_cache;

# _selector_to_re($part)
#
# Converts one selector part into an anchored regular expression:
# every character is matched literally except "*", which matches any
# sequence of characters. Results are memoized in %sel2re_cache.
sub _selector_to_re {
    my ($self, $part) = @_;
    my $cached = $sel2re_cache{$part};
    return $cached if $cached;

    my $pattern = quotemeta $part;
    $pattern =~ s/\\\*/.*/g;    # turn the escaped "*" back into a wildcard
    return $sel2re_cache{$part} = qr/^$pattern$/;
}
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# _select_labels($selector)
#
# Splits $selector on commas (surrounding whitespace ignored) and
# returns the labels of all registered hosts matching any of the parts,
# without duplicates and in no particular order.
sub _select_labels {
    my ($self, $selector) = @_;
    my @known = keys %{$self->{hosts}};
    my %picked;
    foreach my $part (split /\s*,\s*/, $selector) {
        my $re = $self->_selector_to_re($part);
        foreach my $label (@known) {
            $picked{$label} = 1 if $label =~ $re;
        }
    }
    my @labels = keys %picked;
    $debug and _debug(select => "selector($selector) --> [", join(', ', @labels), "]");
    return @labels;
}
150
|
|
|
|
|
|
|
|
151
|
0
|
|
|
0
|
1
|
|
# Shortcut for push('*', ...): queue an action on every registered host.
sub all {
    my $self = shift;
    return $self->push('*', @_);
}
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# Short names accepted by push() and the action they stand for.
my %push_action_alias = (get  => 'scp_get',
                         put  => 'scp_put',
                         psub => 'parsub',
                         cmd  => 'command');

# Minimum number of arguments (after the optional options hash) per action.
my %push_min_args = ( here => 1,
                      goto => 1,
                      stop => 0,
                      sub => 1,
                      parsub => 1,
                      scp_get => 2,
                      scp_put => 2,
                      rsync_get => 2,
                      rsync_put => 2 );

# Maximum number of arguments per action; actions not listed are unbounded.
my %push_max_args = ( here => 1,
                      goto => 1,
                      stop => 0 );

# push($selector, $action, [\%opts,] @args)
#
# Queues $action on every host matched by $selector and returns the
# list of affected labels. A code reference given as the action is
# queued as a "sub" action. Each queue entry has the form
# [$action, \%opts, @args]. Hosts that were in state "done" are moved to
# "ready".
#
# For "join", the first argument is the selector of the hosts to wait
# for: a "_notify" entry is queued on those hosts first, and the join
# is skipped (empty list returned) when that selector matches nothing.
#
# Croaks on an unknown action, on options passed to "join"/"here", and
# on too few/too many arguments.
sub push {
    my $self = shift;
    my $selector = shift;
    my $action = shift;
    my $in_state = $self->{in_state};

    if (ref $action eq 'CODE') {
        unshift @_, $action;
        $action = 'sub';
    }

    my $alias = $push_action_alias{$action};
    $action = $alias if defined $alias;

    $action =~ /^(?:command|(?:(?:rsync|scp)_(?:get|put))|join|sub|parsub|here|stop|goto|_notify)$/
        or croak "bad action '$action'";

    # An options hash is only recognised for non-"sub" actions, where a
    # leading hash reference would otherwise be ambiguous with the sub's
    # own arguments.
    my %opts = (($action ne 'sub' and ref $_[0] eq 'HASH') ? %{shift()} : ());
    %opts and grep($action eq $_, qw(join here))
        and croak "unsupported option(s) '" . join("', '", keys %opts) . "' in $action action";

    my @labels = $self->_select_labels($selector);

    my $max = $push_max_args{$action};
    croak "too many parameters for action $action"
        if (defined $max and $max < @_);

    my $min = $push_min_args{$action};
    croak "too few parameters for action $action"
        if (defined $min and $min > @_);

    if ($action eq 'join') {
        my $notify_selector = shift @_;
        my $join = { id => '#' . $self->{join_seq}++,
                     depends => {},
                     notify => [] };
        my @depends = $self->push($notify_selector, _notify => {}, $join)
            or do {
                # $join is a plain hash, not an object: log through the
                # _debug function instead of calling it as a method.
                $debug and _debug(join => "join $join->{id} does not depend on anything, ignoring!");
                return ();
            };
        $join->{depends}{$_} = 1 for @depends;

        for my $label (@labels) {
            my $host = $self->{hosts}{$label};
            # CORE:: because this package defines its own push
            CORE::push @{$host->{queue}}, [join => {}, $join];
            $debug and _debug(api => "[$label] join $join->{id} queued");
            $self->_set_host_state($label, 'ready')
                if $in_state->{done}{$label};
        }
    }
    else {
        for my $label (@labels) {
            my $host = $self->{hosts}{$label};
            CORE::push @{$host->{queue}}, [$action, \%opts, @_];
            $debug and _debug(api => "[$label] action $action queued");
            $self->_set_host_state($label, 'ready')
                if $in_state->{done}{$label};
        }
    }
    @labels;
}
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Debugging aid: counts the hosts that hold an SSH object, dies if that
# differs from the saved $self->{num_conns}, and then prints to STDERR,
# for every state, how many of the hosts in that state are connected.
sub _audit_conns {
    my $self = shift;
    my $hosts = $self->{hosts};
    my $saved = $self->{num_conns};
    my $counted = grep { $_->{ssh} } values %$hosts;
    $debug and _debug(conns => "audit_conns counted: $counted, saved: $saved");
    unless ($counted == $saved) {
        die "internal error: wrong number of connections, counted: $counted, saved: $saved";
    }

    my $in_state = $self->{in_state};
    foreach my $state (keys %$in_state) {
        my @labels = keys %{$in_state->{$state}};
        my $connected = grep { $hosts->{$_}{ssh} } @labels;
        my $total = @labels;
        print STDERR "conns in state $state: $connected of $total\n";
    }
}
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# _hash_chain_get($name => @hashes)
#
# Returns the first defined value stored under $name in the given hash
# references, looking at them in order and skipping undefined entries.
# Returns undef when no hash has a defined value for $name.
sub _hash_chain_get {
    my ($name, @chain) = @_;
    foreach my $hash (@chain) {
        next unless defined $hash;
        my $value = $hash->{$name};
        defined $value and return $value;
    }
    return undef;
}
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# _at_error($label, $error)
#
# Applies the error policy for host $label after $error happened.
#
# The policy ("on_error") and the reconnection limit ("reconnections")
# are looked up, in this order, in the options of the failed task, the
# host record and the object. A code reference policy is called to
# obtain the actual policy code. A master failure is retried while the
# number of reconnections for the current task is below the limit.
#
# Outcome by policy:
#   RETRY  - after a master failure the host is disconnected and set
#            back to "ready"; otherwise the task is put back at the
#            head of the queue. A failed goto cannot be retried and is
#            handled as ABORT.
#   IGNORE - the host goes back to "ready" (after a master failure it
#            is disconnected first, and the pending task is dropped).
#   DONE, ABORT, ABORT_ALL - the queue is dropped (pending "_notify"
#            entries are delivered, flagged as failed unless the policy
#            is DONE), the host goes to "done", is disconnected and the
#            error is saved in $host->{error}. ABORT_ALL also sets
#            $self->{abort_all}.
# Any other policy value is reported with carp and handled as ABORT.
sub _at_error {
    my ($self, $label, $error) = @_;
    my $host = $self->{hosts}{$label};
    my $task = delete $host->{current_task};
    my $queue = $host->{queue};

    $debug and _debug(error => "_at_error label: $label, error: $error");

    my $opts;
    $opts = $task->[1] if $task;

    my $on_error;
    if ($error == OSSH_MASTER_FAILED) {
        if ($host->{state} eq 'connecting') {
            # task is not set in state connecting!
            $task and die "internal error: task is defined in state connecting";
            # the task that triggered the connection is still queued
            $opts = $queue->[0][1] if @$queue;
        }
        my $max_reconnections = _hash_chain_get(reconnections => $opts, $host, $self) || 0;
        my $reconnections = $host->{current_task_reconnections}++ || 0;
        $debug and _debug(error => "[$label] reconnection: $reconnections, max: $max_reconnections");
        if ($reconnections < $max_reconnections) {
            $debug and _debug(error => "[$label] will reconnect!");
            $on_error = OSSH_ON_ERROR_RETRY;
        }
    }
    $on_error ||= _hash_chain_get(on_error => $opts, $host, $self);

    if (ref $on_error eq 'CODE') {
        if ($error == OSSH_JOIN_FAILED) {
            $on_error = $on_error->($self, $label, $error);
        }
        else {
            $on_error = $on_error->($self, $label, $error, $task);
        }
    }

    $on_error = OSSH_ON_ERROR_ABORT if (not defined $on_error or
                                        $error == OSSH_ABORTED);

    $debug and _debug(error => "[$label] on_error (final): $on_error, error: $error (".($error+0).")");

    if ($on_error == OSSH_ON_ERROR_RETRY and $error == OSSH_GOTO_FAILED) {
        # No way to retry after a GOTO error!
        # That should probably croak, but that would leave unmanaged
        # processes running. Fall through to the abort handling below
        # instead of returning with the policy silently discarded.
        $on_error = OSSH_ON_ERROR_ABORT;
    }

    if ($on_error == OSSH_ON_ERROR_RETRY) {
        if ($error == OSSH_MASTER_FAILED) {
            # _disconnect_host refuses to run in most states, so pass
            # through "suspended" to drop the connection.
            $self->_set_host_state($label, 'suspended');
            $self->_disconnect_host($label);
            $self->_set_host_state($label, 'ready');
        }
        else {
            unshift @$queue, $task;
        }
        return;
    }

    delete $host->{current_task_reconnections};

    if ($on_error == OSSH_ON_ERROR_IGNORE) {
        if ($error == OSSH_MASTER_FAILED) {
            # establishing a new connection failed, what we should do?
            # currently we remove the current task from the queue and
            # continue.
            shift @$queue unless $task;
            $self->_set_host_state($label, 'suspended');
            $self->_disconnect_host($label);
            $self->_set_host_state($label, 'ready');
        }
        else {
            $self->_set_host_state($label, 'ready');
        }
    }
    else {
        unless ($on_error == OSSH_ON_ERROR_DONE or
                $on_error == OSSH_ON_ERROR_ABORT or
                $on_error == OSSH_ON_ERROR_ABORT_ALL) {
            carp "bad on_error code $on_error";
            $on_error = OSSH_ON_ERROR_ABORT;
        }
        my $failed = ($on_error != OSSH_ON_ERROR_DONE);
        $debug and _debug(error => "[$label] dropping queue, ", scalar(@$queue), " jobs");
        while (my $dropped = shift @$queue) {
            my ($action, undef, $join) = @$dropped;
            $debug and _debug(error => "[$label] remove action $action from queue");
            # hosts waiting on this one must still be woken up
            $self->_join_notify($label, $join, $failed)
                if $action eq '_notify';
        }

        $on_error == OSSH_ON_ERROR_ABORT_ALL
            and $self->{abort_all} = 1;

        $self->_set_host_state($label, 'done');
        $self->_disconnect_host($label);
        $host->{error} = $error;
    }
}
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# _at_connect($label)
#
# Starts an asynchronous SSH connection for host $label: creates the
# Net::OpenSSH object from the host options (forcing async => 1, with
# expand_vars => 1 as an overridable default), sets its LABEL variable,
# records the master pid, increments the connection count and moves the
# host to state "connecting". If the new object already reports an
# error, it is passed to _at_error.
# Dies if the host already has an SSH object.
sub _at_connect {
    my ($self, $label) = @_;
    my $host = $self->{hosts}{$label};
    $debug and _debug(connect => "[$label] _connect, starting SSH connection");
    $host->{ssh} and die "internal error: connecting host is already connected";
    my $ssh = $host->{ssh} = Net::OpenSSH->new(expand_vars => 1,
                                               %{$host->{opts}},
                                               async => 1);
    $ssh->set_var(LABEL => $label);
    my $master_pid = $ssh->get_master_pid;
    $host->{master_pid} = $master_pid;
    # No pid is available when the master could not be started; do not
    # register an entry under an undefined key (_disconnect_host applies
    # the same guard when removing it).
    $self->{ssh_master_by_pid}{$master_pid} = $label
        if defined $master_pid;
    $self->{num_conns}++;
    $self->_set_host_state($label, 'connecting');
    if ($ssh->error) {
        $self->_at_error($label, $ssh->error);
    }
}
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
# _at_connecting($label)
#
# Polls the SSH master of a host in state "connecting" by calling
# wait_for_master(1): on success the host becomes "ready"; if the SSH
# object reports an error, it is handed to _at_error; otherwise nothing
# changes.
sub _at_connecting {
    my ($self, $label) = @_;
    $debug and _debug(at => "[$label] at_connecting, waiting for master");
    my $ssh = $self->{hosts}{$label}{ssh};
    my $connected = $ssh->wait_for_master(1);
    if ($connected) {
        $debug and _debug(at => "[$label] at_connecting, master connected");
        $self->_set_host_state($label, 'ready');
    }
    elsif ($ssh->error) {
        $self->_at_error($label, $ssh->error);
    }
}
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# _join_notify($label, $join, $failed)
#
# Records that host $label has reached (or dropped) its "_notify" entry
# for $join. The join is flagged as failed when $failed is true. Once no
# dependency is left, the join is marked as done and every host listed
# in $join->{notify} is moved to "join_failed" if the join failed, or
# to "ready" otherwise.
# Dies if $label is not a pending dependency of the join.
sub _join_notify {
    my ($self, $label, $join, $failed) = @_;
    my $depends = $join->{depends};
    unless (delete $depends->{$label}) {
        die "internal error: $join->{id} notified for non dependent label $label";
    }
    $debug and _debug(join => "removing dependent $label from join $join->{id}");
    if ($failed) {
        $join->{failed} = 1;
    }
    unless (%$depends) {
        $debug and _debug(join => "join $join->{id} done");
        $join->{done} = 1;
        my $new_state = $join->{failed} ? 'join_failed' : 'ready';
        foreach my $waiter (@{$join->{notify}}) {
            $debug and _debug(join => "notifying $waiter about join $join->{id} done");
            $self->_set_host_state($waiter, $new_state);
        }
    }
}
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# Returns the number of hosts currently in the ready, running or
# connecting states.
sub _num_workers {
    my $self = shift;
    my $in_state = $self->{in_state};
    my $count = 0;
    foreach my $state (qw(ready running connecting)) {
        $count += keys %{$in_state->{$state}};
    }
    return $count;
}
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# _disconnect_host($label)
#
# Drops the SSH connection of host $label, if it has one: forgets the
# master pid, releases the SSH object, decrements the connection count
# and re-enters the current state so that _set_host_state removes the
# host from the "connected" bookkeeping.
#
# Only allowed for hosts in the waiting, suspended, join_failed, done or
# connecting states; dies otherwise. "join_failed" is accepted because
# _set_host_state tracks connected hosts in that state and
# _disconnect_any_host may pick one of them.
sub _disconnect_host {
    my ($self, $label) = @_;
    my $host = $self->{hosts}{$label};
    my $state = $host->{state};
    $state =~ /^(?:waiting|suspended|join_failed|done|connecting)$/
        or die "internal error: disconnecting $label in state $state";
    if ($host->{ssh}) {
        $debug and _debug(connect => "[$label] disconnecting host");
        my $master_pid = delete $host->{master_pid};
        delete $self->{ssh_master_by_pid}{$master_pid}
            if defined $master_pid;
        undef $host->{ssh};
        $self->{num_conns}--;
        # same state on purpose: refreshes the "connected" sets now that
        # the host has no SSH object
        $self->_set_host_state($label, $state);
    }
}
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# Frees one SSH connection by disconnecting a connected host that is
# not doing any work. Candidates are looked for among the connected
# hosts in the suspended, join_failed and waiting states, in that order.
# Dies if there is no such host.
sub _disconnect_any_host {
    my $self = shift;
    my $connected = $self->{connected};
    $debug and _debug(conns => "disconnect any host");
    # $self->_audit_conns;
    my $label;
    for my $state (qw(suspended join_failed waiting)) {
        $debug and _debug(conns => "looking for connected host in state $state");
        ($label) = each %{$connected->{$state}};
        keys %{$connected->{$state}}; # reset iterator
        last if defined $label;
    }
    # Check before logging, so that the debug message never interpolates
    # an undefined label.
    defined $label or die "internal error: unable to disconnect any host";
    $debug and _debug(conns => "[$label] disconnecting");
    $self->_disconnect_host($label);
}
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# Task options handled by this module; they are removed before the
# options are passed to the _start_* methods.
my @private_opts = qw(on_error or_goto reconnections);

# _at_ready($label)
#
# Runs the queue of a host in state "ready" until something has to be
# waited for:
#  - when max_workers is set and exceeded, the host is suspended;
#  - when abort_all is set, the host is aborted through _at_error;
#  - "join" waits (state "waiting") unless the join is already done, in
#    which case it continues, or reports OSSH_JOIN_FAILED if it failed;
#  - "here", "stop", "goto", "_notify" and "sub" entries are processed
#    immediately and the loop goes on;
#  - any other action needs an SSH connection (except "parsub" with the
#    no_ssh option): if the host is not connected the task is put back
#    and a connection is started; otherwise the matching _start_$action
#    method is called and the host moves to "running" under the pid it
#    returned. A missing pid is reported to _at_error.
# When the queue runs empty the host goes to "done" and is disconnected.
sub _at_ready {
    my ($self, $label) = @_;
    if (my $max_workers = $self->{max_workers}) {
        my $num_workers = $self->_num_workers;
        $debug and _debug(workers => "num workers: $num_workers, maximun: $max_workers");
        if ($num_workers > $max_workers) {
            $debug and _debug(workers => "[$label] suspending");
            $self->_set_host_state($label, 'suspended');
            return;
        }
    }

    my $host = $self->{hosts}{$label};
    $debug and _debug(at => "[$label] at_ready");

    my $queue = $host->{queue};

    if ($self->{abort_all}) {
        $self->_at_error($label, OSSH_ABORTED);
        return;
    }

    while (defined (my $task = shift @$queue)) {
        # after this shift, $task is (\%opts, @args)
        my $action = shift @$task;
        $debug and _debug(at => "[$label] at_ready, starting new action $action");
        if ($action eq 'join') {
            my (undef, $join) = @$task;
            if ($join->{done}) {
                $debug and _debug(join => "join[$join->{id}] is done");
                if ($join->{failed}) {
                    $self->_at_error($label, OSSH_JOIN_FAILED);
                    return;
                }
                $debug and _debug(action => "[$label] join $join->{id} already done");
                next;
            }
            # ask _join_notify to wake this host up when the join completes
            CORE::push @{$join->{notify}}, $label;
            $self->_set_host_state($label, 'waiting');
            return;
        }
        elsif ($action eq 'here') {
            next;
        }
        elsif ($action eq 'stop') {
            $self->_skip($label, 'END');
            next;
        }
        elsif ($action eq 'goto') {
            my (undef, $target) = @$task;
            $self->_skip($label, $target);
            next;
        }
        elsif ($action eq '_notify') {
            my (undef, $join) = @$task;
            $self->_join_notify($label, $join);
            next;
        }
        elsif ($action eq 'sub') {
            # use Data::Dumper;
            # _debug (action => Dumper(@$task));
            shift @$task;    # drop the (unused) options hash
            my $sub = shift @$task;
            $debug and _debug(action => "[$label] calling sub $sub");
            $sub->($self, $label, @$task);
            next;
        }
        else {
            my $ssh = $host->{ssh};
            unless ($action eq 'parsub' and $task->[0]{no_ssh}) {
                unless ($ssh) {
                    # unshift the task we have just removed and connect first:
                    unshift @$task, $action;
                    unshift @$queue, $task;
                    if (my $max_conns = $self->{max_conns}) {
                        $self->_disconnect_any_host() if $self->{num_conns} >= $max_conns;
                    }
                    $debug and _debug(at => "[$label] host is not connected, connecting...");
                    $self->_at_connect($label);
                    return;
                }

                if (my $error = $ssh->error) {
                    $self->_at_error($label, $error);
                    return;
                }
            }

            $host->{current_task} = [$action, @$task];
            my %opts = %{shift @$task};
            delete @opts{@private_opts};
            my $method = $self->can("_start_$action")
                or die "internal error: method _start_$action not found";
            my $pid = $method->($self, $label, \%opts, @$task);
            $debug and _debug(action => "[$label] action pid: ", $pid);
            unless (defined $pid) {
                # dualvar takes the numeric value first and the string
                # second, so that "$error == OSSH_SLAVE_FAILED" holds in
                # _at_error.
                my $error = (($ssh && $ssh->error) ||
                             dualvar(OSSH_SLAVE_FAILED,
                                     ($action eq 'parsub'
                                      ? "Unable to fork parsub"
                                      : "Action $action failed to start")));
                $self->_at_error($label, $error);
                return;
            }
            $self->{host_by_pid}{$pid} = $label;
            $self->_set_host_state($label, 'running');
            return;
        }
    }
    $debug and _debug(at => "[$label] at_ready, queue_is_empty, we are done!");
    $self->_set_host_state($label, 'done');
    $self->_disconnect_host($label);
}
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# _start_parsub($label, \%opts, $sub, @args)
#
# Forks and, in the child, calls $sub->($label, $ssh, @args), where
# $ssh is the SSH object of the host (possibly undef). The child exits
# with status 1 if the sub died and 0 otherwise.
# In the parent, returns the child pid, or nothing if fork failed.
sub _start_parsub {
    my ($self, $label, $opts, $sub, @args) = @_;
    my $ssh = $self->{hosts}{$label}{ssh};
    $debug and _debug(action => "[$label] start parsub action [@args]");

    my $pid = fork;
    return unless defined $pid;    # fork failed
    return $pid if $pid;           # parent process

    # child process from here on
    eval { $sub->($label, $ssh, @args) };
    $@ and $debug and _debug(error => "slave died on parsub: $@");
    # _exit leaves the child without running END blocks or destructors
    POSIX::_exit($@ ? 1 : 0);
}
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# _start_command($label, \%opts, @cmd)
#
# Launches @cmd through the spawn method of the host's SSH object and
# returns its result (_at_ready uses it as the pid of the action).
sub _start_command {
    my ($self, $label, $opts, @cmd) = @_;
    $debug and _debug(action => "[$label] start command action [@cmd]");
    my $ssh = $self->{hosts}{$label}{ssh};
    return $ssh->spawn($opts, @cmd);
}
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# _start_scp_get($label, \%opts, @files)
#
# Calls scp_get on the host's SSH object with the async option forced
# on in %opts, and returns its result (used by _at_ready as the pid).
sub _start_scp_get {
    my ($self, $label, $opts, @files) = @_;
    $debug and _debug(action => "[$label] start scp_get action");
    $opts->{async} = 1;
    my $ssh = $self->{hosts}{$label}{ssh};
    return $ssh->scp_get($opts, @files);
}
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
# _start_scp_put($label, \%opts, @files)
#
# Calls scp_put on the host's SSH object with the async option forced
# on in %opts, and returns its result (used by _at_ready as the pid).
sub _start_scp_put {
    my ($self, $label, $opts, @files) = @_;
    $debug and _debug(action => "[$label] start scp_put action");
    $opts->{async} = 1;
    my $ssh = $self->{hosts}{$label}{ssh};
    return $ssh->scp_put($opts, @files);
}
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
# _start_rsync_get($label, \%opts, @files)
#
# Calls rsync_get on the host's SSH object with the async option forced
# on in %opts, and returns its result (used by _at_ready as the pid).
sub _start_rsync_get {
    my ($self, $label, $opts, @files) = @_;
    $debug and _debug(action => "[$label] start rsync_get action");
    $opts->{async} = 1;
    my $ssh = $self->{hosts}{$label}{ssh};
    return $ssh->rsync_get($opts, @files);
}
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
# _start_rsync_put($label, \%opts, @files)
#
# Calls rsync_put on the host's SSH object with the async option forced
# on in %opts, and returns its result (used by _at_ready as the pid).
sub _start_rsync_put {
    my ($self, $label, $opts, @files) = @_;
    $debug and _debug(action => "[$label] start rsync_put action");
    $opts->{async} = 1;
    my $ssh = $self->{hosts}{$label}{ssh};
    return $ssh->rsync_put($opts, @files);
}
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
# FIXME: dead code? |
644
|
|
|
|
|
|
|
# Placeholder start method for the "join" action. _at_ready handles
# joins itself before dispatching to the _start_* methods, so this only
# emits a warning if it is ever called.
sub _start_join {
    my ($self, $label) = @_;
    warn "internal mismatch: this shouldn't be happening!";
}
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
# _skip($label, $target)
#
# Drops the queued tasks of host $label that come before the "here"
# entry tagged $target (the "here" entry itself stays queued). With the
# target 'END' and no such entry, the whole queue is dropped. Skipped
# "_notify" entries are still delivered through _join_notify.
# If the target is not found (and is not 'END'), nothing is dropped and
# OSSH_GOTO_FAILED is reported to _at_error.
sub _skip {
    my ($self, $label, $target) = @_;
    $debug and _debug(action => "skipping until $target");
    my $queue = $self->{hosts}{$label}{queue};

    # Find the position of the matching "here" entry; $found stays
    # undefined when the end of the queue is reached first.
    my $pos = 0;
    my $found;
    while (defined($found = $queue->[$pos])) {
        last if ($found->[0] eq 'here' and $found->[2] eq $target);
        $pos++;
    }

    unless ($found or $target eq 'END') {
        $debug and _debug(action => "here label $target not found");
        return $self->_at_error($label, OSSH_GOTO_FAILED);
    }

    foreach my $n (1..$pos) {
        my $skipped = shift @$queue;
        if ($skipped->[0] eq '_notify') {
            $self->_join_notify($label, $skipped->[2]);
        }
    }
    return;
}
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
# Handle the exit of the child process $pid, as reported by waitpid.
#
# The wait status is taken from $? (set by the waitpid call in the
# caller); it is copied on entry so that nothing run in between can
# clobber it.
#
# Three cases, decided by which bookkeeping table knows the pid:
#
#   * host_by_pid: a task process of some host finished.
#       - status 0: the host goes back to 'ready'.
#       - non-zero status, the ssh object reports no error, the exit code
#         is not 255 and the task has an "or_goto" option: the host goes
#         back to 'ready' and its queue is skipped to that label.
#       - any other non-zero status: _at_error is called (with the ssh
#         error, or an OSSH_SLAVE_FAILED dualvar when there is none) and
#         the sub returns without touching the host state.
#   * ssh_master_by_pid: the master ssh process of a host exited; the
#     ssh object is told through master_exited.
#   * neither: the child does not belong to us. A warning is emitted and
#     the exit is otherwise ignored.
#
# Dies with an "internal error" message when a known host has no ssh
# object.
sub _finish_task {
    my ($self, $pid) = @_;
    my $status = $?;
    my $label = delete $self->{host_by_pid}{$pid};

    if (defined $label) {
        $debug and _debug(action => "[$label] action finished pid: $pid, rc: $status");
        my $host = $self->{hosts}{$label};
        my $or_goto;
        if ($status) {
            my $rc = ($status >> 8);
            my $ssh = $host->{ssh} or die "internal error: $label is not connected";
            my $error = $ssh->error;
            # or_goto is honoured only when neither the ssh object reports
            # an error nor the child exited with code 255.
            $or_goto = $host->{current_task}[1]{or_goto} unless ($error or $rc == 255);
            if (defined $or_goto) {
                $debug and _debug(action => "[$label] skipping to $or_goto with rc = $rc");
            }
            else {
                $error ||= dualvar(OSSH_SLAVE_FAILED,
                                   "child exited with non-zero return code ($rc)");
                $self->_at_error($label, $error);
                return;
            }
        }
        $self->_set_host_state($label, 'ready');
        $self->_skip($label, $or_goto) if defined $or_goto;
        delete $host->{current_task};
        delete $host->{current_task_reconnections};
    }
    else {
        my $master_label = delete $self->{ssh_master_by_pid}{$pid};
        unless (defined $master_label) {
            # waitpid(-1, ...) reaps every child of this process, including
            # ones this object never started. Such an exit is not ours to
            # handle, so warn and stop here instead of falling through to
            # the "internal error" die below with an undefined label.
            carp "spurious child exit (pid: $pid)";
            return;
        }

        $debug and _debug(action => "[$master_label] master ssh exited");
        my $host = $self->{hosts}{$master_label};
        my $ssh = $host->{ssh}
            or die ("internal error: master ssh process exited but ".
                    "there is no ssh object associated to host $master_label");
        $ssh->master_exited;
        # do error handler later...
    }
}
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
# Reap finished children, sleeping for at most $time seconds if none has
# finished yet.
#
# $time is the timeout handed to the four-argument select; a value of 0
# means "do not sleep at all".
#
# Children are only reaped while some host is in the 'running' state.
# Every reaped pid is passed to _finish_task. Always returns 1.
sub _wait_for_jobs {
    my ($self, $time) = @_;
    my $return_now = ($time == 0);
    $debug and _debug(at => "_wait_for_jobs time: $time");

    # Two passes, because waitpid has to be called both before and after
    # the select. When the first pass already reaps some child (or no
    # sleeping was requested) select is never called and we return at
    # once; the second pass, after the sleep, always returns.
    for my $slept (0, 1) {
        if (%{$self->{in_state}{running}}) {
            $debug and _debug(at => "_wait_for_jobs reaping children");
            while ((my $pid = waitpid(-1, WNOHANG)) > 0) {
                $debug and _debug(action => "waitpid caught pid: $pid, rc: $?");
                $return_now = 1;
                $self->_finish_task($pid);
            }
        }
        return 1 if $slept or $return_now;

        $debug and _debug(at => "_wait_for_jobs calling select");
        {
            # This is a hack to make select finish as soon as we get a
            # CHLD signal.
            local $SIG{CHLD} = sub {};
            select(undef, undef, undef, $time);
        }
    }
}
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
# Remove the "error" entry from every host record in $self->{hosts}.
# All other entries of the host records are left untouched.
sub _clean_errors {
    my ($self) = @_;
    for my $host (values %{$self->{hosts}}) {
        delete $host->{error};
    }
}
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# Main loop: drive every host through its states until all of them are
# in the 'done' state.
#
# The $time argument is accepted for interface compatibility but is not
# used by the current implementation.
#
# Errors recorded by a previous run are cleared first. Then, on every
# iteration:
#   1. hosts in 'connecting' are handed to _at_connecting;
#   2. hosts in 'ready' are handed to _at_ready;
#   3. hosts in 'join_failed' are put into error with OSSH_JOIN_FAILED;
#   4. when a workers limit is set, suspended hosts are moved back to
#      'ready' while free worker slots remain, taking them from
#      $self->{connected}{suspended} first and then from the general
#      'suspended' set;
#   5. the loop ends when the number of 'done' hosts equals the number of
#      hosts; otherwise _wait_for_jobs is called with a delay of 0 when
#      some host is ready, 0.3 when some host is connecting, 5.0
#      otherwise.
#
# Returns true when no host ended up with an error, false otherwise.
sub run {
    my ($self, $time) = @_;

    $self->_clean_errors;

    my $hosts = $self->{hosts};
    my $max_workers = $self->{max_workers};
    my ($connecting, $ready, $running, $waiting, $suspended, $join_failed, $done) =
        @{$self->{in_state}}{qw(connecting ready running waiting suspended join_failed done)};
    my $connected_suspended = $self->{connected}{suspended};

    while (1) {
        # use Data::Dumper;
        # print STDERR Dumper $self;
        $debug and _debug(api => "run: iterating...");

        $debug and _debug(at => "run: hosts at connecting: ", scalar(keys %$connecting));
        $self->_at_connecting($_) for keys %$connecting;

        $debug and _debug(at => "run: hosts at ready: ", scalar(keys %$ready));

        # $self->_audit_conns;
        $self->_at_ready($_) for keys %$ready;
        # $self->_audit_conns;

        $debug and _debug(at => 'run: hosts at join_failed: ', scalar(keys %$join_failed));
        $self->_at_error($_, OSSH_JOIN_FAILED) for keys %$join_failed;

        if ($max_workers) {
            $debug and _debug(at => "run: hosts at suspended:", scalar(keys %$suspended));
            if (%$suspended) {
                # Number of free worker slots, shared by both pools below.
                my $awake = $max_workers - $self->_num_workers;
                for my $hash ($connected_suspended, $suspended) {
                    # Collected per pool: a list shared between the two
                    # pools would push the hosts awoken from the first
                    # pool through _set_host_state a second time.
                    my @labels;
                    while ($awake > 0) {
                        my ($label) = each %$hash or last;
                        CORE::push @labels, $label;
                        $awake--;
                    }
                    # The state changes happen only after the each() scan
                    # is over, so the hash is not modified mid-iteration.
                    for my $label (@labels) {
                        $debug and _debug(workers => "[$label] awaking");
                        $self->_set_host_state($label, 'ready');
                    }
                    # The scan above may stop half way through the hash;
                    # keys in void context resets the each iterator.
                    keys %$hash;
                }
            }
        }

        $debug and _debug(at => "run: hosts at waiting: ", scalar(keys %$waiting));
        $debug and _debug(at => "run: hosts at running: ", scalar(keys %$running));
        $debug and _debug(at => "run: hosts at done: ", scalar(keys %$done), " of ", scalar(keys %$hosts));

        last if keys(%$hosts) == keys(%$done);

        # Do not sleep at all while there is work ready to be started;
        # poll quickly while connections are being established.
        my $delay = ( %$ready      ? 0   :
                      %$connecting ? 0.3 :
                                     5.0 );
        $self->_wait_for_jobs($delay);
    }

    delete $self->{abort_all};

    my $error;
    for my $label (sort keys %$hosts) {
        $hosts->{$label}{error} and $error = 1;
        $debug and _debug(error => "[$label] error: ", $hosts->{$label}{error});
    }
    return !$error;
}
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
# Return the error recorded for the host registered as $label, or undef
# when that host has no error recorded.
#
# Croaks when no host has been added under $label.
sub get_error {
    my ($self, $label) = @_;
    my $host = $self->{hosts}{$label};
    croak "no such host $label has been added" unless $host;
    return $host->{error};
}
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
# Report the hosts that have an error recorded.
#
# In list context returns a flat list of (label => error) pairs, ordered
# by label. In any other context returns the number of hosts that have
# an error. Errors are looked up through get_error.
sub get_errors {
    my ($self) = @_;
    my @labels = keys %{$self->{hosts}};

    unless (wantarray) {
        return scalar grep { defined $self->get_error($_) } @labels;
    }

    my @pairs;
    for my $label (sort @labels) {
        my $error = $self->get_error($label);
        next unless defined $error;
        CORE::push @pairs, $label, $error;
    }
    return @pairs;
}
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
1; |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
__END__ |