line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
############################################################################### |
2
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
3
|
|
|
|
|
|
|
## Parallel grep model similar to the native grep function. |
4
|
|
|
|
|
|
|
## |
5
|
|
|
|
|
|
|
############################################################################### |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package MCE::Grep; |
8
|
|
|
|
|
|
|
|
9
|
4
|
|
|
4
|
|
254894
|
use strict; |
|
4
|
|
|
|
|
32
|
|
|
4
|
|
|
|
|
101
|
|
10
|
4
|
|
|
4
|
|
17
|
use warnings; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
100
|
|
11
|
|
|
|
|
|
|
|
12
|
4
|
|
|
4
|
|
16
|
no warnings qw( threads recursion uninitialized ); |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
218
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our $VERSION = '1.887'; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
## no critic (BuiltinFunctions::ProhibitStringyEval) |
17
|
|
|
|
|
|
|
## no critic (Subroutines::ProhibitSubroutinePrototypes) |
18
|
|
|
|
|
|
|
## no critic (TestingAndDebugging::ProhibitNoStrict) |
19
|
|
|
|
|
|
|
|
20
|
4
|
|
|
4
|
|
20
|
use Scalar::Util qw( looks_like_number weaken ); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
177
|
|
21
|
4
|
|
|
4
|
|
1627
|
use MCE; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
23
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our @CARP_NOT = qw( MCE ); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $_tid = $INC{'threads.pm'} ? threads->tid() : 0; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub CLONE { |
28
|
0
|
0
|
|
0
|
|
0
|
$_tid = threads->tid() if $INC{'threads.pm'}; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
############################################################################### |
32
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
33
|
|
|
|
|
|
|
## Import routine. |
34
|
|
|
|
|
|
|
## |
35
|
|
|
|
|
|
|
############################################################################### |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
my ($_MCE, $_def, $_params, $_prev_c, $_tag) = ({}, {}, {}, {}, 'MCE::Grep'); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub import { |
40
|
4
|
|
|
4
|
|
55
|
my ($_class, $_pkg) = (shift, caller); |
41
|
|
|
|
|
|
|
|
42
|
4
|
|
|
|
|
16
|
my $_p = $_def->{$_pkg} = { |
43
|
|
|
|
|
|
|
MAX_WORKERS => 'auto', |
44
|
|
|
|
|
|
|
CHUNK_SIZE => 'auto', |
45
|
|
|
|
|
|
|
}; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
## Import functions. |
48
|
4
|
50
|
|
|
|
12
|
if ($_pkg !~ /^MCE::/) { |
49
|
4
|
|
|
4
|
|
48
|
no strict 'refs'; no warnings 'redefine'; |
|
4
|
|
|
4
|
|
11
|
|
|
4
|
|
|
|
|
181
|
|
|
4
|
|
|
|
|
21
|
|
|
4
|
|
|
|
|
14
|
|
|
4
|
|
|
|
|
9232
|
|
50
|
4
|
|
|
|
|
11
|
*{ $_pkg.'::mce_grep_f' } = \&run_file; |
|
4
|
|
|
|
|
26
|
|
51
|
4
|
|
|
|
|
8
|
*{ $_pkg.'::mce_grep_s' } = \&run_seq; |
|
4
|
|
|
|
|
9
|
|
52
|
4
|
|
|
|
|
8
|
*{ $_pkg.'::mce_grep' } = \&run; |
|
4
|
|
|
|
|
12
|
|
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## Process module arguments. |
56
|
4
|
|
|
|
|
34
|
while ( my $_argument = shift ) { |
57
|
0
|
|
|
|
|
0
|
my $_arg = lc $_argument; |
58
|
|
|
|
|
|
|
|
59
|
0
|
0
|
|
|
|
0
|
$_p->{MAX_WORKERS} = shift, next if ( $_arg eq 'max_workers' ); |
60
|
0
|
0
|
|
|
|
0
|
$_p->{CHUNK_SIZE} = shift, next if ( $_arg eq 'chunk_size' ); |
61
|
0
|
0
|
|
|
|
0
|
$_p->{TMP_DIR} = shift, next if ( $_arg eq 'tmp_dir' ); |
62
|
0
|
0
|
|
|
|
0
|
$_p->{FREEZE} = shift, next if ( $_arg eq 'freeze' ); |
63
|
0
|
0
|
|
|
|
0
|
$_p->{THAW} = shift, next if ( $_arg eq 'thaw' ); |
64
|
0
|
0
|
|
|
|
0
|
$_p->{INIT_RELAY} = shift, next if ( $_arg eq 'init_relay' ); |
65
|
0
|
0
|
|
|
|
0
|
$_p->{USE_THREADS} = shift, next if ( $_arg eq 'use_threads' ); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
## Sereal 3.015+, if available, is used automatically by MCE 1.8+. |
68
|
0
|
0
|
|
|
|
0
|
if ( $_arg eq 'sereal' ) { |
69
|
0
|
0
|
|
|
|
0
|
if ( shift eq '0' ) { |
70
|
0
|
|
|
|
|
0
|
require Storable; |
71
|
0
|
|
|
|
|
0
|
$_p->{FREEZE} = \&Storable::freeze; |
72
|
0
|
|
|
|
|
0
|
$_p->{THAW} = \&Storable::thaw; |
73
|
|
|
|
|
|
|
} |
74
|
0
|
|
|
|
|
0
|
next; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
0
|
|
|
|
|
0
|
_croak("Error: ($_argument) invalid module option"); |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
4
|
|
|
|
|
22
|
$_p->{MAX_WORKERS} = MCE::_parse_max_workers($_p->{MAX_WORKERS}); |
81
|
|
|
|
|
|
|
|
82
|
4
|
|
|
|
|
14
|
MCE::_validate_number($_p->{MAX_WORKERS}, 'MAX_WORKERS', $_tag); |
83
|
|
|
|
|
|
|
MCE::_validate_number($_p->{CHUNK_SIZE}, 'CHUNK_SIZE', $_tag) |
84
|
4
|
50
|
|
|
|
10
|
unless ($_p->{CHUNK_SIZE} eq 'auto'); |
85
|
|
|
|
|
|
|
|
86
|
4
|
|
|
|
|
54
|
return; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
############################################################################### |
90
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
91
|
|
|
|
|
|
|
## Gather callback for storing by chunk_id => chunk_ref into a hash. |
92
|
|
|
|
|
|
|
## |
93
|
|
|
|
|
|
|
############################################################################### |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
my ($_total_chunks, %_tmp); |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub _gather { |
98
|
|
|
|
|
|
|
|
99
|
37
|
|
|
37
|
|
131
|
my ($_chunk_id, $_data_ref) = @_; |
100
|
|
|
|
|
|
|
|
101
|
37
|
|
|
|
|
96
|
$_tmp{$_chunk_id} = $_data_ref; |
102
|
37
|
|
|
|
|
46
|
$_total_chunks++; |
103
|
|
|
|
|
|
|
|
104
|
37
|
|
|
|
|
75
|
return; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
############################################################################### |
108
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
109
|
|
|
|
|
|
|
## Init and finish routines. |
110
|
|
|
|
|
|
|
## |
111
|
|
|
|
|
|
|
############################################################################### |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub init (@) { |
114
|
|
|
|
|
|
|
|
115
|
3
|
50
|
33
|
3
|
1
|
360
|
shift if (defined $_[0] && $_[0] eq 'MCE::Grep'); |
116
|
3
|
|
|
|
|
21
|
my $_pkg = "$$.$_tid.".caller(); |
117
|
|
|
|
|
|
|
|
118
|
3
|
50
|
|
|
|
21
|
$_params->{$_pkg} = (ref $_[0] eq 'HASH') ? shift : { @_ }; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
_croak("$_tag: (HASH) not allowed as input by this MCE model") |
121
|
3
|
50
|
|
|
|
15
|
if ( ref $_params->{$_pkg}{input_data} eq 'HASH' ); |
122
|
|
|
|
|
|
|
|
123
|
3
|
|
|
|
|
6
|
@_ = (); |
124
|
|
|
|
|
|
|
|
125
|
3
|
|
|
|
|
6
|
return; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub finish (@) { |
129
|
|
|
|
|
|
|
|
130
|
7
|
50
|
33
|
7
|
1
|
1366
|
shift if (defined $_[0] && $_[0] eq 'MCE::Grep'); |
131
|
7
|
100
|
|
|
|
36
|
my $_pkg = (defined $_[0]) ? shift : "$$.$_tid.".caller(); |
132
|
|
|
|
|
|
|
|
133
|
7
|
100
|
66
|
|
|
68
|
if ( $_pkg eq 'MCE' ) { |
|
|
100
|
|
|
|
|
|
134
|
4
|
|
|
|
|
9
|
for my $_k ( keys %{ $_MCE } ) { MCE::Grep->finish($_k, 1); } |
|
4
|
|
|
|
|
27
|
|
|
2
|
|
|
|
|
28
|
|
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
elsif ( $_MCE->{$_pkg} && $_MCE->{$_pkg}{_init_pid} eq "$$.$_tid" ) { |
137
|
1
|
50
|
|
|
|
20
|
$_MCE->{$_pkg}->shutdown(@_) if $_MCE->{$_pkg}{_spawned}; |
138
|
1
|
|
|
|
|
3
|
$_total_chunks = undef, undef %_tmp; |
139
|
|
|
|
|
|
|
|
140
|
1
|
|
|
|
|
3
|
delete $_prev_c->{$_pkg}; |
141
|
1
|
|
|
|
|
12
|
delete $_MCE->{$_pkg}; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
7
|
|
|
|
|
17
|
@_ = (); |
145
|
|
|
|
|
|
|
|
146
|
7
|
|
|
|
|
19
|
return; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
############################################################################### |
150
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
151
|
|
|
|
|
|
|
## Parallel grep with MCE -- file. |
152
|
|
|
|
|
|
|
## |
153
|
|
|
|
|
|
|
############################################################################### |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub run_file (&@) { |
156
|
|
|
|
|
|
|
|
157
|
2
|
50
|
33
|
2
|
1
|
2180
|
shift if (defined $_[0] && $_[0] eq 'MCE::Grep'); |
158
|
|
|
|
|
|
|
|
159
|
2
|
|
|
|
|
5
|
my $_code = shift; my $_file = shift; |
|
2
|
|
|
|
|
5
|
|
160
|
2
|
|
|
|
|
8
|
my $_pid = "$$.$_tid.".caller(); |
161
|
|
|
|
|
|
|
|
162
|
2
|
50
|
|
|
|
8
|
if (defined (my $_p = $_params->{$_pid})) { |
163
|
2
|
50
|
|
|
|
6
|
delete $_p->{input_data} if (exists $_p->{input_data}); |
164
|
2
|
50
|
|
|
|
5
|
delete $_p->{sequence} if (exists $_p->{sequence}); |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
else { |
167
|
0
|
|
|
|
|
0
|
$_params->{$_pid} = {}; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
2
|
100
|
66
|
|
|
52
|
if (defined $_file && ref $_file eq '' && $_file ne '') { |
|
|
50
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
171
|
1
|
50
|
|
|
|
26
|
_croak("$_tag: ($_file) does not exist") unless (-e $_file); |
172
|
1
|
50
|
|
|
|
14
|
_croak("$_tag: ($_file) is not readable") unless (-r $_file); |
173
|
1
|
50
|
|
|
|
11
|
_croak("$_tag: ($_file) is not a plain file") unless (-f $_file); |
174
|
1
|
|
|
|
|
10
|
$_params->{$_pid}{_file} = $_file; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
elsif (ref $_file eq 'SCALAR' || ref($_file) =~ /^(?:GLOB|FileHandle|IO::)/) { |
177
|
1
|
|
|
|
|
5
|
$_params->{$_pid}{_file} = $_file; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
else { |
180
|
0
|
|
|
|
|
0
|
_croak("$_tag: (file) is not specified or valid"); |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
2
|
|
|
|
|
6
|
@_ = (); |
184
|
|
|
|
|
|
|
|
185
|
2
|
|
|
|
|
6
|
return run($_code); |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
############################################################################### |
189
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
190
|
|
|
|
|
|
|
## Parallel grep with MCE -- sequence. |
191
|
|
|
|
|
|
|
## |
192
|
|
|
|
|
|
|
############################################################################### |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub run_seq (&@) { |
195
|
|
|
|
|
|
|
|
196
|
1
|
50
|
33
|
1
|
1
|
1458
|
shift if (defined $_[0] && $_[0] eq 'MCE::Grep'); |
197
|
|
|
|
|
|
|
|
198
|
1
|
|
|
|
|
3
|
my $_code = shift; |
199
|
1
|
|
|
|
|
5
|
my $_pid = "$$.$_tid.".caller(); |
200
|
|
|
|
|
|
|
|
201
|
1
|
50
|
|
|
|
5
|
if (defined (my $_p = $_params->{$_pid})) { |
202
|
1
|
50
|
|
|
|
4
|
delete $_p->{input_data} if (exists $_p->{input_data}); |
203
|
1
|
50
|
|
|
|
4
|
delete $_p->{_file} if (exists $_p->{_file}); |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
else { |
206
|
0
|
|
|
|
|
0
|
$_params->{$_pid} = {}; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
1
|
|
|
|
|
2
|
my ($_begin, $_end); |
210
|
|
|
|
|
|
|
|
211
|
1
|
50
|
33
|
|
|
9
|
if (ref $_[0] eq 'HASH') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
212
|
0
|
|
|
|
|
0
|
$_begin = $_[0]->{begin}, $_end = $_[0]->{end}; |
213
|
0
|
|
|
|
|
0
|
$_params->{$_pid}{sequence} = $_[0]; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
elsif (ref $_[0] eq 'ARRAY') { |
216
|
0
|
0
|
0
|
|
|
0
|
if (@{ $_[0] } > 3 && $_[0]->[3] =~ /\d$/) { |
|
0
|
|
|
|
|
0
|
|
217
|
0
|
|
|
|
|
0
|
$_begin = $_[0]->[0], $_end = $_[0]->[-1]; |
218
|
0
|
|
|
|
|
0
|
$_params->{$_pid}{sequence} = [ $_[0]->[0], $_[0]->[-1] ]; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
else { |
221
|
0
|
|
|
|
|
0
|
$_begin = $_[0]->[0], $_end = $_[0]->[1]; |
222
|
0
|
|
|
|
|
0
|
$_params->{$_pid}{sequence} = $_[0]; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
elsif (ref $_[0] eq '' || ref($_[0]) =~ /^Math::/) { |
226
|
1
|
50
|
33
|
|
|
5
|
if (@_ > 3 && $_[3] =~ /\d$/) { |
227
|
0
|
|
|
|
|
0
|
$_begin = $_[0], $_end = $_[-1]; |
228
|
0
|
|
|
|
|
0
|
$_params->{$_pid}{sequence} = [ $_[0], $_[-1] ]; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
else { |
231
|
1
|
|
|
|
|
2
|
$_begin = $_[0], $_end = $_[1]; |
232
|
1
|
|
|
|
|
3
|
$_params->{$_pid}{sequence} = [ @_ ]; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
else { |
236
|
0
|
|
|
|
|
0
|
_croak("$_tag: (sequence) is not specified or valid"); |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
1
|
50
|
|
|
|
4
|
_croak("$_tag: (begin) is not specified for sequence") |
240
|
|
|
|
|
|
|
unless (defined $_begin); |
241
|
1
|
50
|
|
|
|
2
|
_croak("$_tag: (end) is not specified for sequence") |
242
|
|
|
|
|
|
|
unless (defined $_end); |
243
|
|
|
|
|
|
|
|
244
|
1
|
|
|
|
|
3
|
$_params->{$_pid}{sequence_run} = undef; |
245
|
|
|
|
|
|
|
|
246
|
1
|
|
|
|
|
2
|
@_ = (); |
247
|
|
|
|
|
|
|
|
248
|
1
|
|
|
|
|
3
|
return run($_code); |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
############################################################################### |
252
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
253
|
|
|
|
|
|
|
## Parallel grep with MCE. |
254
|
|
|
|
|
|
|
## |
255
|
|
|
|
|
|
|
############################################################################### |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
sub run (&@) { |
258
|
|
|
|
|
|
|
|
259
|
7
|
50
|
33
|
7
|
1
|
2274
|
shift if (defined $_[0] && $_[0] eq 'MCE::Grep'); |
260
|
|
|
|
|
|
|
|
261
|
7
|
|
|
|
|
16
|
my $_code = shift; $_total_chunks = 0; undef %_tmp; |
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
15
|
|
262
|
7
|
100
|
|
|
|
35
|
my $_pkg = caller() eq 'MCE::Grep' ? caller(1) : caller(); |
263
|
7
|
|
|
|
|
39
|
my $_pid = "$$.$_tid.$_pkg"; |
264
|
|
|
|
|
|
|
|
265
|
7
|
|
|
|
|
13
|
my $_input_data; my $_max_workers = $_def->{$_pkg}{MAX_WORKERS}; |
|
7
|
|
|
|
|
22
|
|
266
|
7
|
|
|
|
|
14
|
my $_r = ref $_[0]; |
267
|
|
|
|
|
|
|
|
268
|
7
|
100
|
66
|
|
|
49
|
if (@_ == 1 && $_r =~ /^(?:ARRAY|HASH|SCALAR|CODE|GLOB|FileHandle|IO::)/) { |
269
|
1
|
50
|
|
|
|
6
|
_croak("$_tag: (HASH) not allowed as input by this MCE model") |
270
|
|
|
|
|
|
|
if $_r eq 'HASH'; |
271
|
1
|
|
|
|
|
2
|
$_input_data = shift; |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
|
274
|
7
|
50
|
|
|
|
22
|
if (defined (my $_p = $_params->{$_pid})) { |
275
|
|
|
|
|
|
|
$_max_workers = MCE::_parse_max_workers($_p->{max_workers}) |
276
|
7
|
50
|
|
|
|
45
|
if (exists $_p->{max_workers}); |
277
|
|
|
|
|
|
|
|
278
|
7
|
100
|
100
|
|
|
44
|
delete $_p->{sequence} if (defined $_input_data || scalar @_); |
279
|
7
|
50
|
|
|
|
20
|
delete $_p->{user_func} if (exists $_p->{user_func}); |
280
|
7
|
50
|
|
|
|
17
|
delete $_p->{user_tasks} if (exists $_p->{user_tasks}); |
281
|
7
|
50
|
|
|
|
17
|
delete $_p->{use_slurpio} if (exists $_p->{use_slurpio}); |
282
|
7
|
50
|
|
|
|
17
|
delete $_p->{bounds_only} if (exists $_p->{bounds_only}); |
283
|
7
|
50
|
|
|
|
13
|
delete $_p->{gather} if (exists $_p->{gather}); |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
7
|
|
|
|
|
13
|
my $_chunk_size = do { |
287
|
7
|
|
50
|
|
|
16
|
my $_p = $_params->{$_pid} || {}; |
288
|
|
|
|
|
|
|
(defined $_p->{init_relay} || defined $_def->{$_pkg}{INIT_RELAY}) ? 1 : |
289
|
|
|
|
|
|
|
MCE::_parse_chunk_size( |
290
|
7
|
50
|
33
|
|
|
76
|
$_def->{$_pkg}{CHUNK_SIZE}, $_max_workers, $_params->{$_pid}, |
291
|
|
|
|
|
|
|
$_input_data, scalar @_ |
292
|
|
|
|
|
|
|
); |
293
|
|
|
|
|
|
|
}; |
294
|
|
|
|
|
|
|
|
295
|
7
|
50
|
|
|
|
22
|
if (defined (my $_p = $_params->{$_pid})) { |
296
|
7
|
100
|
|
|
|
20
|
if (exists $_p->{_file}) { |
297
|
2
|
|
|
|
|
5
|
$_input_data = delete $_p->{_file}; |
298
|
|
|
|
|
|
|
} else { |
299
|
5
|
50
|
|
|
|
13
|
$_input_data = $_p->{input_data} if exists $_p->{input_data}; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
## ------------------------------------------------------------------------- |
304
|
|
|
|
|
|
|
|
305
|
7
|
|
|
|
|
31
|
MCE::_save_state($_MCE->{$_pid}); |
306
|
|
|
|
|
|
|
|
307
|
7
|
100
|
66
|
|
|
46
|
if (!defined $_prev_c->{$_pid} || $_prev_c->{$_pid} != $_code) { |
308
|
3
|
50
|
|
|
|
9
|
$_MCE->{$_pid}->shutdown() if (defined $_MCE->{$_pid}); |
309
|
3
|
|
|
|
|
9
|
$_prev_c->{$_pid} = $_code; |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
my %_opts = ( |
312
|
|
|
|
|
|
|
max_workers => $_max_workers, task_name => $_tag, |
313
|
|
|
|
|
|
|
user_func => sub { |
314
|
|
|
|
|
|
|
|
315
|
37
|
|
|
37
|
|
58
|
my ($_mce, $_chunk_ref, $_chunk_id) = @_; |
316
|
37
|
|
|
|
|
61
|
my $_wantarray = $_mce->{user_args}[0]; |
317
|
|
|
|
|
|
|
|
318
|
37
|
50
|
|
|
|
54
|
if ($_wantarray) { |
319
|
37
|
|
|
|
|
41
|
my @_a; |
320
|
|
|
|
|
|
|
|
321
|
37
|
100
|
|
|
|
69
|
if (ref $_chunk_ref eq 'SCALAR') { |
322
|
1
|
50
|
|
|
|
4
|
local $/ = $_mce->{RS} if defined $_mce->{RS}; |
323
|
1
|
|
|
|
|
95
|
open my $_MEM_FH, '<', $_chunk_ref; |
324
|
1
|
|
|
|
|
1103
|
binmode $_MEM_FH, ':raw'; |
325
|
1
|
100
|
|
|
|
7
|
while (<$_MEM_FH>) { push (@_a, $_) if &{ $_code }; } |
|
9
|
|
|
|
|
41
|
|
|
9
|
|
|
|
|
16
|
|
326
|
1
|
|
|
|
|
7
|
close $_MEM_FH; |
327
|
1
|
|
|
|
|
7
|
weaken $_MEM_FH; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
else { |
330
|
36
|
100
|
|
|
|
50
|
if (ref $_chunk_ref) { |
331
|
27
|
|
|
|
|
32
|
push @_a, grep { &{ $_code } } @{ $_chunk_ref }; |
|
27
|
|
|
|
|
29
|
|
|
27
|
|
|
|
|
71
|
|
|
27
|
|
|
|
|
455
|
|
332
|
|
|
|
|
|
|
} else { |
333
|
9
|
|
|
|
|
13
|
push @_a, grep { &{ $_code } } $_chunk_ref; |
|
9
|
|
|
|
|
8
|
|
|
9
|
|
|
|
|
20
|
|
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
37
|
|
|
|
|
359
|
MCE->gather($_chunk_id, \@_a); |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
else { |
340
|
0
|
|
|
|
|
0
|
my $_cnt = 0; |
341
|
|
|
|
|
|
|
|
342
|
0
|
0
|
|
|
|
0
|
if (ref $_chunk_ref eq 'SCALAR') { |
343
|
0
|
0
|
|
|
|
0
|
local $/ = $_mce->{RS} if defined $_mce->{RS}; |
344
|
0
|
|
|
|
|
0
|
open my $_MEM_FH, '<', $_chunk_ref; |
345
|
0
|
|
|
|
|
0
|
binmode $_MEM_FH, ':raw'; |
346
|
0
|
0
|
|
|
|
0
|
while (<$_MEM_FH>) { $_cnt++ if &{ $_code }; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
347
|
0
|
|
|
|
|
0
|
close $_MEM_FH; |
348
|
0
|
|
|
|
|
0
|
weaken $_MEM_FH; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
else { |
351
|
0
|
0
|
|
|
|
0
|
if (ref $_chunk_ref) { |
352
|
0
|
|
|
|
|
0
|
$_cnt += grep { &{ $_code } } @{ $_chunk_ref }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
353
|
|
|
|
|
|
|
} else { |
354
|
0
|
|
|
|
|
0
|
$_cnt += grep { &{ $_code } } $_chunk_ref; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
0
|
0
|
|
|
|
0
|
MCE->gather($_cnt) if defined $_wantarray; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
}, |
361
|
3
|
|
|
|
|
30
|
); |
362
|
|
|
|
|
|
|
|
363
|
3
|
50
|
|
|
|
12
|
if (defined (my $_p = $_params->{$_pid})) { |
364
|
3
|
|
|
|
|
3
|
for my $_k (keys %{ $_p }) { |
|
3
|
|
|
|
|
12
|
|
365
|
3
|
50
|
|
|
|
9
|
next if ($_k eq 'sequence_run'); |
366
|
3
|
50
|
|
|
|
6
|
next if ($_k eq 'input_data'); |
367
|
3
|
50
|
|
|
|
9
|
next if ($_k eq 'chunk_size'); |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
_croak("$_tag: ($_k) is not a valid constructor argument") |
370
|
3
|
50
|
|
|
|
9
|
unless (exists $MCE::_valid_fields_new{$_k}); |
371
|
|
|
|
|
|
|
|
372
|
3
|
|
|
|
|
6
|
$_opts{$_k} = $_p->{$_k}; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
3
|
|
|
|
|
6
|
for my $_k (qw/ tmp_dir freeze thaw init_relay use_threads /) { |
377
|
|
|
|
|
|
|
$_opts{$_k} = $_def->{$_pkg}{uc($_k)} |
378
|
15
|
50
|
33
|
|
|
39
|
if (exists $_def->{$_pkg}{uc($_k)} && !exists $_opts{$_k}); |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
3
|
|
|
|
|
21
|
$_MCE->{$_pid} = MCE->new(pkg => $_pkg, %_opts); |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
## ------------------------------------------------------------------------- |
385
|
|
|
|
|
|
|
|
386
|
7
|
|
|
|
|
72
|
my $_cnt = 0; my $_wantarray = wantarray; |
|
7
|
|
|
|
|
22
|
|
387
|
|
|
|
|
|
|
|
388
|
7
|
100
|
|
|
|
32
|
$_MCE->{$_pid}{use_slurpio} = ($_chunk_size > &MCE::MAX_RECS_SIZE) ? 1 : 0; |
389
|
7
|
|
|
|
|
20
|
$_MCE->{$_pid}{user_args} = [ $_wantarray ]; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
$_MCE->{$_pid}{gather} = $_wantarray |
392
|
7
|
50
|
|
0
|
|
24
|
? \&_gather : sub { $_cnt += $_[0]; return; }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
393
|
|
|
|
|
|
|
|
394
|
7
|
100
|
|
|
|
17
|
if (defined $_input_data) { |
|
|
100
|
|
|
|
|
|
395
|
3
|
|
|
|
|
5
|
@_ = (); |
396
|
3
|
|
|
|
|
16
|
$_MCE->{$_pid}->process({ chunk_size => $_chunk_size }, $_input_data); |
397
|
3
|
|
|
|
|
15
|
delete $_MCE->{$_pid}{input_data}; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
elsif (scalar @_) { |
400
|
3
|
|
|
|
|
18
|
$_MCE->{$_pid}->process({ chunk_size => $_chunk_size }, \@_); |
401
|
1
|
|
|
|
|
20
|
delete $_MCE->{$_pid}{input_data}; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
else { |
404
|
1
|
50
|
33
|
|
|
15
|
if (defined $_params->{$_pid} && exists $_params->{$_pid}{sequence}) { |
405
|
|
|
|
|
|
|
$_MCE->{$_pid}->run({ |
406
|
|
|
|
|
|
|
chunk_size => $_chunk_size, |
407
|
|
|
|
|
|
|
sequence => $_params->{$_pid}{sequence} |
408
|
1
|
|
|
|
|
10
|
}, 0); |
409
|
1
|
50
|
|
|
|
8
|
if (exists $_params->{$_pid}{sequence_run}) { |
410
|
1
|
|
|
|
|
2
|
delete $_params->{$_pid}{sequence_run}; |
411
|
1
|
|
|
|
|
3
|
delete $_params->{$_pid}{sequence}; |
412
|
|
|
|
|
|
|
} |
413
|
1
|
|
|
|
|
3
|
delete $_MCE->{$_pid}{sequence}; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
5
|
|
|
|
|
33
|
MCE::_restore_state(); |
418
|
|
|
|
|
|
|
|
419
|
5
|
50
|
|
|
|
12
|
if ($_wantarray) { |
|
|
0
|
|
|
|
|
|
420
|
5
|
|
|
|
|
88
|
return map { @{ $_ } } delete @_tmp{ 1 .. $_total_chunks }; |
|
37
|
|
|
|
|
36
|
|
|
37
|
|
|
|
|
129
|
|
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
elsif (defined $_wantarray) { |
423
|
0
|
|
|
|
|
0
|
return $_cnt; |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
0
|
return; |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
############################################################################### |
430
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
431
|
|
|
|
|
|
|
## Private methods. |
432
|
|
|
|
|
|
|
## |
433
|
|
|
|
|
|
|
############################################################################### |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
sub _croak { |
436
|
|
|
|
|
|
|
|
437
|
0
|
|
|
0
|
|
0
|
goto &MCE::_croak; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
1; |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
__END__ |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
############################################################################### |
445
|
|
|
|
|
|
|
## ---------------------------------------------------------------------------- |
446
|
|
|
|
|
|
|
## Module usage. |
447
|
|
|
|
|
|
|
## |
448
|
|
|
|
|
|
|
############################################################################### |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head1 NAME |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
MCE::Grep - Parallel grep model similar to the native grep function |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head1 VERSION |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
This document describes MCE::Grep version 1.887 |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=head1 SYNOPSIS |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
## Exports mce_grep, mce_grep_f, and mce_grep_s |
461
|
|
|
|
|
|
|
use MCE::Grep; |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
## Array or array_ref |
464
|
|
|
|
|
|
|
my @a = mce_grep { $_ % 5 == 0 } 1..10000; |
465
|
|
|
|
|
|
|
my @b = mce_grep { $_ % 5 == 0 } \@list; |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
## Important: pass an array_ref for deeply nested input data |
468
|
|
|
|
|
|
|
my @c = mce_grep { $_->[1] % 2 == 0 } [ [ 0, 1 ], [ 0, 2 ], ... ]; |
469
|
|
|
|
|
|
|
my @d = mce_grep { $_->[1] % 2 == 0 } \@deeply_list; |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
## File path, glob ref, IO::All::{ File, Pipe, STDIO } obj, or scalar ref |
472
|
|
|
|
|
|
|
## Workers read the file directly and do not involve the manager process |
473
|
|
|
|
|
|
|
my @e = mce_grep_f { /pattern/ } "/path/to/file"; # efficient |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
## Involves the manager process, therefore slower |
476
|
|
|
|
|
|
|
my @f = mce_grep_f { /pattern/ } $file_handle; |
477
|
|
|
|
|
|
|
my @g = mce_grep_f { /pattern/ } $io; |
478
|
|
|
|
|
|
|
my @h = mce_grep_f { /pattern/ } \$scalar; |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
## Sequence of numbers (begin, end [, step, format]) |
481
|
|
|
|
|
|
|
my @i = mce_grep_s { $_ % 3 == 0 } 1, 10000, 5; |
482
|
|
|
|
|
|
|
my @j = mce_grep_s { $_ % 3 == 0 } [ 1, 10000, 5 ]; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
my @k = mce_grep_s { $_ % 3 == 0 } { |
485
|
|
|
|
|
|
|
begin => 1, end => 10000, step => 5, format => undef |
486
|
|
|
|
|
|
|
}; |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=head1 DESCRIPTION |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
This module provides a parallel grep implementation via Many-Core Engine. |
491
|
|
|
|
|
|
|
MCE incurs a small overhead due to passing of data. A fast code block will |
492
|
|
|
|
|
|
|
run faster natively. However, the overhead will likely diminish as the |
493
|
|
|
|
|
|
|
complexity increases for the code. |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
my @m1 = grep { $_ % 5 == 0 } 1..1000000; ## 0.065 secs |
496
|
|
|
|
|
|
|
my @m2 = mce_grep { $_ % 5 == 0 } 1..1000000; ## 0.194 secs |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
Chunking, enabled by default, greatly reduces the overhead behind the scenes. |
499
|
|
|
|
|
|
|
The time for mce_grep below also includes the time for data exchanges between |
500
|
|
|
|
|
|
|
the manager and worker processes. More parallelization will be seen when the |
501
|
|
|
|
|
|
|
code incurs additional CPU time. |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
my @m1 = grep { /[2357][1468][9]/ } 1..1000000; ## 0.353 secs |
504
|
|
|
|
|
|
|
my @m2 = mce_grep { /[2357][1468][9]/ } 1..1000000; ## 0.218 secs |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
Even faster is mce_grep_s; useful when input data is a range of numbers. |
507
|
|
|
|
|
|
|
Workers generate sequences mathematically among themselves without any |
508
|
|
|
|
|
|
|
interaction from the manager process. Two arguments are required for |
509
|
|
|
|
|
|
|
mce_grep_s (begin, end). Step defaults to 1 if begin is smaller than end, |
510
|
|
|
|
|
|
|
otherwise -1. |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
my @m3 = mce_grep_s { /[2357][1468][9]/ } 1, 1000000; ## 0.165 secs |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Although this document is about MCE::Grep, the L<MCE::Stream> module can write |
515
|
|
|
|
|
|
|
results immediately without waiting for all chunks to complete. This is made |
516
|
|
|
|
|
|
|
possible by passing the reference to an array (in this case @m4 and @m5). |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
use MCE::Stream default_mode => 'grep'; |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
my @m4; mce_stream \@m4, sub { /[2357][1468][9]/ }, 1..1000000; |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
## Completed in 0.203 secs. This is amazing considering the |
523
|
|
|
|
|
|
|
## overhead for passing data between the manager and workers. |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
my @m5; mce_stream_s \@m5, sub { /[2357][1468][9]/ }, 1, 1000000; |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
## Completed in 0.120 secs. Like with mce_grep_s, specifying a |
528
|
|
|
|
|
|
|
## sequence specification turns out to be faster due to less |
529
|
|
|
|
|
|
|
## overhead for the manager process. |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
A common scenario is grepping for pattern(s) inside a massive log file. |
532
|
|
|
|
|
|
|
Notice how parallelism increases as complexity increases for the pattern. |
533
|
|
|
|
|
|
|
Testing was done against a 300 MB file containing 250k lines. |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
use MCE::Grep; |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
my @m; open my $LOG, "<", "/path/to/log/file" or die "$!\n"; |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
@m = grep { /pattern/ } <$LOG>; ## 0.756 secs |
540
|
|
|
|
|
|
|
@m = grep { /foobar|[2357][1468][9]/ } <$LOG>; ## 24.681 secs |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
## Parallelism with mce_grep. This involves the manager process |
543
|
|
|
|
|
|
|
## due to processing a file handle. |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
@m = mce_grep { /pattern/ } <$LOG>; ## 0.997 secs |
546
|
|
|
|
|
|
|
@m = mce_grep { /foobar|[2357][1468][9]/ } <$LOG>; ## 7.439 secs |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
## Even faster with mce_grep_f. Workers access the file directly |
549
|
|
|
|
|
|
|
## with zero interaction from the manager process. |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
my $LOG = "/path/to/file"; |
552
|
|
|
|
|
|
|
@m = mce_grep_f { /pattern/ } $LOG; ## 0.112 secs |
553
|
|
|
|
|
|
|
@m = mce_grep_f { /foobar|[2357][1468][9]/ } $LOG; ## 6.840 secs |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
=head1 PARSING HUGE FILES |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
The MCE::Grep module lacks an optimization for quickly determining if a match |
558
|
|
|
|
|
|
|
is found, because it cannot see the pattern inside the code block. Use the following |
559
|
|
|
|
|
|
|
snippet as a template to achieve better performance. Also, take a look at |
560
|
|
|
|
|
|
|
examples/egrep.pl, included with the distribution. |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
use MCE::Loop; |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
MCE::Loop->init( |
565
|
|
|
|
|
|
|
max_workers => 8, use_slurpio => 1 |
566
|
|
|
|
|
|
|
); |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
my $pattern = 'karl'; |
569
|
|
|
|
|
|
|
my $hugefile = 'very_huge.file'; |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
my @result = mce_loop_f { |
572
|
|
|
|
|
|
|
my ($mce, $slurp_ref, $chunk_id) = @_; |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
## Quickly determine if a match is found. |
575
|
|
|
|
|
|
|
## Process slurped chunk only if true. |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
if ($$slurp_ref =~ /$pattern/m) { |
578
|
|
|
|
|
|
|
my @matches; |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
## The following is fast on Unix. Performance degrades |
581
|
|
|
|
|
|
|
## drastically on Windows beyond 4 workers. |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
open my $MEM_FH, '<', $slurp_ref; |
584
|
|
|
|
|
|
|
binmode $MEM_FH, ':raw'; |
585
|
|
|
|
|
|
|
while (<$MEM_FH>) { push @matches, $_ if (/$pattern/); } |
586
|
|
|
|
|
|
|
close $MEM_FH; |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
## Therefore, use the following construct on Windows. |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
while ( $$slurp_ref =~ /([^\n]+\n)/mg ) { |
591
|
|
|
|
|
|
|
my $line = $1; # save $1 to not lose the value |
592
|
|
|
|
|
|
|
push @matches, $line if ($line =~ /$pattern/); |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
## Gather matched lines. |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
MCE->gather(@matches); |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
} $hugefile; |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
print join('', @result); |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=head1 OVERRIDING DEFAULTS |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
The following lists the options which may be overridden when loading the module. |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
use Sereal qw( encode_sereal decode_sereal ); |
609
|
|
|
|
|
|
|
use CBOR::XS qw( encode_cbor decode_cbor ); |
610
|
|
|
|
|
|
|
use JSON::XS qw( encode_json decode_json ); |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
use MCE::Grep |
613
|
|
|
|
|
|
|
max_workers => 4, # Default 'auto' |
614
|
|
|
|
|
|
|
chunk_size => 100, # Default 'auto' |
615
|
|
|
|
|
|
|
tmp_dir => "/path/to/app/tmp", # $MCE::Signal::tmp_dir |
616
|
|
|
|
|
|
|
freeze => \&encode_sereal, # \&Storable::freeze |
617
|
|
|
|
|
|
|
thaw => \&decode_sereal, # \&Storable::thaw |
618
|
|
|
|
|
|
|
init_relay => 0, # Default undef; MCE 1.882+ |
619
|
|
|
|
|
|
|
use_threads => 0, # Default undef; MCE 1.882+ |
620
|
|
|
|
|
|
|
; |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
From MCE 1.8 onwards, Sereal 3.015+ is loaded automatically if available. |
623
|
|
|
|
|
|
|
Specify C<< Sereal => 0 >> to use Storable instead. |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
use MCE::Grep Sereal => 0; |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=head1 CUSTOMIZING MCE |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
=over 3 |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=item MCE::Grep->init ( options ) |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=item MCE::Grep::init { options } |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=back |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
The init function accepts a hash of MCE options. The gather option, if |
638
|
|
|
|
|
|
|
specified, is ignored due to being used internally by the module. |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
use MCE::Grep; |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
MCE::Grep->init( |
643
|
|
|
|
|
|
|
chunk_size => 1, max_workers => 4, |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
user_begin => sub { |
646
|
|
|
|
|
|
|
print "## ", MCE->wid, " started\n"; |
647
|
|
|
|
|
|
|
}, |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
user_end => sub { |
650
|
|
|
|
|
|
|
print "## ", MCE->wid, " completed\n"; |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
); |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
my @a = mce_grep { $_ % 5 == 0 } 1..100; |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
print "\n", "@a", "\n"; |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
-- Output |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
## 2 started |
661
|
|
|
|
|
|
|
## 3 started |
662
|
|
|
|
|
|
|
## 1 started |
663
|
|
|
|
|
|
|
## 4 started |
664
|
|
|
|
|
|
|
## 3 completed |
665
|
|
|
|
|
|
|
## 4 completed |
666
|
|
|
|
|
|
|
## 1 completed |
667
|
|
|
|
|
|
|
## 2 completed |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100 |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=head1 API DOCUMENTATION |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=over 3 |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=item MCE::Grep->run ( sub { code }, list ) |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=item mce_grep { code } list |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=back |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Input data may be defined using a list or an array reference. Unlike MCE::Loop, |
682
|
|
|
|
|
|
|
Flow, and Step, specifying a hash reference as input data isn't allowed. |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
## Array or array_ref |
685
|
|
|
|
|
|
|
my @a = mce_grep { /[2357]/ } 1..1000; |
686
|
|
|
|
|
|
|
my @b = mce_grep { /[2357]/ } \@list; |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
## Important; pass an array_ref for deeply nested input data |
689
|
|
|
|
|
|
|
my @c = mce_grep { $_->[1] =~ /[2357]/ } [ [ 0, 1 ], [ 0, 2 ], ... ]; |
690
|
|
|
|
|
|
|
my @d = mce_grep { $_->[1] =~ /[2357]/ } \@deeply_list; |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
## Not supported |
693
|
|
|
|
|
|
|
my @z = mce_grep { ... } \%hash; |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=over 3 |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=item MCE::Grep->run_file ( sub { code }, file ) |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=item mce_grep_f { code } file |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=back |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Of the input forms shown below, the /path/to/file is the fastest. Workers communicate the next offset |
704
|
|
|
|
|
|
|
position among themselves with zero interaction by the manager process. |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
C<IO::All> { File, Pipe, STDIO } is supported since MCE 1.845. |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
my @c = mce_grep_f { /pattern/ } "/path/to/file"; # faster |
709
|
|
|
|
|
|
|
my @d = mce_grep_f { /pattern/ } $file_handle; |
710
|
|
|
|
|
|
|
my @e = mce_grep_f { /pattern/ } $io; # IO::All |
711
|
|
|
|
|
|
|
my @f = mce_grep_f { /pattern/ } \$scalar; |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=over 3 |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
=item MCE::Grep->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] ) |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
=item mce_grep_s { code } $beg, $end [, $step, $fmt ] |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
=back |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
Sequence may be defined as a list, an array reference, or a hash reference. |
722
|
|
|
|
|
|
|
The functions require both begin and end values to run. Step and format are |
723
|
|
|
|
|
|
|
optional. The format is passed to sprintf (% may be omitted below). |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f"); |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
my @f = mce_grep_s { /[1234]\.[5678]/ } $beg, $end, $step, $fmt; |
728
|
|
|
|
|
|
|
my @g = mce_grep_s { /[1234]\.[5678]/ } [ $beg, $end, $step, $fmt ]; |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
my @h = mce_grep_s { /[1234]\.[5678]/ } { |
731
|
|
|
|
|
|
|
begin => $beg, end => $end, |
732
|
|
|
|
|
|
|
step => $step, format => $fmt |
733
|
|
|
|
|
|
|
}; |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
=over 3 |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=item MCE::Grep->run ( sub { code }, iterator ) |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=item mce_grep { code } iterator |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=back |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
An iterator reference may be specified for input_data. Iterators are described |
744
|
|
|
|
|
|
|
under section "SYNTAX for INPUT_DATA" at L<MCE::Core>. |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
my @a = mce_grep { $_ % 3 == 0 } make_iterator(10, 30, 2); |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
=head1 MANUAL SHUTDOWN |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=over 3 |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
=item MCE::Grep->finish |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
=item MCE::Grep::finish |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=back |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
Workers remain persistent as much as possible after running. Shutdown occurs |
759
|
|
|
|
|
|
|
automatically when the script terminates. Call finish when workers are no |
760
|
|
|
|
|
|
|
longer needed. |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
use MCE::Grep; |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
MCE::Grep->init( |
765
|
|
|
|
|
|
|
chunk_size => 20, max_workers => 'auto' |
766
|
|
|
|
|
|
|
); |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
my @a = mce_grep { ... } 1..100; |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
MCE::Grep->finish; |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=head1 INDEX |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
L<MCE|MCE>, L<MCE::Core> |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=head1 AUTHOR |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
Mario E. Roy, S<E<lt>marioeroy AT gmail DOT comE<gt>> |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
=cut |
781
|
|
|
|
|
|
|
|