line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package BioX::Workflow::Plugin::Drake; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.10'; |
4
|
1
|
|
|
1
|
|
27713
|
use Data::Dumper; |
|
1
|
|
|
|
|
10607
|
|
|
1
|
|
|
|
|
81
|
|
5
|
1
|
|
|
1
|
|
1580
|
use Data::Pairs; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Moose::Role; |
8
|
|
|
|
|
|
|
#extends 'BioX::Workflow'; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use Interpolation E => 'eval'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
BioX::Workflow::Plugin::Drake - A very opinionated template based bioinformatics workflow writer for Drake. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 SYNOPSIS |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
The main documentation for this module is at L<BioX::Workflow>. This module extends Workflow in order to add functionality for outputting workflows in drake format. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
biox-workflow-drake.pl --workflow workflow.yml > workflow.drake |
21
|
|
|
|
|
|
|
drake --workflow workflow.drake #with other functionality such as --jobs for asynchronous output, etc. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
More information about Drake can be found here L<https://github.com/Factual/drake>. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head2 Default Variables |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
BioX::Workflow::Plugin::Drake assumes your INPUT/OUTPUT and indir/outdirs are |
28
|
|
|
|
|
|
|
linked. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This means the output from step1 is the input for step2. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
You can override this behavior by either declaring any of these values, or in the global |
33
|
|
|
|
|
|
|
variables set auto_input: 0, disable automatic indir/outdir naming with |
34
|
|
|
|
|
|
|
auto_name: 0, and disable automatically naming output directories by rule names with |
35
|
|
|
|
|
|
|
enforce_struct: 0. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head2 Example |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head3 workflow.yml |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
--- |
43
|
|
|
|
|
|
|
global: |
44
|
|
|
|
|
|
|
- indir: /home/user/workflow |
45
|
|
|
|
|
|
|
- outdir: /home/user/workflow/output |
46
|
|
|
|
|
|
|
- file_rule: (.csv)$ |
47
|
|
|
|
|
|
|
rules: |
48
|
|
|
|
|
|
|
- backup: |
49
|
|
|
|
|
|
|
local: |
50
|
|
|
|
|
|
|
- INPUT: "{$self->indir}/{$sample}.csv" |
51
|
|
|
|
|
|
|
- OUTPUT: "{$self->outdir}/{$sample}.csv" |
52
|
|
|
|
|
|
|
- thing: "other thing" |
53
|
|
|
|
|
|
|
process: | |
54
|
|
|
|
|
|
|
cp $INPUT $OUTPUT |
55
|
|
|
|
|
|
|
- grep_VARA: |
56
|
|
|
|
|
|
|
local: |
57
|
|
|
|
|
|
|
- OUTPUT: "{$self->outdir}/{$sample}.grep_VARA.csv" |
58
|
|
|
|
|
|
|
process: | |
59
|
|
|
|
|
|
|
echo "Working on {$self->{indir}}/{$sample.csv}" |
60
|
|
|
|
|
|
|
grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv \ |
61
|
|
|
|
|
|
|
|| touch {$self->OUTPUT} |
62
|
|
|
|
|
|
|
- grep_VARB: |
63
|
|
|
|
|
|
|
local: |
64
|
|
|
|
|
|
|
- OUTPUT: "{$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv" |
65
|
|
|
|
|
|
|
process: | |
66
|
|
|
|
|
|
|
grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv || touch {$self->OUTPUT} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head3 Notes on the drake.yml |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Drake will stop everything if your job returns with an exit code of anything |
71
|
|
|
|
|
|
|
besides 0. For this reason the last command uses a command1 || command2 |
72
|
|
|
|
|
|
|
syntax, so that even if we don't grep any "VARB" from the file the workflow |
73
|
|
|
|
|
|
|
could continue. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head3 Run it with default setup |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
biox-workflow-drake.pl --workflow workflow.yml > workflow.full.drake |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head3 Output with default setup |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
I don't want to include the whole file, but you get the idea |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
; |
84
|
|
|
|
|
|
|
; Generated at: 2015-06-21T11:01:24 |
85
|
|
|
|
|
|
|
; This file was generated with the following options |
86
|
|
|
|
|
|
|
; --workflow drake.yml |
87
|
|
|
|
|
|
|
; --min 1 |
88
|
|
|
|
|
|
|
; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
; |
91
|
|
|
|
|
|
|
; Samples: test1, test2 |
92
|
|
|
|
|
|
|
; |
93
|
|
|
|
|
|
|
; |
94
|
|
|
|
|
|
|
; Starting Workflow |
95
|
|
|
|
|
|
|
; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
; |
98
|
|
|
|
|
|
|
; Starting backup |
99
|
|
|
|
|
|
|
; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
; |
103
|
|
|
|
|
|
|
; Variables |
104
|
|
|
|
|
|
|
; Indir: /home/guests/jir2004/workflow |
105
|
|
|
|
|
|
|
; Outdir: /home/guests/jir2004/workflow/output/backup |
106
|
|
|
|
|
|
|
; Local Variables: |
107
|
|
|
|
|
|
|
; INPUT: {$self->indir}/{$sample}.csv |
108
|
|
|
|
|
|
|
; OUTPUT: {$self->outdir}/{$sample}.csv |
109
|
|
|
|
|
|
|
; thing: other thing |
110
|
|
|
|
|
|
|
; |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
/home/guests/jir2004/workflow/output/backup/$[SAMPLE].csv <- /home/guests/jir2004/workflow/$[SAMPLE].csv |
113
|
|
|
|
|
|
|
cp $INPUT $OUTPUT |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
; |
117
|
|
|
|
|
|
|
; Ending backup |
118
|
|
|
|
|
|
|
; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
; |
122
|
|
|
|
|
|
|
; Starting grep_VARA |
123
|
|
|
|
|
|
|
; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Run drake |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
drake --workflow workflow.full.drake |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
The following steps will be run, in order: |
131
|
|
|
|
|
|
|
1: /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv [timestamped] |
132
|
|
|
|
|
|
|
2: /home/user/workflow/output/backup/test2.csv <- /home/user/workflow/test2.csv [timestamped] |
133
|
|
|
|
|
|
|
3: /home/user/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/user/workflow/output/backup/test1.csv [projected timestamped] |
134
|
|
|
|
|
|
|
4: /home/user/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/user/workflow/output/backup/test2.csv [projected timestamped] |
135
|
|
|
|
|
|
|
5: /home/user/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test1.grep_VARA.csv [projected timestamped] |
136
|
|
|
|
|
|
|
6: /home/user/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test2.grep_VARA.csv [projected timestamped] |
137
|
|
|
|
|
|
|
Confirm? [y/n] y |
138
|
|
|
|
|
|
|
Running 6 steps with concurrence of 1... |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
--- 0. Running (timestamped): /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv |
141
|
|
|
|
|
|
|
--- 0: /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv -> done in 0.02s |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
--- 1. Running (timestamped): /home/user/workflow/output/backup/test2.csv <- /home/user/workflow/test2.csv |
144
|
|
|
|
|
|
|
--- 1: /home/user/workflow/output/backup/test2.csv <- /home/user/workflow/test2.csv -> done in 0.01s |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
--- 2. Running (timestamped): /home/user/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/user/workflow/output/backup/test1.csv |
147
|
|
|
|
|
|
|
Working on /home/user/workflow/output/backup/test1csv |
148
|
|
|
|
|
|
|
--- 2: /home/user/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/user/workflow/output/backup/test1.csv -> done in 0.01s |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
--- 3. Running (timestamped): /home/user/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/user/workflow/output/backup/test2.csv |
151
|
|
|
|
|
|
|
Working on /home/user/workflow/output/backup/test2csv |
152
|
|
|
|
|
|
|
--- 3: /home/user/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/user/workflow/output/backup/test2.csv -> done in 0.01s |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
--- 4. Running (timestamped): /home/user/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test1.grep_VARA.csv |
155
|
|
|
|
|
|
|
--- 4: /home/user/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test1.grep_VARA.csv -> done in 0.01s |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
--- 5. Running (timestamped): /home/user/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test2.grep_VARA.csv |
158
|
|
|
|
|
|
|
--- 5: /home/user/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test2.grep_VARA.csv -> done in 0.08s |
159
|
|
|
|
|
|
|
Done (6 steps run). |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head3 Run in minified mode |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
As an alternative you can run this with the --min option, which instead of |
165
|
|
|
|
|
|
|
printing out each workflow prints out only one, and creates a run-workflow.sh |
166
|
|
|
|
|
|
|
which has all of your environmental variables. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
This option is preferable if running on an HPC cluster with many nodes. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
This WILL break with use of --resample, either local or global. You need to |
171
|
|
|
|
|
|
|
split up your workflows as opposed to using the --resample option. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
biox-workflow-drake.pl --workflow workflow.yml --min 1 > workflow.drake #This also creates the run-workflow.sh in the same directory |
174
|
|
|
|
|
|
|
./run-workflow.sh |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
cat drake.log #Here is the log for the first run |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
2015-06-21 14:02:47,543 INFO Running 3 steps with concurrence of 1... |
179
|
|
|
|
|
|
|
2015-06-21 14:02:47,568 INFO |
180
|
|
|
|
|
|
|
2015-06-21 14:02:47,570 INFO --- 0. Running (timestamped): /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv |
181
|
|
|
|
|
|
|
2015-06-21 14:02:47,592 INFO --- 0: /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv -> done in 0.02s |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
#So on and so forth |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
If you look in the example directory you will see a few png files; these are outputs of the drake workflow. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=cut |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head1 Acknowledgements |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
Before version 0.03 |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
This module was originally developed at and for Weill Cornell Medical |
194
|
|
|
|
|
|
|
College in Qatar within ITS Advanced Computing Team. With approval from |
195
|
|
|
|
|
|
|
WCMC-Q, this information was generalized and put on github, for which |
196
|
|
|
|
|
|
|
the authors would like to express their gratitude. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
As of version 0.03: |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
This module's continuing development is supported by NYU Abu Dhabi in the Center for Genomics and Systems Biology. |
201
|
|
|
|
|
|
|
With approval from NYUAD, this information was generalized and put on bitbucket, for which |
202
|
|
|
|
|
|
|
the authors would like to express their gratitude. |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head1 Inline Code Documentation |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
You shouldn't need these, but if you do here they are. |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=head2 Attributes |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head3 full |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Print the whole workflow hardcoded. This is the default |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=cut |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# Read/write boolean attribute, true by default: emit the whole workflow
# hardcoded. The 'run' modifier in this role resets it to 0 when 'min' is set.
has full => (
    default => 1,
    isa     => 'Bool',
    is      => 'rw',
);
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head3 min |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Print the workflow as 2 files. |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Run the drake things |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
drake --vars "SAMPLE=$sample" --workflow workflow.drake |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
workflow.drake |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Our regular file |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=cut |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# Read/write boolean attribute, false by default: request the minified
# output mode, in which a single workflow is printed and a run-workflow.sh
# driver script is written (see the 'get_samples' modifier in this role).
has min => (
    default => 0,
    isa     => 'Bool',
    is      => 'rw',
);
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=head2 Subroutines |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
Subroutines |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head3 before run |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Must initialize some variables |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=cut |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Method modifier executed ahead of run(): initialises the settings this
# plugin relies on before any workflow output is produced.
before 'run' => sub {
    my ($self) = @_;

    # Minified and full output are alternatives; asking for 'min' turns
    # 'full' off.
    $self->full(0) if $self->min;

    # Set the consuming class's 'wait' attribute to 0.
    # NOTE(review): its exact meaning is defined outside this file - confirm.
    $self->wait(0);

    # Generated comment lines are prefixed with ';' (the sample output in
    # the POD shows every header line starting with ';').
    $self->comment_char(';');
};
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=head3 after get_samples |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
Things to do if we decide to do a min version |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# Method modifier executed after get_samples(); does nothing unless 'min'
# is set.
#
# In min mode it writes 'run-workflow.sh' into the current working directory.
# The script is a bash shebang followed by one
#   drake --vars "SAMPLE=<sample>" --workflow workflow.drake
# line for every entry in $self->samples. Afterwards the sample list is
# replaced by the single literal placeholder '$SAMPLE', so only one workflow
# is emitted.
#
# Dies if the script cannot be opened or cannot be closed cleanly.
after 'get_samples' => sub {
    my ($self) = @_;

    return unless $self->min;

    my $script = 'run-workflow.sh';

    # Hand the message to die() directly. The former "die print ..." wrote
    # the text to STDOUT (which normally carries the generated workflow) and
    # then died with print's return value instead of the error description.
    open( my $fh, '>', $script )
        or die "Could not open file $script: $!\n";

    print {$fh} "#!/bin/bash\n\n";

    foreach my $sample ( @{ $self->samples } ) {
        print {$fh} qq{drake --vars "SAMPLE=$sample" --workflow workflow.drake\n};
    }

    # Buffered write errors only surface when the handle is closed, so a
    # failed close means the script may be truncated.
    close($fh)
        or die "Could not close file $script: $!\n";

    # NOTE(review): mode kept exactly as before; 0777 leaves the script
    # world-writable - confirm this is intended.
    chmod 0777, $script;

    # From here on there is a single pseudo-sample: the literal '$SAMPLE'.
    $self->samples( ["\$SAMPLE"] );
};
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head3 write_process |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
Fill in the template with the process |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Ensure INPUT/OUTPUT exist |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
Prettify the output a bit |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=cut |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# Method modifier executed ahead of write_process().
#
# Prints a commented notice when the rule's local attributes contain neither
# INPUT nor OUTPUT, then re-joins the process text so that every line after
# the first is prefixed with a tab.
before 'write_process' => sub {
    my ($self) = @_;

    $DB::single = 2;    # debugger breakpoint hook kept from the original

    my $attrs = $self->local_attr;
    unless ( $attrs->exists('INPUT') || $attrs->exists('OUTPUT') ) {
        # The notice is only emitted when BOTH keys are absent.
        print "$self->{comment_char} There is no INPUT or OUTPUT!\n";
    }

    # Make the formatting a bit prettier: tab-indent continuation lines.
    my @process_lines = split( "\n", $self->process );
    my $indented      = join( "\n\t", @process_lines );
    $self->process($indented);
};
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Method modifier executed ahead of process_template().
#
# Prepends a rule header of the form "{$self->OUTPUT} <- {$self->INPUT}"
# (left as template text, not interpolated here), followed by a newline and
# a tab, to the current process text.
before 'process_template' => sub {
    my ($self) = @_;

    my $rule_header = "{\$self->OUTPUT} <- {\$self->INPUT}\n\t";

    $DB::single = 2;    # debugger breakpoint hook kept from the original

    # NOTE(review): at this point the header holds no literal '$SAMPLE', so
    # this substitution cannot match anything - confirm where the
    # '$SAMPLE' -> '$[SAMPLE]' rewrite is meant to be applied.
    $rule_header =~ s/\$SAMPLE/\$[SAMPLE]/g if $self->min;

    $self->process( $rule_header . $self->process );
};
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
1; |
329
|
|
|
|
|
|
|
__END__ |