File Coverage

lib/App/Sandy/Command/Genome.pm
Criterion Covered Total %
statement 3 5 60.0
branch n/a
condition n/a
subroutine 1 3 33.3
pod 0 2 0.0
total 4 10 40.0


line stmt bran cond sub pod time code
1             package App::Sandy::Command::Genome;
2             # ABSTRACT: simulate command class. Simulate genome sequencing
3              
4 1     1   1541 use App::Sandy::Base 'class';
  1         3  
  1         6  
5              
6             extends 'App::Sandy::CLI::Command';
7              
8             with 'App::Sandy::Role::Digest';
9              
10             our $VERSION = '0.22'; # VERSION
11              
12             sub default_opt {
13 0     0 0   'paired-end-id' => '%i.%U:%c:%F:%X-%Z',
14             'single-end-id' => '%i.%U:%c:%s:%t-%n',
15             'seed' => time,
16             'verbose' => 0,
17             'prefix' => 'out',
18             'output-dir' => '.',
19             'jobs' => 1,
20             'count-loops-by' => 'coverage',
21             'coverage' => 8,
22             'strand-bias' => 'random',
23             'seqid-weight' => 'length',
24             'sequencing-type' => 'paired-end',
25             'fragment-mean' => 300,
26             'fragment-stdd' => 50,
27             'sequencing-error' => 0.001,
28             'read-mean' => 100,
29             'read-stdd' => 0,
30             'quality-profile' => 'poisson',
31             'join-paired-ends' => 0,
32             'output-format' => 'fastq.gz',
33             'compression-level' => 6
34             }
35              
36             sub rm_opt {
37 0     0 0   'strand-bias',
38             'number-of-reads',
39             'seqid-weight',
40             'expression-matrix'
41             }
42              
43             __END__
44              
45             =pod
46              
47             =encoding UTF-8
48              
49             =head1 NAME
50              
51             App::Sandy::Command::Genome - simulate command class. Simulate genome sequencing
52              
53             =head1 VERSION
54              
55             version 0.22
56              
57             =head1 SYNOPSIS
58              
59             sandy genome [options] <fasta-file>
60              
61             Arguments:
62             a fasta-file
63              
64             Options:
65             -h, --help brief help message
66             -u, --man full documentation
67             -v, --verbose print log messages
68             -p, --prefix prefix output [default:"out"]
69             -o, --output-dir output directory [default:"."]
70             -O, --output-format bam, sam, fastq.gz, fastq [default:"fastq.gz"]
71             -1, --join-paired-ends merge R1 and R2 outputs in one file
72             -x, --compression-level speed compression: "1" - compress faster,
73             "9" - compress better [default:"6"; Integer]
74             -i, --append-id append to the defined template id [Format]
75             -I, --id override the default template id [Format]
76             -j, --jobs number of jobs [default:"1"; Integer]
77             -s, --seed set the seed of the base generator
78             [default:"time()"; Integer]
79             -c, --coverage genome coverage [default:"8", Number]
80             -t, --sequencing-type single-end or paired-end reads
81             [default:"paired-end"]
82             -q, --quality-profile sequencing system profiles from quality
83             database [default:"poisson"]
84             -e, --sequencing-error sequencing error rate for poisson
85             [default:"0.001"; Number]
86             -m, --read-mean read mean size for poisson
87             [default:"100"; Integer]
88             -d, --read-stdd read standard deviation size for poisson
89             [default:"0"; Integer]
90             -M, --fragment-mean the fragment mean size for paired-end reads
91             [default:"300"; Integer]
92             -D, --fragment-stdd the fragment standard deviation size for
93             paired-end reads [default:"50"; Integer]
94             -a, --genomic-variation a list of genomic variation entries from
95             variation database. This option may be passed
96             multiple times [default:"none"]
97             -A, --genomic-variation-regex a list of perl-like regex to match genomic
98             variation entries in variation database.
99             This option may be passed multiple times
100             [default:"none"]
101              
102             =head1 DESCRIPTION
103              
104             Simulate genome sequencing.
105              
106             =head1 OPTIONS
107              
108             =over 8
109              
110             =item B<--help>
111              
112             Prints a brief help message and exits.
113              
114             =item B<--man>
115              
116             Prints the manual page and exits.
117              
118             =item B<--verbose>
119              
120             Prints log information to standard error
121              
122             =item B<--prefix>
123              
124             Concatenates the prefix to the output-file name.
125              
126             =item B<--output-dir>
127              
128             Creates output-file inside output-dir. If output-dir
129             does not exist, it is created recursively
130              
131             =item B<--output-format>
132              
133             Choose the output format. Available options are:
134             I<bam>, I<sam>, I<fastq.gz>, I<fastq>.
135             For I<bam> option, B<--append-id> is ignored, considering
136             that the sequence identifier is split on blank characters, so
137             just the first field is included into the query name column
138             (first column).
139              
140             =item B<--join-paired-ends>
141              
142             By default, paired-end reads are put into two different files,
143             I<prefix_R[12]_001.fastq(\.gz)?>. If the user wants both outputs
144             together, she can pass this option.
145             If the B<--id> does not have the escape character %R, it is
146             automatically included right after the first field (blank separated values)
147             as in I<id/%R> - which resolves to I<id/1> or I<id/2>.
148             It is necessary to distinguish which read is R1/R2
149              
150             =item B<--compression-level>
151              
152             Regulates the speed of compression using the specified digit (between 1 and 9),
153             where "1" indicates the fastest compression method (less compression) and "9"
154             indicates the slowest compression method (best compression). The default
155             compression level is "6"
156              
157             =item B<--append-id>
158              
159             Append string template to the defined template id.
160             See B<Format>
161              
162             =item B<--id>
163              
164             Overrides the default template id:
165             I<single-end> %i.%U:%c:%s:%t-%n and I<paired-end> %i.%U:%c:%F:%X-%Z
166             e.g. SR123.1:chr1:P:1001-1101
167             See B<Format>
168              
169             =item B<Format>
170              
171             A string B<Format> is a combination of literal and escape characters similar to the way I<printf> works.
172             That way, the user has the freedom to customize the fastq sequence identifier to fit her needs. Valid
173             escape characters are:
174              
175             B<Common escape characters>
176              
177             ----------------------------------------------------------------------------
178             Escape Meaning
179             ----------------------------------------------------------------------------
180             %i instrument id composed by SR + PID
181             %I job slot number
182             %q quality profile
183             %e sequencing error
184             %x sequencing error position
185             %R read 1, or 2 if it is the paired-end mate
186             %U read number
187             %r read size
188             %m read mean
189             %d read standard deviation
190             %c sequence id as chromosome, gene/transcript id
191             %C sequence id type (reference or alternate non reference allele) ***
192             %s read strand
193             %t read start position
194             %n read end position
195             %a read start position regarding reference genome ***
196             %b read end position regarding reference genome ***
197             %v genomic variation position ***
198             ----------------------------------------------------------------------------
199             *** specific for genomic variation (genome simulation only)
200              
201             B<Paired-end specific escape characters>
202              
203             ----------------------------------------------------------------------------
204             Escape Meaning
205             ----------------------------------------------------------------------------
206             %T mate read start position
207             %N mate read end position
208             %A mate read start position regarding reference genome ***
209             %B mate read end position regarding reference genome ***
210             %D distance between the paired-reads
211             %M fragment mean
212             %D fragment standard deviation
213             %f fragment size
214             %F fragment strand
215             %S fragment start position
216             %E fragment end position
217             %X fragment start position regarding reference genome ***
218             %Z fragment end position regarding reference genome ***
219             ----------------------------------------------------------------------------
220             *** specific for genomic variation (genome simulation only)
221              
222             =item B<--jobs>
223              
224             Sets the number of child jobs to be created
225              
226             =item B<--seed>
227              
228             Sets the seed of the base generator. The ability to set the seed is
229             useful for those who want reproducible simulations. Pay attention to
230             the number of jobs (--jobs) set, because each job receives a different
231             seed calculated from the I<main seed>. So, for reproducibility, the
232             same seed set before needs the same number of jobs set before as well.
233              
234             =item B<--read-mean>
235              
236             Sets the read mean if quality-profile is equal to 'poisson'. The
237             quality-profile from database overrides the read-mean
238              
239             =item B<--read-stdd>
240              
241             Sets the read standard deviation if quality-profile is equal to
242             'poisson'. The quality-profile from database overrides the read-stdd
243              
244             =item B<--coverage>
245              
246             Calculates the number of reads based on the genome
247             coverage: number_of_reads = (sequence_size * coverage) / read_size.
248             This is the default option for genome sequencing simulation
249              
250             =item B<--sequencing-type>
251              
252             Sets the sequencing type to single-end or paired-end
253              
254             =item B<--fragment-mean>
255              
256             If the sequencing-type is set to paired-end, it sets the
257             fragment mean
258              
259             =item B<--fragment-stdd>
260              
261             If the sequencing-type is set to paired-end, it sets the
262             fragment standard deviation
263              
264             =item B<--sequencing-error>
265              
266             Sets the sequencing error rate if quality-profile is equal to 'poisson'.
267             Valid values are between zero and one
268              
269             =item B<--quality-profile>
270              
271             Sets the sequencing system profile for quality. The default value is a poisson
272             distribution, but the user can choose among several profiles stored into the
273             database or import his own data.
274             See B<quality> command for more details
275              
276             =item B<--genomic-variation>
277              
278             Sets the genomic variation to be applied to the input genome. By
279             default no variation is included in the simulation, but the user has
280             the power to point some entries from B<variation> database or index his
281             own data. This option accepts a list with comma separated values
282             and can be passed multiple times, which is useful in order to join
283             various types of genomic variation into the same simulation. It is
284             possible to combine this option with B<--genomic-variation-regex>
285             See B<variation> command for the available list of genomic variation
286             entries
287              
288             =item B<--genomic-variation-regex>
289              
290             Applies perl-regex in the variation database and selects all entries
291             that match the pattern. This option accepts a list with comma separated
292             values and can be passed multiple times. It is possible to combine this
293             option with B<--genomic-variation>
294             See B<variation> command for the available list of genomic variation
295             entries
296              
297             =back
298              
299             =head1 AUTHORS
300              
301             =over 4
302              
303             =item *
304              
305             Thiago L. A. Miller <tmiller@mochsl.org.br>
306              
307             =item *
308              
309             J. Leonel Buzzo <lbuzzo@mochsl.org.br>
310              
311             =item *
312              
313             Felipe R. C. dos Santos <fsantos@mochsl.org.br>
314              
315             =item *
316              
317             Helena B. Conceição <hconceicao@mochsl.org.br>
318              
319             =item *
320              
321             Gabriela Guardia <gguardia@mochsl.org.br>
322              
323             =item *
324              
325             Fernanda Orpinelli <forpinelli@mochsl.org.br>
326              
327             =item *
328              
329             Pedro A. F. Galante <pgalante@mochsl.org.br>
330              
331             =back
332              
333             =head1 COPYRIGHT AND LICENSE
334              
335             This software is Copyright (c) 2018 by Teaching and Research Institute from Sírio-Libanês Hospital.
336              
337             This is free software, licensed under:
338              
339             The GNU General Public License, Version 3, June 2007
340              
341             =cut