File Coverage

lib/App/Sandy/Command/Transcriptome.pm
Criterion Covered Total %
statement 3 5 60.0
branch n/a
condition n/a
subroutine 1 3 33.3
pod 0 2 0.0
total 4 10 40.0


line stmt bran cond sub pod time code
1             package App::Sandy::Command::Transcriptome;
2             # ABSTRACT: simulate command class. Simulate transcriptome sequencing
3              
4 1     1   4678 use App::Sandy::Base 'class';
  1         2  
  1         7  
5              
6             extends 'App::Sandy::CLI::Command';
7              
8             with 'App::Sandy::Role::Digest';
9              
10             our $VERSION = '0.22'; # VERSION
11              
12             sub default_opt {
13 0     0 0   'paired-end-id' => '%i.%U:%c %U',
14             'single-end-id' => '%i.%U:%c %U',
15             'seed' => time,
16             'verbose' => 0,
17             'prefix' => 'out',
18             'output-dir' => '.',
19             'jobs' => 1,
20             'count-loops-by' => 'number-of-reads',
21             'number-of-reads' => 1000000,
22             'strand-bias' => 'minus',
23             'seqid-weight' => 'length',
24             'sequencing-type' => 'paired-end',
25             'fragment-mean' => 300,
26             'fragment-stdd' => 50,
27             'sequencing-error' => 0.001,
28             'read-mean' => 100,
29             'read-stdd' => 0,
30             'quality-profile' => 'poisson',
31             'join-paired-ends' => 0,
32             'output-format' => 'fastq.gz',
33             'compression-level' => 6
34             }
35              
36             sub rm_opt {
37 0     0 0   'strand-bias',
38             'coverage',
39             'seqid-weight',
40             'genomic-variation'
41             }
42              
43             __END__
44              
45             =pod
46              
47             =encoding UTF-8
48              
49             =head1 NAME
50              
51             App::Sandy::Command::Transcriptome - simulate command class. Simulate transcriptome sequencing
52              
53             =head1 VERSION
54              
55             version 0.22
56              
57             =head1 SYNOPSIS
58              
59             sandy transcriptome [options] <fasta-file>
60              
61             Arguments:
62             a fasta-file
63              
64             Options:
65             -h, --help brief help message
66             -u, --man full documentation
67             -v, --verbose print log messages
68             -p, --prefix prefix output [default:"out"]
69             -o, --output-dir output directory [default:"."]
70             -O, --output-format bam, sam, fastq.gz, fastq [default:"fastq.gz"]
71             -1, --join-paired-ends merge R1 and R2 outputs in one file
72             -x, --compression-level speed compression: "1" - compress faster,
73             "9" - compress better [default:"6"; Integer]
74             -i, --append-id append to the defined template id [Format]
75             -I, --id override the default template id [Format]
76             -j, --jobs number of jobs [default:"1"; Integer]
77             -s, --seed set the seed of the base generator
78             [default:"time()"; Integer]
79             -n, --number-of-reads set the number of reads
80             [default:"1000000"; Integer]
81             -t, --sequencing-type single-end or paired-end reads
82             [default:"paired-end"]
83             -q, --quality-profile sequencing system profiles from quality
84             database [default:"poisson"]
85             -e, --sequencing-error sequencing error rate for poisson
86             [default:"0.001"; Number]
87             -m, --read-mean read mean size for poisson
88             [default:"100"; Integer]
89             -d, --read-stdd read standard deviation size for poisson
90             [default:"0"; Integer]
91             -M, --fragment-mean the fragment mean size for paired-end reads
92             [default:"300"; Integer]
93             -D, --fragment-stdd the fragment standard deviation size for
94             paired-end reads [default:"50"; Integer]
95             -f, --expression-matrix an expression-matrix entry from database
96              
97             =head1 DESCRIPTION
98              
99             Simulate transcriptome sequencing.
100              
101             =head1 OPTIONS
102              
103             =over 8
104              
105             =item B<--help>
106              
107             Prints a brief help message and exits.
108              
109             =item B<--man>
110              
111             Prints the manual page and exits.
112              
113             =item B<--verbose>
114              
115             Prints log information to standard error
116              
117             =item B<--prefix>
118              
119             Concatenates the prefix to the output-file name.
120              
121             =item B<--output-dir>
122              
123             Creates output-file inside output-dir. If output-dir
124             does not exist, it is created recursively
125              
126             =item B<--output-format>
127              
128             Choose the output format. Available options are:
129             I<bam>, I<sam>, I<fastq.gz>, I<fastq>.
130             For I<bam> option, B<--append-id> is ignored, considering
131             that the sequence identifier is split on blank characters, so
132             just the first field is included into the query name column
133             (first column).
134              
135             =item B<--join-paired-ends>
136              
137             By default, paired-end reads are put into two different files,
138             I<prefix_R[12]_001.fastq(\.gz)?>. If the user wants both outputs
139             together, she can pass this option.
140             If the B<--id> does not have the escape character %R, it is
141             automatically included right after the first field (blank separated values)
142             as in I<id/%R> - which resolves to I<id/1> or I<id/2>.
143             It is necessary to distinguish which read is R1/R2
144              
145             =item B<--compression-level>
146              
147             Regulates the speed of compression using the specified digit (between 1 and 9),
148             where "1" indicates the fastest compression method (less compression) and "9"
149             indicates the slowest compression method (best compression). The default
150             compression level is "6"
151              
152             =item B<--append-id>
153              
154             Append string template to the defined template id.
155             See B<Format>
156              
157             =item B<--id>
158              
159             Override the default template id:
160             I<single-end> %i.%U:%c %U and I<paired-end> %i.%U:%c %U
161             e.g. SR123.1:ENST00000000001 1
162             See B<Format>
163              
164             =item B<Format>
165              
166             A string B<Format> is a combination of literal and escape characters similar to the way I<printf> works.
167             That way, the user has the freedom to customize the fastq sequence identifier to fit her needs. Valid
168             escape characters are:
169              
170             B<Common escape characters>
171              
172             ----------------------------------------------------------------------------
173             Escape Meaning
174             ----------------------------------------------------------------------------
175             %i instrument id composed by SR + PID
176             %I job slot number
177             %q quality profile
178             %e sequencing error
179             %x sequencing error position
180             %R read 1, or 2 if it is the paired-end mate
181             %U read number
182             %r read size
183             %m read mean
184             %d read standard deviation
185             %c sequence id as chromosome, gene/transcript id
186             %C sequence id type (reference or alternate non reference allele) ***
187             %s read strand
188             %t read start position
189             %n read end position
190             %a read start position regarding reference genome ***
191             %b read end position regarding reference genome ***
192             %v genomic variation position ***
193             ----------------------------------------------------------------------------
194             *** specific for genomic variation (genome simulation only)
195              
196             B<Paired-end specific escape characters>
197              
198             ----------------------------------------------------------------------------
199             Escape Meaning
200             ----------------------------------------------------------------------------
201             %T mate read start position
202             %N mate read end position
203             %A mate read start position regarding reference genome ***
204             %B mate read end position regarding reference genome ***
205             %D distance between the paired-reads
206             %M fragment mean
207             %D fragment standard deviation
208             %f fragment size
209             %F fragment strand
210             %S fragment start position
211             %E fragment end position
212             %X fragment start position regarding reference genome ***
213             %Z fragment end position regarding reference genome ***
214             ----------------------------------------------------------------------------
215             *** specific for genomic variation (genome simulation only)
216              
217             =item B<--jobs>
218              
219             Sets the number of child jobs to be created
220              
221             =item B<--seed>
222              
223             Sets the seed of the base generator. The ability to set the seed is
224             useful for those who want reproducible simulations. Pay attention to
225             the number of jobs (--jobs) set, because each job receives a different
226             seed calculated from the I<main seed>. So, for reproducibility, the
227             same seed set before needs the same number of jobs set before as well.
228              
229             =item B<--read-mean>
230              
231             Sets the read mean if quality-profile is equal to 'poisson'. The
232             quality-profile from database overrides the read-mean
233              
234             =item B<--read-stdd>
235              
236             Sets the read standard deviation if quality-profile is equal to
237             'poisson'. The quality-profile from database overrides the read-stdd
238              
239             =item B<--number-of-reads>
240              
241             Sets the number of reads desired for each fragment end. That means,
242             it will be the number of reads for each pair - 1 x N reads for single-end
243             and 2 x N reads for paired-end. This is the default option for transcriptome
244             sequencing simulation
245              
246             =item B<--sequencing-type>
247              
248             Sets the sequencing type to single-end or paired-end
249              
250             =item B<--fragment-mean>
251              
252             If the sequencing-type is set to paired-end, it sets the
253             fragment mean
254              
255             =item B<--fragment-stdd>
256              
257             If the sequencing-type is set to paired-end, it sets the
258             fragment standard deviation
259              
260             =item B<--sequencing-error>
261              
262             Sets the sequencing error rate if quality-profile is equal to 'poisson'.
263             Valid values are between zero and one
264              
265             =item B<--quality-profile>
266              
267             Sets the sequencing system profile for quality. The default value is a poisson
268             distribution, but the user can choose among several profiles stored in the
269             database or import their own data.
270             See B<quality> command for more details
271              
272             =item B<--expression-matrix>
273              
274             By default, the gene/transcript is drawn at random using its length as weight. If
275             you choose an expression-matrix, then the draw will be based on the
276             gene/transcript expression.
277             The expression-matrix entries are found in the database.
278             See B<expression> command for more details
279              
280             =back
281              
282             =head1 AUTHORS
283              
284             =over 4
285              
286             =item *
287              
288             Thiago L. A. Miller <tmiller@mochsl.org.br>
289              
290             =item *
291              
292             J. Leonel Buzzo <lbuzzo@mochsl.org.br>
293              
294             =item *
295              
296             Felipe R. C. dos Santos <fsantos@mochsl.org.br>
297              
298             =item *
299              
300             Helena B. Conceição <hconceicao@mochsl.org.br>
301              
302             =item *
303              
304             Gabriela Guardia <gguardia@mochsl.org.br>
305              
306             =item *
307              
308             Fernanda Orpinelli <forpinelli@mochsl.org.br>
309              
310             =item *
311              
312             Pedro A. F. Galante <pgalante@mochsl.org.br>
313              
314             =back
315              
316             =head1 COPYRIGHT AND LICENSE
317              
318             This software is Copyright (c) 2018 by Teaching and Research Institute from Sírio-Libanês Hospital.
319              
320             This is free software, licensed under:
321              
322             The GNU General Public License, Version 3, June 2007
323              
324             =cut