File Coverage

lib/App/Sandy/Command/Transcriptome.pm

Criterion	Covered	Total	%
statement	3	5	60.0
branch			n/a
condition			n/a
subroutine	1	3	33.3
pod	0	2	0.0
total	4	10	40.0

line	stmt	sub	pod	time	code
1					package App::Sandy::Command::Transcriptome;
2					# ABSTRACT: simulate command class. Simulate transcriptome sequencing
3
4	1	1		4678	use App::Sandy::Base 'class';
	1			2
	1			7
5
6					extends 'App::Sandy::CLI::Command';
7
8					with 'App::Sandy::Role::Digest';
9
10					our $VERSION = '0.22'; # VERSION
11
12					sub default_opt {
13	0	0	0		'paired-end-id' => '%i.%U:%c %U',
14					'single-end-id' => '%i.%U:%c %U',
15					'seed' => time,
16					'verbose' => 0,
17					'prefix' => 'out',
18					'output-dir' => '.',
19					'jobs' => 1,
20					'count-loops-by' => 'number-of-reads',
21					'number-of-reads' => 1000000,
22					'strand-bias' => 'minus',
23					'seqid-weight' => 'length',
24					'sequencing-type' => 'paired-end',
25					'fragment-mean' => 300,
26					'fragment-stdd' => 50,
27					'sequencing-error' => 0.001,
28					'read-mean' => 100,
29					'read-stdd' => 0,
30					'quality-profile' => 'poisson',
31					'join-paired-ends' => 0,
32					'output-format' => 'fastq.gz',
33					'compression-level' => 6
34					}
35
36					sub rm_opt {
37	0	0	0		'strand-bias',
38					'coverage',
39					'seqid-weight',
40					'genomic-variation'
41					}
42
43					__END__
44
45					=pod
46
47					=encoding UTF-8
48
49					=head1 NAME
50
51					App::Sandy::Command::Transcriptome - simulate command class. Simulate transcriptome sequencing
52
53					=head1 VERSION
54
55					version 0.22
56
57					=head1 SYNOPSIS
58
59					sandy transcriptome [options] <fasta-file>
60
61					Arguments:
62					a fasta-file
63
64					Options:
65					-h, --help brief help message
66					-u, --man full documentation
67					-v, --verbose print log messages
68					-p, --prefix prefix output [default:"out"]
69					-o, --output-dir output directory [default:"."]
70					-O, --output-format bam, sam, fastq.gz, fastq [default:"fastq.gz"]
71					-1, --join-paired-ends merge R1 and R2 outputs in one file
72					-x, --compression-level speed compression: "1" - compress faster,
73					"9" - compress better [default:"6"; Integer]
74					-i, --append-id append to the defined template id [Format]
75					-I, --id override the default template id [Format]
76					-j, --jobs number of jobs [default:"1"; Integer]
77					-s, --seed set the seed of the base generator
78					[default:"time()"; Integer]
79					-n, --number-of-reads set the number of reads
80					[default:"1000000"; Integer]
81					-t, --sequencing-type single-end or paired-end reads
82					[default:"paired-end"]
83					-q, --quality-profile sequencing system profiles from quality
84					database [default:"poisson"]
85					-e, --sequencing-error sequencing error rate for poisson
86					[default:"0.001"; Number]
87					-m, --read-mean read mean size for poisson
88					[default:"100"; Integer]
89					-d, --read-stdd read standard deviation size for poisson
90					[default:"0"; Integer]
91					-M, --fragment-mean the fragment mean size for paired-end reads
92					[default:"300"; Integer]
93					-D, --fragment-stdd the fragment standard deviation size for
94					paired-end reads [default:"50"; Integer]
95					-f, --expression-matrix an expression-matrix entry from database
96
97					=head1 DESCRIPTION
98
99					Simulate transcriptome sequencing.
100
101					=head1 OPTIONS
102
103					=over 8
104
105					=item B<--help>
106
107					Prints a brief help message and exits.
108
109					=item B<--man>
110
111					Prints the manual page and exits.
112
113					=item B<--verbose>
114
115					Prints log information to standard error
116
117					=item B<--prefix>
118
119					Concatenates the prefix to the output-file name.
120
121					=item B<--output-dir>
122
123					Creates output-file inside output-dir. If output-dir
124					does not exist, it is created recursively
125
126					=item B<--output-format>
127
128					Choose the output format. Available options are:
129					I<bam>, I<sam>, I<fastq.gz>, I<fastq>.
130					For I<bam> option, B<--append-id> is ignored, considering
131					that the sequence identifier is split by blank characters, so
132					just the first field is included into the query name column
133					(first column).
134
135					=item B<--join-paired-ends>
136
137					By default, paired-end reads are put into two different files,
138					I<prefix_R[12]_001.fastq(\.gz)?>. If the user wants both outputs
139					together, she can pass this option.
140					If the B<--id> does not have the escape character %R, it is
141					automatically included right after the first field (blank separated values)
142					as in I<id/%R> - which resolves to I<id/1> or I<id/2>.
143					It is necessary to distinguish which read is R1/R2
144
145					=item B<--compression-level>
146
147					Regulates the speed of compression using the specified digit (between 1 and 9),
148					where "1" indicates the fastest compression method (less compression) and "9"
149					indicates the slowest compression method (best compression). The default
150					compression level is "6"
151
152					=item B<--append-id>
153
154					Append string template to the defined template id.
155					See B<Format>
156
157					=item B<--id>
158
159					Override the default template id:
160					I<single-end> %i.%U:%c %U and I<paired-end> %i.%U:%c %U
161					e.g. SR123.1:transcript1 1
162					See B<Format>
163
164					=item B<Format>
165
166					A string B<Format> is a combination of literal and escape characters similar to the way I<printf> works.
167					That way, the user has the freedom to customize the fastq sequence identifier to fit her needs. Valid
168					escape characters are:
169
170					B<Common escape characters>
171
172					----------------------------------------------------------------------------
173					Escape Meaning
174					----------------------------------------------------------------------------
175					%i instrument id composed by SR + PID
176					%I job slot number
177					%q quality profile
178					%e sequencing error
179					%x sequencing error position
180					%R read 1, or 2 if it is the paired-end mate
181					%U read number
182					%r read size
183					%m read mean
184					%d read standard deviation
185					%c sequence id as chromosome, gene/transcript id
186					%C sequence id type (reference or alternate non reference allele) ***
187					%s read strand
188					%t read start position
189					%n read end position
190					%a read start position regarding reference genome ***
191					%b read end position regarding reference genome ***
192					%v genomic variation position ***
193					----------------------------------------------------------------------------
194					*** specific for genomic variation (genome simulation only)
195
196					B<Paired-end specific escape characters>
197
198					----------------------------------------------------------------------------
199					Escape Meaning
200					----------------------------------------------------------------------------
201					%T mate read start position
202					%N mate read end position
203					%A mate read start position regarding reference genome ***
204					%B mate read end position regarding reference genome ***
205					%D distance between the paired-reads
206					%M fragment mean
207					%D fragment standard deviation
208					%f fragment size
209					%F fragment strand
210					%S fragment start position
211					%E fragment end position
212					%X fragment start position regarding reference genome ***
213					%Z fragment end position regarding reference genome ***
214					----------------------------------------------------------------------------
215					*** specific for genomic variation (genome simulation only)
216
217					=item B<--jobs>
218
219					Sets the number of child jobs to be created
220
221					=item B<--seed>
222
223					Sets the seed of the base generator. The ability to set the seed is
224					useful for those who want reproducible simulations. Pay attention to
225					the number of jobs (--jobs) set, because each job receives a different
226					seed calculated from the I<main seed>. So, for reproducibility, the
227					same seed set before needs the same number of jobs set before as well.
228
229					=item B<--read-mean>
230
231					Sets the read mean if quality-profile is equal to 'poisson'. The
232					quality-profile from database overrides the read-mean
233
234					=item B<--read-stdd>
235
236					Sets the read standard deviation if quality-profile is equal to
237					'poisson'. The quality-profile from database overrides the read-stdd
238
239					=item B<--number-of-reads>
240
241					Sets the number of reads desired for each fragment end. That means,
242					it will be the number of reads for each pair - 1 x N reads for single-end
243					and 2 x N reads for paired-end. This is the default option for transcriptome
244					sequencing simulation
245
246					=item B<--sequencing-type>
247
248					Sets the sequencing type to single-end or paired-end
249
250					=item B<--fragment-mean>
251
252					If the sequencing-type is set to paired-end, it sets the
253					fragment mean
254
255					=item B<--fragment-stdd>
256
257					If the sequencing-type is set to paired-end, it sets the
258					fragment standard deviation
259
260					=item B<--sequencing-error>
261
262					Sets the sequencing error rate if quality-profile is equal to 'poisson'.
263					Valid values are between zero and one
264
265					=item B<--quality-profile>
266
267					Sets the sequencing system profile for quality. The default value is a poisson
268					distribution, but the user can choose among several profiles stored in the
269					database or import her own data.
270					See B<quality> command for more details
271
272					=item B<--expression-matrix>
273
274					By default, the gene/transcript is drawn at random using its length as weight. If
275					you choose an expression-matrix, then the draw will be weighted by the
276					gene/transcript expression.
277					The expression-matrix entries are found in the database.
278					See B<expression> command for more details
279
280					=back
281
282					=head1 AUTHORS
283
284					=over 4
285
286					=item *
287
288					Thiago L. A. Miller <tmiller@mochsl.org.br>
289
290					=item *
291
292					J. Leonel Buzzo <lbuzzo@mochsl.org.br>
293
294					=item *
295
296					Felipe R. C. dos Santos <fsantos@mochsl.org.br>
297
298					=item *
299
300					Helena B. Conceição <hconceicao@mochsl.org.br>
301
302					=item *
303
304					Gabriela Guardia <gguardia@mochsl.org.br>
305
306					=item *
307
308					Fernanda Orpinelli <forpinelli@mochsl.org.br>
309
310					=item *
311
312					Pedro A. F. Galante <pgalante@mochsl.org.br>
313
314					=back
315
316					=head1 COPYRIGHT AND LICENSE
317
318					This software is Copyright (c) 2018 by Teaching and Research Institute from Sírio-Libanês Hospital.
319
320					This is free software, licensed under:
321
322					The GNU General Public License, Version 3, June 2007
323
324					=cut