File Coverage

blib/lib/Bio/ViennaNGS.pm
Criterion Covered Total %
statement 3 3 100.0
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 4 4 100.0


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2015-02-09 13:42:11 mtw>
3              
4             package Bio::ViennaNGS;
5              
6 1     1   7 use version; our $VERSION = qv('0.12_17');
  1         2  
  1         7  
7              
8             1;
9              
10             =head1 NAME
11              
12             Bio::ViennaNGS - A Perl distribution for Next-Generation Sequencing
13             (NGS) data analysis
14              
15             =head1 DESCRIPTION
16              
17             Bio::ViennaNGS is a distribution of Perl modules and utilities for
18             building efficient Next-Generation Sequencing (NGS) analysis
19             pipelines. It covers various aspects of NGS data analysis, including
20             (but not limited to) conversion of sequence annotation, evaluation of
21             mapped data, expression quantification and visualization.
22              
23             The main Bio::ViennaNGS module is shipped with a complementary set of
24             (sub)modules:
25              
26             =over
27              
28             =item L<Bio::ViennaNGS::AnnoC>: A Moose interface for storage and
29             conversion of sequence annotation data.
30              
31             =item L<Bio::ViennaNGS::Bam>: Routines for high-level manipulation of
32             BAM files.
33              
34             =item L<Bio::ViennaNGS::BamStat>: A L<Moose> based class for
35             collecting mapping statistics.
36              
37             =item L<Bio::ViennaNGS::BamStatSummary>: A L<Moose> interface for
38             processing L<Bio::ViennaNGS::BamStat> objects on a set of BAM files.
39              
40             =item L<Bio::ViennaNGS::Bed>: A L<Moose> interface for manipulation of
41             genomic interval data in BED format.
42              
43             =item L<Bio::ViennaNGS::Expression>: An object oriented interface for
44             read-count based gene expression analysis.
45              
46             =item L<Bio::ViennaNGS::Fasta>: Routines for accessing genomic
47             sequences implemented through a L<Moose> interface to
48             L<Bio::DB::Fasta>.
49              
50             =item L<Bio::ViennaNGS::Feature>: A L<Moose> based BED6 wrapper.
51              
52             =item L<Bio::ViennaNGS::FeatureChain>: Yet another L<Moose> class for
53             chaining gene annotation features.
54              
55             =item L<Bio::ViennaNGS::FeatureLine>: An abstract L<Moose> class for
56             combining several L<Bio::ViennaNGS::FeatureChain> objects.
57              
58             =item L<Bio::ViennaNGS::MinimalFeature>: A L<Moose> interface for
59             handling elementary gene annotation.
60              
61             =item L<Bio::ViennaNGS::SpliceJunc>: A collection of routines for
62             alternative splicing analysis.
63              
64             =item L<Bio::ViennaNGS::Tutorial>: A comprehensive tutorial of the
65             L<Bio::ViennaNGS> core routines with real-world NGS data.
66              
67             =item L<Bio::ViennaNGS::UCSC>: Routines for visualization of genomics
68             data with the UCSC genome browser.
69              
70             =item L<Bio::ViennaNGS::Util>: A collection of wrapper routines for
71             commonly used third-party NGS utilities, code for normalization of
72             gene expression values based on read count data and a set of utility
73             functions.
74              
75             =back
76              
77             =head1 UTILITIES
78              
79             L<Bio::ViennaNGS> comes with a collection of command line utilities
80             for accomplishing routine tasks often required in NGS data
81             processing. These utilities serve as reference implementations of the
82             routines implemented throughout the modules and can readily be used
83             for atomic tasks in NGS data processing:
84              
85             =over
86              
87             =item F<assembly_hub_constructor.pl>: The UCSC genome browser offers
88             the possibility to visualize any organism (including organisms that
89             are not included in the standard UCSC browser bundle) through so
90             called 'Assembly Hubs'. This script constructs Assembly Hubs from
91             genomic sequence and annotation data.
92              
93             =item F<bam_split.pl>: Split (paired-end and single-end) BAM alignment
94             files by strand and compute statistics. Optionally create BED output,
95             as well as normalized bedGraph and bigWig files for coverage
96             visualization in genome browsers (see dependencies on third-party
97             tools below).
98              
99             =item F<bam_to_bigWig.pl>: Produce bigWig coverage profiles from
100             (aligned) BAM files, explicitly considering strandedness. The most
101             natural use case of this tool is to create strand-aware coverage
102             profiles in bigWig format for genome browser visualization.
103              
104             =item F<bam_uniq.pl>: Extract unique and multi mapping reads from BAM
105             alignment files and create a separate BAM file for both unique (.uniq.)
106             and multi (.mult.) mappers.
107              
108             =item F<bed2bedGraph.pl>: Convert BED files to (strand specific)
109             bedGraph files, allowing additional annotation and automatic
110             generation of bedGraph files which can easily be converted to big-type
111             files for easy UCSC visualization.
112              
113             =item F<extend_bed.pl>: Extend genomic features in BED files by a
114             certain number of nucleotides, either on both sides or specifically at
115             the 5' or 3' end, respectively.
116              
117             =item F<gff2bed.pl>: Convert RefSeq GFF3 annotation files to BED12
118             format. Individual BED12 files are created for each feature type
119             (CDS/tRNA/rRNA/etc.). Tested with RefSeq bacterial GFF3 annotation.
120              
121             =item F<kmer_analysis.pl>: Count k-mers of predefined length in FastQ
122             and Fasta files.
123              
124             =item F<MEME_XML_motif_extractor.pl>: Compute simple statistics from
125             MEME XML output and return a list of found motifs with the number of
126             sequences containing those motifs as well as nice ggplot graphs.
127              
128             =item F<newUCSCdb.pl>: Create a new genome database in a locally
129             installed instance of the UCSC genome browser in order to add a novel
130             organism for visualization. Based on L<this Genomewiki
131             article|http://genomewiki.ucsc.edu/index.php/Building_a_new_genome_database>.
132              
133             =item F<normalize_multicov.pl>: Compute normalized expression data in
134             TPM from (raw) read counts in bedtools multicov format. TPM reference:
135             Wagner et al, Theory Biosci. 131(4), pp 281-85 (2012)
136              
137             =item F<sj_visualizer.pl>: Convert splice junctions from mapped
138             RNA-seq data in segemehl BED6 splice junction format to BED12 for easy
139             visualization in genome browsers.
140              
141             =item F<splice_site_summary.pl>: Identify and characterize splice
142             junctions from RNA-seq data by intersecting them with annotated splice
143             junctions.
144              
145             =item F<trim_fastq.pl>: Trim sequence and quality string fields in a
146             Fastq file by user defined length.
147              
148             =back
149              
150             =head1 DEPENDENCIES
151              
152             The L<Bio::ViennaNGS> modules and classes depend on a set of Perl
153             modules, some of which are part of the Perl core distribution:
154              
155             =over
156              
157             =item L<Bio::Perl> >= 1.00690001
158              
159             =item L<Bio::DB::Sam> >= 1.37
160              
161             =item L<Bio::DB::Fasta>
162              
163             =item L<Bio::Tools::GFF>
164              
165             =item L<File::Basename>
166              
167             =item L<File::Share>
168              
169             =item L<File::Temp>
170              
171             =item L<Path::Class>
172              
173             =item L<IPC::Cmd>
174              
175             =item L<Carp>
176              
177             =item L<Template>
178              
179             =item L<Moose>
180              
181             =item L<Moose::Util::TypeConstraints>
182              
183             =item L<namespace::autoclean>
184              
185             =item L<MooseX::Clone>
186              
187             =item L<MooseX::InstanceTracking>
188              
189             =item L<Tie::Hash::Indexed>
190              
191             =back
192              
193             In addition the following modules are required by the L<Bio::ViennaNGS> utilities:
194              
195             =over
196              
197             =item L<PerlIO::gzip>
198              
199             =item L<Math::Round>
200              
201             =item L<XML::Simple>
202              
203             =item L<Statistics::R>
204              
205             =back
206              
207             L<Bio::ViennaNGS> uses third-party tools for computing intersections
208             of BED files: F<bedtools intersect> from the
209             L<BEDtools|http://bedtools.readthedocs.org/en/latest/content/tools/intersect.html>
210             suite is used to compute overlaps and F<bedtools sort> is used to sort
211             BED output files. Make sure that those third-party utilities are
212             available on your system, and that they can be found and executed by
213             the Perl interpreter. We recommend installing the latest version of
214             L<BEDtools|https://github.com/arq5x/bedtools2> on your system.
215              
216             =head1 SOURCE AVAILABILITY
217              
218             Source code for this distribution is available from the L<ViennaNGS
219             Github repository|https://github.com/mtw/Bio-ViennaNGS>.
220              
221             =head1 PAPERS
222              
223             If the L<Bio::ViennaNGS> suite is useful for your work or if you use
224             any component of the distribution in a custom pipeline, please cite
225             the following publication:
226              
227             B<"ViennaNGS - A toolbox for building efficient next-generation sequencing
228             analysis pipelines">
229              
230             I<Michael T. Wolfinger, Joerg Fallmann, Florian Eggenhofer and Fabian Amman>
231              
232             bioRxiv L<doi:10.1101/013011|http://dx.doi.org/10.1101/013011>.
233              
234             =head1 NOTES
235              
236             The L<Bio::ViennaNGS> suite is actively developed and tested on
237             different flavours of Linux and Mac OS X. We have taken care of
238             writing platform-independent code that should run out of the box on
239             most UNIX-based systems, however we do not have access to machines
240             running Microsoft Windows. As such, we have not tested and will not
241             test Windows compatibility.
242              
243             =head1 SEE ALSO
244              
245             =over
246              
247             =item L<Bio::ViennaNGS::AnnoC>
248              
249             =item L<Bio::ViennaNGS::Bam>
250              
251             =item L<Bio::ViennaNGS::BamStat>
252              
253             =item L<Bio::ViennaNGS::BamStatSummary>
254              
255             =item L<Bio::ViennaNGS::Bed>
256              
257             =item L<Bio::ViennaNGS::Expression>
258              
259             =item L<Bio::ViennaNGS::Fasta>
260              
261             =item L<Bio::ViennaNGS::Feature>
262              
263             =item L<Bio::ViennaNGS::FeatureChain>
264              
265             =item L<Bio::ViennaNGS::FeatureLine>
266              
267             =item L<Bio::ViennaNGS::MinimalFeature>
268              
269             =item L<Bio::ViennaNGS::SpliceJunc>
270              
271             =item L<Bio::ViennaNGS::Tutorial>
272              
273             =item L<Bio::ViennaNGS::UCSC>
274              
275             =item L<Bio::ViennaNGS::Util>
276              
277             =back
278              
279             =head1 AUTHORS
280              
281             =over
282              
283             =item Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt>
284              
285             =item Jörg Fallmann E<lt>fall@tbi.univie.ac.atE<gt>
286              
287             =item Florian Eggenhofer E<lt>florian.eggenhofer@tbi.univie.ac.atE<gt>
288              
289             =item Fabian Amman E<lt>fabian@tbi.univie.ac.atE<gt>
290              
291             =back
292              
293             =head1 COPYRIGHT AND LICENSE
294              
295             Copyright (C) 2014-2015 Michael T. Wolfinger
296             E<lt>michael@wolfinger.euE<gt>
297              
298             This library is free software; you can redistribute it and/or modify
299             it under the same terms as Perl itself, either Perl version 5.10.0 or,
300             at your option, any later version of Perl 5 you may have available.
301              
302             This software is distributed in the hope that it will be useful, but
303             WITHOUT ANY WARRANTY; without even the implied warranty of
304             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
305              
306             =cut
307              
308