File Coverage

lib/Bio/Roary/CommandLine/QueryRoary.pm
Criterion Covered Total %
statement 92 99 92.9
branch 34 40 85.0
condition 11 18 61.1
subroutine 15 15 100.0
pod 0 4 0.0
total 152 176 86.3


line stmt bran cond sub pod time code
1             undef $VERSION;
2             package Bio::Roary::CommandLine::QueryRoary;
3             $Bio::Roary::CommandLine::QueryRoary::VERSION = '3.11.0';
4             # ABSTRACT: Take in a groups file and the protein fasta files and output selected data
5              
6              
7 1     1   470302 use Moose;
  1         8  
  1         7  
8 1     1   6253 use Getopt::Long qw(GetOptionsFromArray);
  1         7819  
  1         3  
9 1     1   373 use Bio::Roary::AnalyseGroups;
  1         5  
  1         41  
10 1     1   410 use Bio::Roary::Output::GroupsMultifastas;
  1         4  
  1         33  
11 1     1   391 use Bio::Roary::Output::QueryGroups;
  1         4  
  1         52  
12 1     1   503 use Bio::Roary::PrepareInputFiles;
  1         17  
  1         48  
13 1     1   441 use Bio::Roary::Output::DifferenceBetweenSets;
  1         4  
  1         38  
14 1     1   398 use Bio::Roary::AnnotateGroups;
  1         3  
  1         43  
15 1     1   423 use Bio::Roary::GroupStatistics;
  1         3  
  1         40  
16 1     1   420 use Bio::Roary::OrderGenes;
  1         3  
  1         829  
17             extends 'Bio::Roary::CommandLine::Common';
18              
               # Raw command-line arguments. BUILD hands this array to GetOptionsFromArray
               # and afterwards treats its elements as the input file names.
19             has 'args' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
               # Required by the constructor; not read anywhere in this module's own code.
20             has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 );
               # Set from -h/--help; when true, run() dies with the usage text.
21             has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );
22              
               # Final list of input files; BUILD copies it from 'args' after validation.
23             has 'input_files' => ( is => 'rw', isa => 'ArrayRef' );
               # Groups file (-g); BUILD only overrides the default if the given path exists.
24             has 'groups_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins');
               # Names given with -n; required by the 'gene_multifasta' action in run().
25             has 'group_names' => ( is => 'rw', isa => 'ArrayRef' );
               # File sets given with -i and -t; both are required by the 'difference' action.
26             has 'input_set_one' => ( is => 'rw', isa => 'ArrayRef' );
27             has 'input_set_two' => ( is => 'rw', isa => 'ArrayRef' );
               # Output name (-o) passed to the union/intersection/complement/gene_multifasta actions.
28             has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome_results' );
               # Operation selected with -a; run() dispatches on this string.
29             has 'action' => ( is => 'rw', isa => 'Str', default => 'union' );
               # Stored as a fraction (default 0.99); BUILD divides the -c value by 100.
30             has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
               # Set from -v/--verbose.
31             has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
32              
               # Validation failure recorded by BUILD; run() prints it and dies with the usage text.
33             has '_error_message' => ( is => 'rw', isa => 'Str' );
34              
35             sub BUILD {
               # Moose construction hook: parses the command-line options held in 'args',
               # copies them onto the attributes and validates the input files.
               # Validation problems are not thrown here; they are stored in
               # '_error_message' so that run() can report them with the usage text.
36 18     18 0 56 my ($self) = @_;
37              
38 18         65 my ( $input_files, $output_filename, $groups_filename, @group_names, @input_set_one, @input_set_two, $action, $core_definition,$verbose, $help );
39              
               # Recognised options are parsed out of the 'args' array; the elements used
               # further down as file names are whatever this call leaves in it.
40 18         573 GetOptionsFromArray(
41             $self->args,
42             'o|output=s' => \$output_filename,
43             'g|groups_filename=s' => \$groups_filename,
44             'n|group_names=s' => \@group_names,
45             'a|action=s' => \$action,
46             'i|input_set_one=s' => \@input_set_one,
47             't|input_set_two=s' => \@input_set_two,
48             'c|core_definition=f' => \$core_definition,
49             'v|verbose' => \$verbose,
50             'h|help' => \$help,
51             );
52              
               # NOTE(review): 'logger' is not declared in this file - presumably provided
               # by Bio::Roary::CommandLine::Common; confirm what level 10000 means there.
53 18 50       21525 if ( defined($verbose) ) {
54 0         0 $self->verbose($verbose);
55 0         0 $self->logger->level(10000);
56             }
57 18 100       118 $self->help($help) if(defined($help));
58            
               # Each attribute keeps its declared default unless the option was supplied.
59 18 50       45 $self->output_filename($output_filename) if ( defined($output_filename) );
60 18 100       544 $self->action($action) if ( defined($action) );
               # -c is given as a percentage (see usage_text) but stored as a fraction.
61 18 50       50 $self->core_definition( $core_definition / 100 ) if ( defined($core_definition) );
               # A -g path that does not exist is ignored here, so the default name stays in place.
62 18 100 66     407 if ( defined($groups_filename) && ( -e $groups_filename ) ) {
63 17         557 $self->groups_filename($groups_filename);
64             }
65            
               # NOTE(review): because of the fallback above, this message names the
               # default groups file rather than a non-existent path passed with -g.
66 18 100       462 if(! (-e $self->groups_filename)) {
67 1         44 $self->_error_message("Error: Cant access the groups file: ".$self->groups_filename);
68             }
69              
               # join + split flattens the values so that both repeated options and
               # comma separated lists end up as one flat list.
70 18         164 @group_names = split( /,/, join( ',', @group_names ) );
71 18 100       241 $self->group_names( \@group_names ) if (@group_names);
72            
73 18         114 @input_set_one = split( /,/, join( ',', @input_set_one ) );
74 18 100       313 $self->input_set_one( \@input_set_one ) if (@input_set_one);
75            
76 18         101 @input_set_two = split( /,/, join( ',', @input_set_two ) );
77 18 100       241 $self->input_set_two( \@input_set_two ) if (@input_set_two);
78            
               # When both sets are supplied, 'args' is replaced by set one followed by
               # set two; any file names that were left in 'args' are discarded.
79 18 100 66     512 if(defined($self->input_set_one) && defined($self->input_set_two) )
80             {
81 7         15 my @all_input_files = (@{ $self->input_set_one },@{ $self->input_set_two });
  7         154  
  7         164  
82 7         173 $self->args(\@all_input_files);
83             }
84              
85              
               # '_error_message' holds a single string, so an error recorded below
               # overwrites one recorded earlier (e.g. the groups file error).
86 18 100 100     496 if ( !defined($self->input_set_two) && @{ $self->args } == 0) {
  11         257  
87 1         39 $self->_error_message("Error: You need to provide a FASTA file");
88             }
89            
               # Only the first inaccessible file is reported; the loop stops there.
90 18         49 for my $filename ( @{ $self->args } ) {
  18         397  
91 44 50       378 if ( !-e $filename ) {
92 0         0 $self->_error_message("Error: Cant access file $filename");
93 0         0 last;
94             }
95             }
               # 'input_files' is set even when an error was recorded above.
96 18         517 $self->input_files( $self->args );
97              
98             }
99              
100             sub run {
                # Executes the action chosen on the command line.
                # Dies with the usage text when help was requested, or (after printing
                # the message) when BUILD recorded an error in '_error_message'.
101 18     18 0 54 my ($self) = @_;
102              
103 18 100       498 ( !$self->help ) or die $self->usage_text;
104 17 50       447 if ( defined( $self->_error_message ) ) {
105 0         0 print $self->_error_message . "\n";
106 0         0 die $self->usage_text;
107             }
108            
                # Both helper objects are built for every action; their 'fasta_files'
                # and lookup methods are used by the branches below.
109 17         402 my $prepare_input_files = Bio::Roary::PrepareInputFiles->new(
110             input_files => $self->input_files,
111             );
112              
113 17         101 my $analyse_groups_obj = Bio::Roary::AnalyseGroups->new(
114             fasta_files => $prepare_input_files->fasta_files,
115             groups_filename => $self->groups_filename,
116             );
117              
                # Dispatch on the action string. 'gene_multifasta' additionally needs
                # 'group_names' and 'difference' needs both input sets; if those are
                # missing, or the action is not recognised, control reaches the final
                # else branch and only "Nothing done" is printed.
118 17 100 66     711 if ( $self->action eq 'union' ) {
    100 33        
    100 33        
    100          
    50          
119 2         86 my $query_groups = Bio::Roary::Output::QueryGroups->new(
120             analyse_groups => $analyse_groups_obj,
121             output_union_filename => $self->output_filename,
122             input_filenames => $prepare_input_files->fasta_files
123             );
124 2         31 $query_groups->groups_union();
125             }
126             elsif ( $self->action eq 'intersection' ) {
127 1         39 my $query_groups = Bio::Roary::Output::QueryGroups->new(
128             analyse_groups => $analyse_groups_obj,
129             output_intersection_filename => $self->output_filename,
130             input_filenames => $prepare_input_files->fasta_files,
131             core_definition => $self->core_definition
132             );
133 1         18 $query_groups->groups_intersection();
134             }
135             elsif ( $self->action eq 'complement' ) {
136 1         48 my $query_groups = Bio::Roary::Output::QueryGroups->new(
137             analyse_groups => $analyse_groups_obj,
138             output_complement_filename => $self->output_filename,
139             input_filenames => $prepare_input_files->fasta_files,
140             core_definition => $self->core_definition
141             );
142 1         12 $query_groups->groups_complement();
143             }
144             elsif ( $self->action eq 'gene_multifasta' && defined( $self->group_names ) ) {
145 6         192 my $group_multi_fastas = Bio::Roary::Output::GroupsMultifastas->new(
146             group_names => $self->group_names,
147             analyse_groups => $analyse_groups_obj,
148             output_filename_base => $self->output_filename
149             );
150 6         92 $group_multi_fastas->create_files();
151             }
152             elsif($self->action eq 'difference' && defined($self->input_set_one) && defined($self->input_set_two))
153             {
                # 'output_filename' is not passed here; the output names come from the
                # *_filename accessors of the DifferenceBetweenSets object used below.
154 7         270 my $difference_between_sets = Bio::Roary::Output::DifferenceBetweenSets->new(
155             analyse_groups => $analyse_groups_obj,
156             input_filenames_sets => [
157             $prepare_input_files->lookup_fasta_files_from_unknown_input_files($self->input_set_one),
158             $prepare_input_files->lookup_fasta_files_from_unknown_input_files($self->input_set_two)
159             ],
160             );
161 7         76 $difference_between_sets->groups_set_one_unique();
162 7         62 $difference_between_sets->groups_set_two_unique();
163 7         45 $difference_between_sets->groups_in_common();
164            
                # One create_spreadsheets call for each of the three result files.
165 7         39 for my $differences_group_filename(($difference_between_sets->groups_set_one_unique_filename,$difference_between_sets->groups_set_two_unique_filename,$difference_between_sets->groups_in_common_filename))
166             {
167 21         224 $self->create_spreadsheets($differences_group_filename, $prepare_input_files->fasta_files, $self->input_files);
168             }
169              
170             }
171             else {
172 0         0 print "Nothing done\n";
173             }
174             }
175              
176             sub create_spreadsheets
177             {
                # Re-annotates one groups file and writes a statistics spreadsheet for it.
                #   $groups_file - groups file to process; also the prefix of the output names
                #   $fasta_files - passed to AnalyseGroups as 'fasta_files'
                #   $gff_files   - passed to AnnotateGroups and OrderGenes as 'gff_files'
178 21     21 0 99 my ($self, $groups_file, $fasta_files, $gff_files) = @_;
179              
180 21         729 my $analyse_groups_obj = Bio::Roary::AnalyseGroups->new(
181             fasta_files => $fasta_files,
182             groups_filename => $groups_file,
183             );
184            
                # Output goes to "<groups_file>_reannotated".
185 21         1268 my $annotate_groups = Bio::Roary::AnnotateGroups->new(
186             gff_files => $gff_files,
187             output_filename => $groups_file.'_reannotated',
188             groups_filename => $groups_file,
189             );
190 21         153 $annotate_groups->reannotate;
191            
                # NOTE(review): the graph file names are fixed, so every call passes the
                # same two names - presumably later calls overwrite earlier graphs; confirm
                # against Bio::Roary::OrderGenes.
192 21         979 my $order_genes_obj = Bio::Roary::OrderGenes->new(
193             analyse_groups_obj => $analyse_groups_obj,
194             gff_files => $gff_files,
195             core_definition => $self->core_definition,
196             pan_graph_filename => 'set_difference_core_accessory_graph.dot',
197             accessory_graph_filename => 'set_difference_accessory_graph.dot',
198             );
199            
                # The spreadsheet is written to "<groups_file>_statistics.csv".
200 21         896 my $group_statistics = Bio::Roary::GroupStatistics->new(
201             output_filename => $groups_file.'_statistics.csv',
202             annotate_groups_obj => $annotate_groups,
203             analyse_groups_obj => $analyse_groups_obj,
204             groups_to_contigs => $order_genes_obj->groups_to_contigs
205             );
206 21         238 $group_statistics->create_spreadsheet;
207             }
208              
209             sub usage_text {
                # Returns the help text for query_pan_genome as a single string.
                # run() passes it to die when -h is given or when BUILD recorded an error.
210 1     1 0 7 my ($self) = @_;
211              
212 1         25 return <<USAGE;
213             Usage: query_pan_genome [options] *.gff
214             Perform set operations on the pan genome to see the gene differences between groups of isolates.
215              
216             Options: -g STR groups filename [clustered_proteins]
217             -a STR action (union/intersection/complement/gene_multifasta/difference) [union]
218             -c FLOAT percentage of isolates a gene must be in to be core [99]
219             -o STR output filename [pan_genome_results]
220             -n STR comma separated list of gene names for use with gene_multifasta action
221             -i STR comma separated list of filenames, comparison set one
222             -t STR comma separated list of filenames, comparison set two
223             -v verbose output to STDOUT
224             -h this help message
225            
226             Examples:
227             Union of genes found in isolates
228             query_pan_genome -a union *.gff
229            
230             Intersection of genes found in isolates (core genes)
231             query_pan_genome -a intersection *.gff
232            
233             Complement of genes found in isolates (accessory genes)
234             query_pan_genome -a complement *.gff
235              
236             Extract the sequence of each gene listed and create multi-FASTA files
237             query_pan_genome -a gene_multifasta -n gryA,mecA,abc *.gff
238              
239             Gene differences between sets of isolates
240             query_pan_genome -a difference --input_set_one 1.gff,2.gff --input_set_two 3.gff,4.gff,5.gff
241              
242             For further info see: http://sanger-pathogens.github.io/Roary/
243             USAGE
244             }
245              
246             __PACKAGE__->meta->make_immutable;
                # The call above finalises the Moose class definition. 'no Moose' then
                # removes the Moose keywords (has, extends, ...) imported into this package.
247 1     1   9 no Moose;
  1         2  
  1         7  
                # A module file must end with a true value.
248             1;
249              
250             __END__
251              
252             =pod
253              
254             =encoding UTF-8
255              
256             =head1 NAME
257              
258             Bio::Roary::CommandLine::QueryRoary - Take in a groups file and the protein fasta files and output selected data
259              
260             =head1 VERSION
261              
262             version 3.11.0
263              
264             =head1 SYNOPSIS
265              
266             Take in a groups file and the protein fasta files and output selected data
267              
268             =head1 AUTHOR
269              
270             Andrew J. Page <ap13@sanger.ac.uk>
271              
272             =head1 COPYRIGHT AND LICENSE
273              
274             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
275              
276             This is free software, licensed under:
277              
278             The GNU General Public License, Version 3, June 2007
279              
280             =cut