File Coverage

Bio/SearchIO/Writer/HitTableWriter.pm
Criterion Covered Total %
statement 23 25 92.0
branch 5 10 50.0
condition 2 6 33.3
subroutine 5 5 100.0
pod 2 3 66.6
total 37 49 75.5


line stmt bran cond sub pod time code
1              
2             =head1 NAME
3              
4             Bio::SearchIO::Writer::HitTableWriter - Tab-delimited data for Bio::Search::Hit::HitI objects
5              
6             =head1 SYNOPSIS
7              
8             =head2 Example 1: Using the default columns
9              
10             use Bio::SearchIO;
11             use Bio::SearchIO::Writer::HitTableWriter;
12              
13             my $in = Bio::SearchIO->new();
14              
15             my $writer = Bio::SearchIO::Writer::HitTableWriter->new();
16              
17             my $out = Bio::SearchIO->new( -writer => $writer );
18              
19             while ( my $result = $in->next_result() ) {
20             $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
21             }
22              
23             =head2 Example 2: Specifying a subset of columns
24              
25             use Bio::SearchIO;
26             use Bio::SearchIO::Writer::HitTableWriter;
27              
28             my $in = Bio::SearchIO->new();
29              
30             my $writer = Bio::SearchIO::Writer::HitTableWriter->new(
31             -columns => [qw(
32             query_name
33             query_length
34             hit_name
35             hit_length
36             frac_identical_query
37             expect
38             )] );
39              
40             my $out = Bio::SearchIO->new( -writer => $writer,
41             -file => ">searchio.out" );
42              
43             while ( my $result = $in->next_result() ) {
44             $out->write_result($result, ($in->report_count - 1 ? 0 : 1) );
45             }
46              
47             =head2 Custom Labels
48              
49             You can also specify different column labels if you don't want to use
50             the defaults. Do this by specifying a C<-labels> hash reference
51             parameter when creating the HitTableWriter object. The keys of the
52             hash should be the column number (left-most column = 1) for the label(s)
53             you want to specify. Here's an example:
54              
55             my $writer = Bio::SearchIO::Writer::HitTableWriter->new(
56             -columns => [qw( query_name
57             query_length
58             hit_name
59             hit_length )],
60             -labels => { 1 => 'QUERY_GI',
61             3 => 'HIT_IDENTIFIER' } );
62              
63              
64             =head1 DESCRIPTION
65              
66             Bio::SearchIO::Writer::HitTableWriter outputs summary data
67             for each Hit within a search result. Output is in tab-delimited format,
68             one row per Hit.
69              
70             The reason why this is considered summary data is that if a hit
71             contains multiple HSPs, the HSPs will be tiled and
72             the data represents a summary across all HSPs.
73             See below for which columns are affected.
74             See the docs in L<Bio::Search::Hit::BlastHit>
75             for more details on HSP tiling.
76              
77             =head2 Available Columns
78              
79             Here are the columns that can be specified in the C<-columns>
80             parameter when creating a HitTableWriter object. If a C<-columns> parameter
81             is not specified, this list, in this order, will be used as the default.
82              
83             query_name # Sequence identifier of the query.
84             query_length # Full length of the query sequence
85             hit_name # Sequence identifier of the hit
86             hit_length # Full length of the hit sequence
87             round # Round number for hit (PSI-BLAST)
88             expect # Expect value for the alignment
89             score # Score for the alignment (e.g., BLAST score)
90             bits # Bit score for the alignment
91             num_hsps # Number of HSPs (not the "N" value)
92             frac_identical_query* # fraction of identical substitutions in query
93             frac_identical_hit* # fraction of identical substitutions in hit
94             frac_conserved_query* # fraction of conserved substitutions in query
95             frac_conserved_hit* # fraction of conserved substitutions in hit
96             frac_aligned_query* # fraction of the query sequence that is aligned
97             frac_aligned_hit* # fraction of the hit sequence that is aligned
98             length_aln_query* # Length of the aligned portion of the query sequence
99             length_aln_hit* # Length of the aligned portion of the hit sequence
100             gaps_query* # Number of gap characters in the aligned query sequence
101             gaps_hit* # Number of gap characters in the aligned hit sequence
102             gaps_total* # Number of gap characters in the aligned query and hit sequences
103             start_query* # Starting coordinate of the aligned portion of the query sequence
104             end_query* # Ending coordinate of the aligned portion of the query sequence
105             start_hit* # Starting coordinate of the aligned portion of the hit sequence
106             end_hit* # Ending coordinate of the aligned portion of the hit sequence
107             strand_query # Strand of the aligned query sequence
108             strand_hit # Strand of the aligned hit sequence
109             frame # Frame of the alignment (0,1,2)
110             ambiguous_aln # Ambiguous alignment indicator ('qs', 'q', 's')
111             hit_description # Full description of the hit sequence
112             query_description # Full description of the query sequence
113             rank # The rank order of the hit
114             num_hits # Number of hits for the query finding this hit
115              
116             Items marked with a C<*> report data summed across all HSPs
117             after tiling them to avoid counting data from overlapping regions
118             multiple times.
119              
120             For more details about these columns, see the documentation for the
121             corresponding method in L<Bio::Search::Hit::BlastHit>.
122              
123             =head1 TODO
124              
125             Figure out the best way to incorporate algorithm-specific score columns.
126             The best route is probably to have algorithm-specific subclasses
127             (e.g., BlastHitTableWriter, FastaHitTableWriter).
128              
129             =head1 FEEDBACK
130              
131             =head2 Mailing Lists
132              
133             User feedback is an integral part of the evolution of this and other
134             Bioperl modules. Send your comments and suggestions preferably to one
135             of the Bioperl mailing lists. Your participation is much appreciated.
136              
137             bioperl-l@bioperl.org - General discussion
138             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
139              
140             =head2 Support
141              
142             Please direct usage questions or support issues to the mailing list:
143              
144             I<bioperl-l@bioperl.org>
145              
146             rather than to the module maintainer directly. Many experienced and
147             responsive experts will be able to look at the problem and quickly
148             address it. Please include a thorough description of the problem
149             with code and data examples if at all possible.
150              
151             =head2 Reporting Bugs
152              
153             Report bugs to the Bioperl bug tracking system to help us keep track of
154             the bugs and their resolution. Bug reports can be submitted via the
155             web:
156              
157             https://github.com/bioperl/bioperl-live/issues
158              
159             =head1 AUTHOR
160              
161             Steve Chervitz E<lt>sac@bioperl.orgE<gt>
162              
163             See L<the FEEDBACK section|/FEEDBACK> for where to send bug reports
164             and comments.
165              
166             =head1 COPYRIGHT
167              
168             Copyright (c) 2001, 2002 Steve Chervitz. All Rights Reserved.
169              
170             This library is free software; you can redistribute it and/or modify
171             it under the same terms as Perl itself.
172              
173             =head1 DISCLAIMER
174              
175             This software is provided "as is" without warranty of any kind.
176              
177             =head1 SEE ALSO
178              
179             L<Bio::SearchIO>,
180             L<Bio::SearchIO::Writer::ResultTableWriter>
181              
182             =head1 METHODS
183              
184             =cut
185              
186             package Bio::SearchIO::Writer::HitTableWriter;
187              
188 1     1   1066 use strict;
  1         1  
  1         26  
189              
190 1     1   3 use base qw(Bio::SearchIO::Writer::ResultTableWriter);
  1         1  
  1         373  
191              
192              
193             # Array fields: column number, object, method[/argument], printf format,
194             # column label. Methods for the result object are defined in
195             # Bio::Search::Result::ResultI. Methods for the hit object are defined in
196             # Bio::Search::Hit::HitI. Tech note: If a bogus method is supplied,
197             # it will result in all values being zero. Don't know why this is.
198              
199             # TODO (maybe): Allow specification of separate mantissa/exponent for
200             # significance data.
201              
202             my %column_map = (
203             'query_name' => ['1', 'result', 'query_name', 's', 'QUERY' ],
204             'query_length' => ['2', 'result', 'query_length', 'd', 'LEN_Q'],
205             'hit_name' => ['3', 'hit', 'name', 's', 'HIT'],
206             'hit_length' => ['4', 'hit', 'length', 'd', 'LEN_H'],
207             'round' => ['5', 'hit', 'iteration', 'd', 'ROUND'],
208             'expect' => ['6', 'hit', 'significance', '.1e', 'EXPCT'],
209             'score' => ['7', 'hit', 'raw_score', 'd', 'SCORE'],
210             'bits' => ['8', 'hit', 'bits', 'd', 'BITS'],
211             'num_hsps' => ['9', 'hit', 'num_hsps', 'd', 'HSPS'],
212             'frac_identical_query' => ['10', 'hit', 'frac_identical/query', '.2f', 'FR_IDQ'],
213             'frac_identical_hit' => ['11', 'hit', 'frac_identical/hit', '.2f', 'FR_IDH'],
214             'frac_conserved_query' => ['12', 'hit', 'frac_conserved/query', '.2f', 'FR_CNQ'],
215             'frac_conserved_hit' => ['13', 'hit', 'frac_conserved/hit', '.2f', 'FR_CNH'],
216             'frac_aligned_query' => ['14', 'hit', 'frac_aligned_query', '.2f', 'FR_ALQ'],
217             'frac_aligned_hit' => ['15', 'hit', 'frac_aligned_hit', '.2f', 'FR_ALH'],
218             'length_aln_query' => ['16', 'hit', 'length_aln/query', 'd', 'LN_ALQ'],
219             'length_aln_hit' => ['17', 'hit', 'length_aln/hit', 'd', 'LN_ALH'],
220             'gaps_query' => ['18', 'hit', 'gaps/query', 'd', 'GAPS_Q'],
221             'gaps_hit' => ['19', 'hit', 'gaps/hit', 'd', 'GAPS_H'],
222             'gaps_total' => ['20', 'hit', 'gaps/total', 'd', 'GAPS_QH'],
223             'start_query' => ['21', 'hit', 'start/query', 'd', 'START_Q'],
224             'end_query' => ['22', 'hit', 'end/query', 'd', 'END_Q'],
225             'start_hit' => ['23', 'hit', 'start/hit', 'd', 'START_H'],
226             'end_hit' => ['24', 'hit', 'end/hit', 'd', 'END_H'],
227             'strand_query' => ['25', 'hit', 'strand/query', 's', 'STRND_Q'],
228             'strand_hit' => ['26', 'hit', 'strand/hit', 's', 'STRND_H'],
229             'frame' => ['27', 'hit', 'frame', 'd', 'FRAME'],
230             'ambiguous_aln' => ['28', 'hit', 'ambiguous_aln', 's', 'AMBIG'],
231             'hit_description' => ['29', 'hit', 'description', 's', 'DESC_H'],
232             'query_description' => ['30', 'result', 'query_description', 's', 'DESC_Q'],
233             'rank' => ['31', 'hit', 'rank', 's', 'RANK'],
234             'num_hits' => ['32', 'result', 'num_hits', 's', 'NUM_HITS'],
235             );
236              
                # Accessor for the file-scoped %column_map table. In list context it
                # returns the table flattened into a key => value list; the array
                # references inside are the same ones stored in %column_map.
237 4     4 0 63 sub column_map { return %column_map }
238              
239              
240             =head2 to_string()
241              
242             Note: this method is not intended for direct use. The
243             SearchIO::write_result() method calls it automatically if the writer
244             is hooked up to a SearchIO object as illustrated in
245             L<the SYNOPSIS section|/SYNOPSIS>.
246              
247             Title : to_string()
248             :
249             Usage : print $writer->to_string( $result_obj, [$include_labels] );
250             :
251             Argument : $result_obj = A Bio::Search::Result::BlastResult object
252             : $include_labels = boolean, if true column labels are included (default: false)
253             :
254             Returns : String containing tab-delimited set of data for each hit
255             : in a BlastResult object. Some data is summed across multiple HSPs.
256             :
257             Throws : n/a
258              
259             =cut
260              
261             #----------------
262             sub to_string {
263             #----------------
                # Builds the text for one result: an optional column-label header
                # followed by one formatted row per hit that passes the filters.
264 1     1 1 2 my ($self, $result, $include_labels) = @_;
265              
                # Start with the column labels only when the caller asked for them.
266 1 50       13 my $str = $include_labels ? $self->column_labels() : '';
                # The row-building code reference and the printf format come from
                # accessors on $self; they are not defined in this file (presumably
                # inherited from Bio::SearchIO::Writer::ResultTableWriter -- confirm).
267 1         5 my $func_ref = $self->row_data_func;
268 1         6 my $printf_fmt = $self->printf_fmt;
269            
270 1         8 my ($resultfilter,$hitfilter) = ( $self->filter('RESULT'),
271             $self->filter('HIT') );
                # Emit rows only if no RESULT filter is set or the filter accepts
                # this result.
272 1 50 33     11 if( ! defined $resultfilter ||
273 0         0 &{$resultfilter}($result) ) {
274 1 50       20 $result->can('rewind') &&
275             $result->rewind(); # ensure we're at the beginning
276 1         8 foreach my $hit($result->hits) {
                # Skip hits rejected by the HIT filter, when one is set.
277 20 50 33     52 next if( defined $hitfilter && ! &{$hitfilter}($hit));
  0         0  
                # Substitute 0 for any undefined value so sprintf always receives
                # a defined argument.
278 20 50       27 my @row_data = map { defined $_ ? $_ : 0 } &{$func_ref}($result, $hit);
  160         224  
  20         46  
279 20         220 $str .= sprintf "$printf_fmt\n", @row_data;
280             }
281             }
                # Remove a tab that sits directly before a newline (a trailing
                # separator at the end of a row).
282 1         13 $str =~ s/\t\n/\n/gs;
283 1         6 return $str;
284             }
285              
286             =head2 end_report
287              
288             Title : end_report
289             Usage : $self->end_report()
290             Function: The method to call when ending a report, this is
291             mostly for cleanup for formats which require you to
292             have something at the end of the document. Nothing for
293             a text message.
294             Returns : string
295             Args : none
296              
297             =cut
298              
299             sub end_report {
                # This writer has no report footer: always returns the empty string.
300 1     1 1 3 return '';
301             }
302              
303              
304             =head2 filter
305              
306             Title : filter
307             Usage : $writer->filter('hsp', \&hsp_filter);
308             Function: Filter out data at the HSP, Hit, or Result level
309             Returns : none
310             Args : string => data type,
311             CODE reference
312              
313              
314             =cut
315              
316             1;