File Coverage

blib/lib/App/RecordStream/Operation/fromsam.pm
Criterion Covered Total %
statement 30 34 88.2
branch 7 8 87.5
condition 2 2 100.0
subroutine 5 6 83.3
pod 0 3 0.0
total 44 53 83.0


line stmt bran cond sub pod time code
1 1     1   132796 use strict;
  1         3  
  1         24  
2 1     1   4 use warnings;
  1         3  
  1         30  
3              
4             package App::RecordStream::Operation::fromsam;
5 1     1   5 use base qw(App::RecordStream::Operation);
  1         2  
  1         391  
6              
7             sub init {
8 2     2 0 18613 my $this = shift;
9 2         4 my $args = shift;
10             my $options = {
11             'flags' => \($this->{'DECODE_FLAGS'}),
12 2         9 'quals|Q' => \($this->{'DECODE_QUALS'}),
13             };
14 2         10 $this->parse_options($args, $options); # Ensures we pick up the automatic options
15             }
16              
17             sub accept_line {
18 20     20 0 2993 my $this = shift;
19 20         36 my $line = shift;
20 20 100       64 return 1 if $line =~ /^@/; # Skip any header
21              
22             # Parse the mandatory fields
23 14         43 my @fields = qw[ qname flag rname pos mapq cigar rnext pnext tlen seq qual ];
24 14         75 my @values = split /\t/, $line;
25 14         29 my %record = map { $_ => shift @values } @fields;
  154         319  
26              
27             # The remaining values are optional fields, which are composed of three parts
28             # separated by a colon: TAG:TYPE:VALUE
29 14         37 for my $tag (map { [split /:/, $_, 3] } @values) {
  70         223  
30 70         233 $record{tag}{ $tag->[0] } = {
31             type => $tag->[1],
32             value => $tag->[2],
33             };
34             }
35              
36 1108         1727 $record{quals} = [ map { ord($_) - 33 } split '', $record{qual} eq '*' ? '' : $record{qual} ]
37 14 50       155 if $this->{'DECODE_QUALS'};
    100          
38              
39 14 100       114 if ($this->{'DECODE_FLAGS'}) {
40             # Technically the flags to do with pairing and r1/r2 really refer
41             # generically to multiple segments and first/last segments, but in practice
42             # so far every one talks about pairing and first/second reads. The SAM
43             # spec is good about being precise, but `samtools flags` uses the monikers
44             # below.
45             #
46             # 0x1 PAIRED .. paired-end (or multiple-segment) sequencing technology
47             # 0x2 PROPER_PAIR .. each segment properly aligned according to the aligner
48             # 0x4 UNMAP .. segment unmapped
49             # 0x8 MUNMAP .. next segment in the template unmapped
50             # 0x10 REVERSE .. SEQ is reverse complemented
51             # 0x20 MREVERSE .. SEQ of the next segment in the template is reversed
52             # 0x40 READ1 .. the first segment in the template
53             # 0x80 READ2 .. the last segment in the template
54             # 0x100 SECONDARY .. secondary alignment
55             # 0x200 QCFAIL .. not passing quality controls
56             # 0x400 DUP .. PCR or optical duplicate
57             # 0x800 SUPPLEMENTARY .. supplementary alignment
58 7   100     34 $record{flag} ||= 0;
59             $record{flags} = {
60             paired => !!($record{flag} & 0x1),
61             proper_pair => !!($record{flag} & 0x2),
62             unmapped => !!($record{flag} & 0x4),
63             mate_unmapped => !!($record{flag} & 0x8),
64             rc => !!($record{flag} & 0x10),
65             mate_rc => !!($record{flag} & 0x20),
66             r1 => !!($record{flag} & 0x40),
67             r2 => !!($record{flag} & 0x80),
68             secondary => !!($record{flag} & 0x100),
69             qc_failed => !!($record{flag} & 0x200),
70             duplicate => !!($record{flag} & 0x400),
71 7         80 supplementary => !!($record{flag} & 0x800),
72             };
73             }
74              
75 14         67 $this->push_record( App::RecordStream::Record->new(\%record) );
76 14         367 return 1;
77             }
78              
79             sub usage {
80 0     0 0   my $this = shift;
81 0           my $options = [
82             ['flags', 'Decode flag bitstring into a "flags" hashref'],
83             ['quals', 'Decode qual string into a "quals" array of numeric values'],
84             ];
85 0           my $args_string = $this->options_string($options);
86              
87 0           return <
88             Usage: recs fromsam []
89             __FORMAT_TEXT__
90             Each line of input (or lines of ) is parsed as a SAM (Sequence
91             Alignment/Map) formatted record to produce a recs output record. To parse a
92             BAM file, please pipe it from \`samtools view file.bam\` into this command.
93              
94             The output records will contain the following fields:
95             __FORMAT_TEXT__
96              
97             qname flag rname pos mapq cigar rnext pnext tlen seq qual
98              
99             __FORMAT_TEXT__
100             Additional optional SAM fields are parsed into the "tag" key, which is a
101             hash keyed by the SAM field tag name. The values are also hashes containing
102             a "type" and "value" field. For example, a read group tag will end up in your
103             record like this:
104             __FORMAT_TEXT__
105              
106             tags => {
107             RG => {
108             type => "Z",
109             value => "...",
110             }
111             }
112              
113             __FORMAT_TEXT__
114             Refer to the SAM spec for field details: http://samtools.github.io/hts-specs/SAMv1.pdf
115             __FORMAT_TEXT__
116              
117             Arguments:
118             $args_string
119              
120             Examples:
121             Parse SAM and calculate average mapping quality:
122             recs fromsam input.sam | recs collate -a avg,mapq
123             Parse BAM:
124             samtools view input.bam | recs fromsam
125             Parse SAM pre-filtered by read group:
126             samtools view -r SAMPLE-1234 input.sam | recs fromsam
127             The same, but probably slower:
128             recs fromsam input.sam | recs grep '{{tag/RG/value}} eq "SAMPLE-1234"'
129             USAGE
130             }
131              
132             1;