| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
1
|
|
|
1
|
|
127261
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
25
|
|
|
2
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
32
|
|
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package App::RecordStream::Operation::fromsam; |
|
5
|
1
|
|
|
1
|
|
5
|
use base qw(App::RecordStream::Operation); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
448
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
sub init { |
|
8
|
2
|
|
|
2
|
0
|
11606
|
my $self = shift; |
|
9
|
2
|
|
|
|
|
4
|
my $args = shift; |
|
10
|
|
|
|
|
|
|
my $options = { |
|
11
|
|
|
|
|
|
|
'flags' => \($self->{'DECODE_FLAGS'}), |
|
12
|
2
|
|
|
|
|
9
|
'quals|Q' => \($self->{'DECODE_QUALS'}), |
|
13
|
|
|
|
|
|
|
}; |
|
14
|
2
|
|
|
|
|
10
|
$self->parse_options($args, $options); # Ensures we pick up the automatic options |
|
15
|
|
|
|
|
|
|
} |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub accept_line { |
|
18
|
20
|
|
|
20
|
0
|
2058
|
my $self = shift; |
|
19
|
20
|
|
|
|
|
27
|
my $line = shift; |
|
20
|
20
|
100
|
|
|
|
59
|
return 1 if $line =~ /^@/; # Skip any header |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Parse the mandatory fields |
|
23
|
14
|
|
|
|
|
35
|
my @fields = qw[ qname flag rname pos mapq cigar rnext pnext tlen seq qual ]; |
|
24
|
14
|
|
|
|
|
71
|
my @values = split /\t/, $line; |
|
25
|
14
|
|
|
|
|
24
|
my %record = map { $_ => shift @values } @fields; |
|
|
154
|
|
|
|
|
255
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# The remaining values are optional fields, which are composed of three parts |
|
28
|
|
|
|
|
|
|
# separated by a colon: TAG:TYPE:VALUE |
|
29
|
14
|
|
|
|
|
26
|
for my $tag (map { [split /:/, $_, 3] } @values) { |
|
|
70
|
|
|
|
|
188
|
|
|
30
|
70
|
|
|
|
|
174
|
$record{tag}{ $tag->[0] } = { |
|
31
|
|
|
|
|
|
|
type => $tag->[1], |
|
32
|
|
|
|
|
|
|
value => $tag->[2], |
|
33
|
|
|
|
|
|
|
}; |
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
|
|
36
|
1108
|
|
|
|
|
1345
|
$record{quals} = [ map { ord($_) - 33 } split '', $record{qual} eq '*' ? '' : $record{qual} ] |
|
37
|
14
|
50
|
|
|
|
140
|
if $self->{'DECODE_QUALS'}; |
|
|
|
100
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
14
|
100
|
|
|
|
78
|
if ($self->{'DECODE_FLAGS'}) { |
|
40
|
|
|
|
|
|
|
# Technically the flags to do with pairing and r1/r2 really refer |
|
41
|
|
|
|
|
|
|
# generically to multiple segments and first/last segments, but in practice |
|
42
|
|
|
|
|
|
|
# so far every one talks about pairing and first/second reads. The SAM |
|
43
|
|
|
|
|
|
|
# spec is good about being precise, but `samtools flags` uses the monikers |
|
44
|
|
|
|
|
|
|
# below. |
|
45
|
|
|
|
|
|
|
# |
|
46
|
|
|
|
|
|
|
# 0x1 PAIRED .. paired-end (or multiple-segment) sequencing technology |
|
47
|
|
|
|
|
|
|
# 0x2 PROPER_PAIR .. each segment properly aligned according to the aligner |
|
48
|
|
|
|
|
|
|
# 0x4 UNMAP .. segment unmapped |
|
49
|
|
|
|
|
|
|
# 0x8 MUNMAP .. next segment in the template unmapped |
|
50
|
|
|
|
|
|
|
# 0x10 REVERSE .. SEQ is reverse complemented |
|
51
|
|
|
|
|
|
|
# 0x20 MREVERSE .. SEQ of the next segment in the template is reversed |
|
52
|
|
|
|
|
|
|
# 0x40 READ1 .. the first segment in the template |
|
53
|
|
|
|
|
|
|
# 0x80 READ2 .. the last segment in the template |
|
54
|
|
|
|
|
|
|
# 0x100 SECONDARY .. secondary alignment |
|
55
|
|
|
|
|
|
|
# 0x200 QCFAIL .. not passing quality controls |
|
56
|
|
|
|
|
|
|
# 0x400 DUP .. PCR or optical duplicate |
|
57
|
|
|
|
|
|
|
# 0x800 SUPPLEMENTARY .. supplementary alignment |
|
58
|
7
|
|
100
|
|
|
20
|
$record{flag} ||= 0; |
|
59
|
|
|
|
|
|
|
$record{flags} = { |
|
60
|
|
|
|
|
|
|
paired => !!($record{flag} & 0x1), |
|
61
|
|
|
|
|
|
|
proper_pair => !!($record{flag} & 0x2), |
|
62
|
|
|
|
|
|
|
unmapped => !!($record{flag} & 0x4), |
|
63
|
|
|
|
|
|
|
mate_unmapped => !!($record{flag} & 0x8), |
|
64
|
|
|
|
|
|
|
rc => !!($record{flag} & 0x10), |
|
65
|
|
|
|
|
|
|
mate_rc => !!($record{flag} & 0x20), |
|
66
|
|
|
|
|
|
|
r1 => !!($record{flag} & 0x40), |
|
67
|
|
|
|
|
|
|
r2 => !!($record{flag} & 0x80), |
|
68
|
|
|
|
|
|
|
secondary => !!($record{flag} & 0x100), |
|
69
|
|
|
|
|
|
|
qc_failed => !!($record{flag} & 0x200), |
|
70
|
|
|
|
|
|
|
duplicate => !!($record{flag} & 0x400), |
|
71
|
7
|
|
|
|
|
54
|
supplementary => !!($record{flag} & 0x800), |
|
72
|
|
|
|
|
|
|
}; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
14
|
|
|
|
|
45
|
$self->push_record( App::RecordStream::Record->new(\%record) ); |
|
76
|
14
|
|
|
|
|
290
|
return 1; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub usage { |
|
80
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
81
|
0
|
|
|
|
|
|
my $options = [ |
|
82
|
|
|
|
|
|
|
['flags', 'Decode flag bitstring into a "flags" hashref'], |
|
83
|
|
|
|
|
|
|
['quals', 'Decode qual string into a "quals" array of numeric values'], |
|
84
|
|
|
|
|
|
|
]; |
|
85
|
0
|
|
|
|
|
|
my $args_string = $self->options_string($options); |
|
86
|
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
return <
|
|
88
|
|
|
|
|
|
|
Usage: recs fromsam [] |
|
89
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
90
|
|
|
|
|
|
|
Each line of input (or lines of ) is parsed as a SAM (Sequence |
|
91
|
|
|
|
|
|
|
Alignment/Map) formatted record to produce a recs output record. To parse a |
|
92
|
|
|
|
|
|
|
BAM file, please pipe it from \`samtools view file.bam\` into this command. |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
The output records will contain the following fields: |
|
95
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
qname flag rname pos mapq cigar rnext pnext tlen seq qual |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
100
|
|
|
|
|
|
|
Additional optional SAM fields are parsed into the "tag" key, which is a |
|
101
|
|
|
|
|
|
|
hash keyed by the SAM field tag name. The values are also hashes containing |
|
102
|
|
|
|
|
|
|
a "type" and "value" field. For example, a read group tag will end up in your |
|
103
|
|
|
|
|
|
|
record like this: |
|
104
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
tags => { |
|
107
|
|
|
|
|
|
|
RG => { |
|
108
|
|
|
|
|
|
|
type => "Z", |
|
109
|
|
|
|
|
|
|
value => "...", |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
114
|
|
|
|
|
|
|
Refer to the SAM spec for field details: http://samtools.github.io/hts-specs/SAMv1.pdf |
|
115
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Arguments: |
|
118
|
|
|
|
|
|
|
$args_string |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Examples: |
|
121
|
|
|
|
|
|
|
Parse SAM and calculate average mapping quality: |
|
122
|
|
|
|
|
|
|
recs fromsam input.sam | recs collate -a avg,mapq |
|
123
|
|
|
|
|
|
|
Parse BAM: |
|
124
|
|
|
|
|
|
|
samtools view input.bam | recs fromsam |
|
125
|
|
|
|
|
|
|
Parse SAM pre-filtered by read group: |
|
126
|
|
|
|
|
|
|
samtools view -r SAMPLE-1234 input.sam | recs fromsam |
|
127
|
|
|
|
|
|
|
The same, but probably slower: |
|
128
|
|
|
|
|
|
|
recs fromsam input.sam | recs grep '{{tag/RG/value}} eq "SAMPLE-1234"' |
|
129
|
|
|
|
|
|
|
USAGE |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
1; |