| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/env perl |
|
2
|
|
|
|
|
|
|
# ABSTRACT: A script to calculate N50 from one or multiple FASTA/FASTQ files. |
|
3
|
|
|
|
|
|
|
# PODNAME: n50 |
|
4
|
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
9337
|
use 5.012; |
|
|
2
|
|
|
|
|
6
|
|
|
6
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
113
|
|
|
7
|
2
|
|
|
2
|
|
1148
|
use Pod::Usage; |
|
|
2
|
|
|
|
|
139411
|
|
|
|
2
|
|
|
|
|
331
|
|
|
8
|
2
|
|
|
2
|
|
1270
|
use Term::ANSIColor qw(:constants colorvalid colored); |
|
|
2
|
|
|
|
|
22602
|
|
|
|
2
|
|
|
|
|
2246
|
|
|
9
|
2
|
|
|
2
|
|
1552
|
use Getopt::Long; |
|
|
2
|
|
|
|
|
25470
|
|
|
|
2
|
|
|
|
|
8
|
|
|
10
|
2
|
|
|
2
|
|
315
|
use File::Basename; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
208
|
|
|
11
|
2
|
|
|
2
|
|
930
|
use FindBin qw($RealBin); |
|
|
2
|
|
|
|
|
2384
|
|
|
|
2
|
|
|
|
|
345
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# The following placeholder is to be programmatically replaced with 'use lib "$RealBin/../lib"' if needed |
|
14
|
|
|
|
|
|
|
#~loclib~ |
|
15
|
2
|
50
|
33
|
|
|
265540
|
if ( -e "$RealBin/../lib/Proch/N50.pm" and -e "$RealBin/../Changes" ) { |
|
|
0
|
|
|
|
|
0
|
|
|
16
|
2
|
|
|
2
|
|
1131
|
use lib "$RealBin/../lib"; |
|
|
2
|
|
|
|
|
1441
|
|
|
|
2
|
|
|
|
|
15
|
|
|
17
|
|
|
|
|
|
|
} |
|
18
|
2
|
|
|
2
|
|
1444
|
use Proch::N50; |
|
|
2
|
|
|
|
|
7
|
|
|
|
2
|
|
|
|
|
170
|
|
|
19
|
2
|
|
|
2
|
|
13
|
use Data::Dumper; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
11578
|
|
|
20
|
2
|
|
|
|
|
17
|
our %program = ( |
|
21
|
|
|
|
|
|
|
'NAME' => 'FASTX N50', |
|
22
|
|
|
|
|
|
|
'AUTHOR' => 'Andrea Telatin', |
|
23
|
|
|
|
|
|
|
'MAIL' => 'andrea.telatin@gmail.com', |
|
24
|
|
|
|
|
|
|
'VERSION' => '4.2.0', |
|
25
|
|
|
|
|
|
|
); |
|
26
|
2
|
|
|
|
|
3
|
my $hasJSON = undef; |
|
27
|
2
|
|
|
|
|
3
|
my $t; |
|
28
|
|
|
|
|
|
|
|
|
29
|
2
|
|
|
|
|
5
|
local $Term::ANSIColor::AUTORESET = 1; |
|
30
|
|
|
|
|
|
|
|
|
31
|
2
|
|
|
|
|
5
|
my $opt_separator = "\t"; |
|
32
|
2
|
|
|
|
|
4
|
my $opt_format = 'default'; |
|
33
|
2
|
|
|
|
|
16
|
my %formats = ( |
|
34
|
|
|
|
|
|
|
'default' => 'Prints only N50 for single file, TSV for multiple files', |
|
35
|
|
|
|
|
|
|
'tsv' => 'Tab separated output (file, seqs, total size, N50, min, max)', |
|
36
|
|
|
|
|
|
|
'json' => 'JSON (JavaScript Object Notation) output', |
|
37
|
|
|
|
|
|
|
'csv' => 'Alias for tsv and --separator ","', |
|
38
|
|
|
|
|
|
|
'custom' => 'Custom format with --template STRING', |
|
39
|
|
|
|
|
|
|
'screen' => 'Screen friendly table (requires Text::ASCIITable)', |
|
40
|
|
|
|
|
|
|
'short' => 'Not implemented', |
|
41
|
|
|
|
|
|
|
'full' => 'Not implemented', |
|
42
|
|
|
|
|
|
|
); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my ( |
|
45
|
2
|
|
|
|
|
6
|
$opt_help, |
|
46
|
|
|
|
|
|
|
$opt_version, |
|
47
|
|
|
|
|
|
|
$opt_debug, |
|
48
|
|
|
|
|
|
|
$opt_color, |
|
49
|
|
|
|
|
|
|
$opt_nonewline, |
|
50
|
|
|
|
|
|
|
$opt_noheader, |
|
51
|
|
|
|
|
|
|
$opt_pretty, |
|
52
|
|
|
|
|
|
|
$opt_basename, |
|
53
|
|
|
|
|
|
|
$opt_template, |
|
54
|
|
|
|
|
|
|
$opt_fullpath, |
|
55
|
|
|
|
|
|
|
$opt_thousand_separator, |
|
56
|
|
|
|
|
|
|
$opt_ne, |
|
57
|
|
|
|
|
|
|
$opt_format_screen, # x: same as -f screen |
|
58
|
|
|
|
|
|
|
$opt_format_json, # j: same as -f json |
|
59
|
|
|
|
|
|
|
$opt_sort_by, # o |
|
60
|
|
|
|
|
|
|
$opt_reverse_sort, # r |
|
61
|
|
|
|
|
|
|
); |
|
62
|
|
|
|
|
|
|
|
|
63
|
2
|
|
|
|
|
3
|
$opt_sort_by = 'N50'; |
|
64
|
2
|
|
|
|
|
2
|
$opt_reverse_sort = undef; |
|
65
|
2
|
|
|
|
|
17
|
my %valid_sort_keys = ( |
|
66
|
|
|
|
|
|
|
'auN' => 1, |
|
67
|
|
|
|
|
|
|
'Ne' => 1, |
|
68
|
|
|
|
|
|
|
'N90' => 1, |
|
69
|
|
|
|
|
|
|
'N75' => 1, |
|
70
|
|
|
|
|
|
|
'N50' => 1, |
|
71
|
|
|
|
|
|
|
'min' => 1, |
|
72
|
|
|
|
|
|
|
'max' => 1, |
|
73
|
|
|
|
|
|
|
'seqs' => 1, |
|
74
|
|
|
|
|
|
|
'size' => 1, |
|
75
|
|
|
|
|
|
|
'path' => 1, |
|
76
|
|
|
|
|
|
|
); |
|
77
|
2
|
50
|
|
|
|
5
|
$valid_sort_keys{"N$opt_ne"} = 1 if (defined $opt_ne); |
|
78
|
2
|
|
|
|
|
10
|
my $tab = "\t"; |
|
79
|
2
|
|
|
|
|
5
|
my $new = "\n"; |
|
80
|
2
|
|
|
|
|
32
|
my $_opt = GetOptions( |
|
81
|
|
|
|
|
|
|
'a|abspath' => \$opt_fullpath, |
|
82
|
|
|
|
|
|
|
'b|basename' => \$opt_basename, |
|
83
|
|
|
|
|
|
|
'c|color' => \$opt_color, |
|
84
|
|
|
|
|
|
|
'd|debug' => \$opt_debug, |
|
85
|
|
|
|
|
|
|
'e|Ne=i' => \$opt_ne, |
|
86
|
|
|
|
|
|
|
'f|format=s' => \$opt_format, |
|
87
|
|
|
|
|
|
|
'h|help' => \$opt_help, |
|
88
|
|
|
|
|
|
|
'j|json' => \$opt_format_json, |
|
89
|
|
|
|
|
|
|
'n|nonewline' => \$opt_nonewline, |
|
90
|
|
|
|
|
|
|
'o|sortby=s' => \$opt_sort_by, |
|
91
|
|
|
|
|
|
|
'p|pretty' => \$opt_pretty, |
|
92
|
|
|
|
|
|
|
'r|reverse' => \$opt_reverse_sort, |
|
93
|
|
|
|
|
|
|
's|separator=s' => \$opt_separator, |
|
94
|
|
|
|
|
|
|
'q|thousands-sep' => \$opt_thousand_separator, |
|
95
|
|
|
|
|
|
|
't|template=s' => \$opt_template, |
|
96
|
|
|
|
|
|
|
'u|noheader' => \$opt_noheader, |
|
97
|
|
|
|
|
|
|
'v|version' => \$opt_version, |
|
98
|
|
|
|
|
|
|
'x|screen' => \$opt_format_screen, |
|
99
|
|
|
|
|
|
|
); |
|
100
|
|
|
|
|
|
|
|
|
101
|
2
|
50
|
|
|
|
2546
|
$opt_sort_by = 'N50' if ( $opt_sort_by eq 'n50' ); |
|
102
|
2
|
50
|
|
|
|
5
|
pod2usage( { -exitval => 0, -verbose => 2 } ) if $opt_help; |
|
103
|
2
|
100
|
|
|
|
8
|
version() if defined $opt_version; |
|
104
|
|
|
|
|
|
|
|
|
105
|
2
|
|
|
|
|
2
|
our %output_object; |
|
106
|
2
|
|
|
|
|
4
|
our %output_print; |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Added in v1.5: list accepted output formats programmatically |
|
109
|
|
|
|
|
|
|
# [-f list] or [--format list] will print a list of accepted formats |
|
110
|
|
|
|
|
|
|
|
|
111
|
2
|
50
|
|
|
|
5
|
if ( $opt_format eq 'list' ) { |
|
112
|
0
|
|
|
|
|
0
|
say STDERR "AVAILABLE OUTPUT FORMATS:"; |
|
113
|
0
|
|
|
|
|
0
|
for my $f ( sort keys %formats ) { |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# Use colors if allowed |
|
116
|
0
|
0
|
|
|
|
0
|
if ($opt_color) { |
|
117
|
0
|
|
|
|
|
0
|
print BOLD, $f, "\t"; |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Print in RED unimplemented format |
|
120
|
0
|
0
|
|
|
|
0
|
if ( $formats{$f} eq 'Not implemented' ) { |
|
121
|
0
|
|
|
|
|
0
|
say RED $formats{$f}, RESET; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
else { |
|
124
|
0
|
|
|
|
|
0
|
say RESET, $formats{$f}; |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
else { |
|
128
|
0
|
|
|
|
|
0
|
say "$f\t$formats{$f}"; |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
} |
|
132
|
0
|
|
|
|
|
0
|
exit; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# No files provided, no version |
|
136
|
2
|
50
|
66
|
|
|
17
|
unless ( defined $ARGV[0] or $opt_version ) { |
|
137
|
0
|
|
|
|
|
0
|
say STDERR "\n Usage: n50 file.fa ... \n Type n50 --help for full manual"; |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Shot output formats |
|
142
|
2
|
50
|
33
|
|
|
6
|
die "Error: Please specify either -x (--screen) or -j (--json)\n" |
|
143
|
|
|
|
|
|
|
if ( $opt_format_screen and $opt_format_json ); |
|
144
|
|
|
|
|
|
|
|
|
145
|
2
|
0
|
0
|
|
|
7
|
die "Error: -x (--screen) or -j (--json) are incompatible with -f (--format)\n" |
|
|
|
|
33
|
|
|
|
|
|
146
|
|
|
|
|
|
|
if ( $opt_format ne 'default' and ( $opt_format_screen or $opt_format_json ) ); |
|
147
|
|
|
|
|
|
|
|
|
148
|
2
|
50
|
|
|
|
4
|
$opt_format = 'screen' if ($opt_format_screen); |
|
149
|
2
|
50
|
|
|
|
4
|
$opt_format = 'json' if ($opt_format_json); |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Sorting by / reverse |
|
152
|
|
|
|
|
|
|
|
|
153
|
2
|
50
|
|
|
|
5
|
unless ( defined $valid_sort_keys{$opt_sort_by} ) { |
|
154
|
0
|
|
|
|
|
0
|
say STDERR " FATAL ERROR: Invalid sort key for -o ($opt_sort_by)"; |
|
155
|
0
|
|
|
|
|
0
|
say STDERR " Valid sort keys are: ", join( ', ', keys %valid_sort_keys ); |
|
156
|
0
|
|
|
|
|
0
|
die; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
my %sorters = ( |
|
160
|
|
|
|
|
|
|
asc => sub { |
|
161
|
0
|
|
|
0
|
|
0
|
$output_object{$b}{$opt_sort_by} <=> $output_object{$a}{$opt_sort_by}; |
|
162
|
|
|
|
|
|
|
}, |
|
163
|
|
|
|
|
|
|
desc => sub { |
|
164
|
0
|
|
|
0
|
|
0
|
$output_object{$a}{$opt_sort_by} <=> $output_object{$b}{$opt_sort_by}; |
|
165
|
|
|
|
|
|
|
}, |
|
166
|
0
|
|
|
0
|
|
0
|
string_asc => sub { $a cmp $b }, |
|
167
|
0
|
|
|
0
|
|
0
|
string_desc => sub { $b cmp $a }, |
|
168
|
2
|
|
|
|
|
22
|
); |
|
169
|
|
|
|
|
|
|
|
|
170
|
2
|
|
|
|
|
4
|
my $sorting_order; |
|
171
|
2
|
50
|
|
|
|
4
|
if ( $opt_sort_by eq 'path' ) { |
|
172
|
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
0
|
$sorting_order = 'string_asc'; |
|
174
|
0
|
0
|
|
|
|
0
|
$sorting_order = 'string_desc' if ($opt_reverse_sort); |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
else { |
|
177
|
2
|
|
|
|
|
3
|
$sorting_order = 'asc'; |
|
178
|
2
|
50
|
|
|
|
5
|
$sorting_order = 'desc' if ($opt_reverse_sort); |
|
179
|
|
|
|
|
|
|
} |
|
180
|
2
|
|
|
|
|
8
|
debug("Sorting by <$opt_sort_by>: order=$sorting_order"); |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
die("Unexpected fatal error:\nInvalid sort function \"$sorting_order\".\n") |
|
183
|
2
|
50
|
|
|
|
6
|
if ( not defined $sorters{$sorting_order} ); |
|
184
|
2
|
|
|
|
|
2
|
my $sort_function = $sorters{$sorting_order}; |
|
185
|
|
|
|
|
|
|
|
|
186
|
2
|
50
|
|
|
|
7
|
if ( defined $opt_format ) { |
|
187
|
2
|
|
|
|
|
3
|
$opt_format = lc($opt_format); |
|
188
|
2
|
50
|
|
|
|
7
|
if ( !$formats{$opt_format} ) { |
|
189
|
0
|
|
|
|
|
0
|
my @list = sort keys(%formats); |
|
190
|
|
|
|
|
|
|
|
|
191
|
0
|
|
|
|
|
0
|
die " FATAL ERROR:\n Output format not valid (--format '$opt_format').\n Use one of the following: " |
|
192
|
|
|
|
|
|
|
. join( ', ', @list ) . ".\n"; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# IMPORT JSON ONLY IF NEEDED |
|
196
|
2
|
50
|
|
|
|
4
|
if ( $opt_format eq 'json' ) { |
|
197
|
|
|
|
|
|
|
|
|
198
|
0
|
|
|
|
|
0
|
$hasJSON = eval { |
|
199
|
0
|
|
|
|
|
0
|
require JSON::PP; |
|
200
|
0
|
|
|
|
|
0
|
JSON::PP->import(); |
|
201
|
0
|
|
|
|
|
0
|
1; |
|
202
|
|
|
|
|
|
|
}; |
|
203
|
0
|
0
|
|
|
|
0
|
die "FATAL ERROR: Please install perl module JSON::PP first [e.g. cpanm JSON::PP]\n" |
|
204
|
|
|
|
|
|
|
unless ($hasJSON); |
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# IMPORT ASCII TABLE ONLY IF NEEDE |
|
208
|
2
|
50
|
|
|
|
7
|
if ( $opt_format eq 'screen' ) { |
|
209
|
0
|
|
|
|
|
0
|
my $has_table = eval { |
|
210
|
0
|
|
|
|
|
0
|
require Text::ASCIITable; |
|
211
|
0
|
|
|
|
|
0
|
Text::ASCIITable->import(); |
|
212
|
0
|
|
|
|
|
0
|
$t = Text::ASCIITable->new(); |
|
213
|
0
|
|
|
|
|
0
|
my @cols = ('File', 'Seqs', 'Total bp', 'N50', 'min', 'max', 'N75', 'N90','auN'); |
|
214
|
0
|
0
|
|
|
|
0
|
push @cols, "N$opt_ne" if (defined $opt_ne); |
|
215
|
0
|
|
|
|
|
0
|
$t->setCols( @cols ); |
|
216
|
0
|
|
|
|
|
0
|
1; |
|
217
|
|
|
|
|
|
|
}; |
|
218
|
0
|
0
|
|
|
|
0
|
if ( !$has_table ) { |
|
219
|
0
|
|
|
|
|
0
|
die |
|
220
|
|
|
|
|
|
|
"ERROR:\nFormat 'screen' requires Text::ASCIITable installed.\n"; |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
|
|
224
|
2
|
50
|
33
|
|
|
5
|
if ( $opt_format eq 'custom' and !defined $opt_template ) { |
|
225
|
0
|
|
|
|
|
0
|
print STDERR " [WARNING] There is no default template at the moment!\n"; |
|
226
|
0
|
|
|
|
|
0
|
print STDERR |
|
227
|
|
|
|
|
|
|
" Specify a --template STRING with --format custom or no output will be printed\n"; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
|
|
230
|
2
|
50
|
|
|
|
6
|
if ( $formats{$opt_format} eq 'Not implemented' ) { |
|
231
|
0
|
|
|
|
|
0
|
print STDERR |
|
232
|
|
|
|
|
|
|
" WARNING: Format '$opt_format' not implemented yet. Switching to 'tsv'.\n"; |
|
233
|
0
|
|
|
|
|
0
|
$opt_format = 'tsv'; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
|
|
239
|
2
|
|
|
|
|
5
|
foreach my $file (@ARGV) { |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Check if file exists / check if '-' supplied read STDIN |
|
242
|
1
|
50
|
33
|
|
|
36
|
if ( ( ! -e "$file" ) and ( $file ne '-' ) ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
243
|
0
|
|
|
|
|
0
|
die " FATAL ERROR:\n File not found ($file).\n"; |
|
244
|
|
|
|
|
|
|
} elsif (-d "$file") { |
|
245
|
0
|
|
|
|
|
0
|
print STDERR "WARNING: Ignoring directory $file.\n"; |
|
246
|
0
|
|
|
|
|
0
|
next; |
|
247
|
|
|
|
|
|
|
} elsif ( $file eq '-' ) { |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# Set file to |
|
250
|
0
|
|
|
|
|
0
|
$file = '-'; |
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
else { |
|
253
|
|
|
|
|
|
|
# Open filehandle with $file |
|
254
|
1
|
|
50
|
|
|
37
|
open STDIN, '<', "$file" |
|
255
|
|
|
|
|
|
|
|| die " FATAL ERROR:\n Unable to open file for reading ($file).\n"; |
|
256
|
|
|
|
|
|
|
} |
|
257
|
1
|
|
|
|
|
1
|
my $JSON = 0; |
|
258
|
1
|
50
|
33
|
|
|
9
|
$JSON = 1 if (defined $opt_format and $opt_format =~ /JSON/ ); |
|
259
|
1
|
|
50
|
|
|
5
|
my $FileStats = Proch::N50::getStats( $file, $JSON, $opt_ne ) || 0; |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# Validate answer: check {status}==1 |
|
263
|
1
|
50
|
|
|
|
3
|
if ( !$FileStats->{status} ) { |
|
264
|
0
|
|
|
|
|
0
|
print STDERR "[WARNING]\tError parsing \"$file\". Skipped.\n"; |
|
265
|
0
|
0
|
|
|
|
0
|
if ($opt_debug) { |
|
266
|
0
|
|
|
|
|
0
|
print Dumper $FileStats; |
|
267
|
|
|
|
|
|
|
} |
|
268
|
0
|
|
|
|
|
0
|
next; |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
|
|
271
|
1
|
50
|
|
|
|
5
|
say Dumper $FileStats if ($opt_debug); |
|
272
|
1
|
50
|
|
|
|
2
|
if ( !defined $FileStats->{auN} ) { |
|
273
|
0
|
|
|
|
|
0
|
say STDERR |
|
274
|
|
|
|
|
|
|
"Fatal error: 'auN' statistics not calculated parsing \"$file\". Proch::N50 > 1.2.0 required, ", $Proch::N50::VERSION , " found."; |
|
275
|
0
|
|
|
|
|
0
|
say STDERR Dumper $FileStats; |
|
276
|
0
|
|
|
|
|
0
|
say STDERR $Proch::N50::VERSION; |
|
277
|
0
|
|
|
|
|
0
|
die; |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
|
|
280
|
1
|
|
|
|
|
2
|
my $n50 = $FileStats->{N50} + 0; |
|
281
|
1
|
|
|
|
|
2
|
my $n = $FileStats->{seqs} + 0; |
|
282
|
1
|
|
|
|
|
1
|
my $slen = $FileStats->{size} + 0; |
|
283
|
1
|
|
|
|
|
1
|
my $min = $FileStats->{min} + 0; |
|
284
|
1
|
|
|
|
|
2
|
my $max = $FileStats->{max} + 0; |
|
285
|
1
|
|
|
|
|
1
|
my $n75 = $FileStats->{N75} + 0; |
|
286
|
1
|
|
|
|
|
2
|
my $n90 = $FileStats->{N90} + 0; |
|
287
|
1
|
|
|
|
|
4
|
my $auN = $FileStats->{auN} + 0; |
|
288
|
1
|
50
|
|
|
|
9
|
my $Ne = defined $opt_ne ? $FileStats->{Ne} : undef; |
|
289
|
|
|
|
|
|
|
|
|
290
|
1
|
50
|
|
|
|
2
|
say STDERR "[$file]\tTotalSize:$slen;N50:$n50;Sequences:$n" if ($opt_debug); |
|
291
|
|
|
|
|
|
|
|
|
292
|
1
|
50
|
|
|
|
3
|
$file = basename($file) if ($opt_basename); |
|
293
|
1
|
50
|
|
|
|
2
|
$file = File::Spec->rel2abs($file) if ($opt_fullpath); |
|
294
|
|
|
|
|
|
|
|
|
295
|
1
|
|
|
|
|
5
|
my %metrics = ( |
|
296
|
|
|
|
|
|
|
'seqs' => $n, |
|
297
|
|
|
|
|
|
|
'N50' => $n50, |
|
298
|
|
|
|
|
|
|
'size' => $slen, |
|
299
|
|
|
|
|
|
|
'min' => $min, |
|
300
|
|
|
|
|
|
|
'max' => $max, |
|
301
|
|
|
|
|
|
|
'auN' => $auN, |
|
302
|
|
|
|
|
|
|
'N75' => $n75, |
|
303
|
|
|
|
|
|
|
'N90' => $n90, |
|
304
|
|
|
|
|
|
|
); |
|
305
|
1
|
50
|
|
|
|
3
|
$metrics{"N$opt_ne"} = $Ne if (defined $opt_ne); |
|
306
|
|
|
|
|
|
|
|
|
307
|
1
|
50
|
|
|
|
3
|
if ( defined $output_object{$file} ) { |
|
308
|
0
|
|
|
|
|
0
|
print STDERR " WARNING: ", |
|
309
|
|
|
|
|
|
|
"Overwriting '$file': multiple items with the same filename. Try not using -b/--basename.\n"; |
|
310
|
|
|
|
|
|
|
} |
|
311
|
1
|
|
|
|
|
2
|
$output_object{$file} = \%metrics; |
|
312
|
1
|
|
|
|
|
2
|
$output_print{$file} = {}; |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
|
315
|
1
|
|
|
|
|
2
|
for my $key ( keys %{ $output_object{$file} } ) { |
|
|
1
|
|
|
|
|
3
|
|
|
316
|
|
|
|
|
|
|
|
|
317
|
8
|
50
|
33
|
|
|
15
|
if ( $opt_thousand_separator or $opt_format eq 'screen' ) { |
|
318
|
0
|
|
|
|
|
0
|
$output_print{$file}{$key} = thousands($output_object{$file}{$key} ); |
|
319
|
|
|
|
|
|
|
} else { |
|
320
|
8
|
|
|
|
|
14
|
$output_print{$file}{$key} = $output_object{$file}{$key} ; |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
} |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
|
331
|
2
|
|
|
|
|
5
|
my $file_num = scalar keys %output_object; |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Format Output |
|
334
|
|
|
|
|
|
|
|
|
335
|
2
|
50
|
33
|
|
|
9
|
if ( not $opt_format or $opt_format eq 'default' ) { |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
336
|
2
|
|
|
|
|
5
|
debug("Activating format "); |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# DEFAULT: format |
|
339
|
2
|
100
|
|
|
|
4
|
if ( $file_num == 1 ) { |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# If only one file is supplied, just return N50 (to allow easy pipeline parsing) |
|
342
|
1
|
|
|
|
|
3
|
my @keys = keys %output_object; |
|
343
|
1
|
50
|
|
|
|
2
|
if ($opt_nonewline) { |
|
344
|
|
|
|
|
|
|
print $opt_thousand_separator |
|
345
|
|
|
|
|
|
|
? thousands( $output_object{ $keys[0] }{'N50'} ) |
|
346
|
0
|
0
|
|
|
|
0
|
: $output_object{ $keys[0] }{'N50'}; |
|
347
|
|
|
|
|
|
|
} |
|
348
|
|
|
|
|
|
|
else { |
|
349
|
|
|
|
|
|
|
say $opt_thousand_separator |
|
350
|
|
|
|
|
|
|
? thousands( $output_object{ $keys[0] }{'N50'} ) |
|
351
|
1
|
50
|
|
|
|
0
|
: $output_object{ $keys[0] }{'N50'}; |
|
352
|
|
|
|
|
|
|
} |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
else { |
|
356
|
|
|
|
|
|
|
# Print table |
|
357
|
1
|
|
|
|
|
0
|
foreach my $r ( sort $sort_function keys %output_object ) { |
|
358
|
|
|
|
|
|
|
say $r, $opt_separator, |
|
359
|
|
|
|
|
|
|
$opt_thousand_separator |
|
360
|
|
|
|
|
|
|
? thousands( $output_print{$r}{'N50'} ) |
|
361
|
0
|
0
|
|
|
|
0
|
: $output_print{$r}{'N50'}; |
|
362
|
|
|
|
|
|
|
} |
|
363
|
|
|
|
|
|
|
} |
|
364
|
|
|
|
|
|
|
} |
|
365
|
|
|
|
|
|
|
elsif ( $opt_format eq 'json' ) { |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Print JSON object |
|
368
|
0
|
|
|
|
|
0
|
my $json = JSON::PP->new->ascii->allow_nonref; |
|
369
|
0
|
|
|
|
|
0
|
my $json_printed = $json->encode( \%output_print ); |
|
370
|
|
|
|
|
|
|
|
|
371
|
0
|
0
|
|
|
|
0
|
if ($opt_pretty) { |
|
372
|
0
|
|
|
|
|
0
|
$json_printed = $json->pretty->encode( \%output_print ); |
|
373
|
|
|
|
|
|
|
} |
|
374
|
0
|
|
|
|
|
0
|
say $json_printed; |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
} |
|
377
|
|
|
|
|
|
|
elsif ( $opt_format eq 'tsv' or $opt_format eq 'csv' ) { |
|
378
|
|
|
|
|
|
|
|
|
379
|
0
|
0
|
|
|
|
0
|
$opt_separator = ',' if ( $opt_format eq 'csv' ); |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
# TSV format |
|
382
|
0
|
|
|
|
|
0
|
my @fields = ( 'path', 'seqs', 'size', 'N50', 'min', 'max', 'N75', 'N90','auN'); |
|
383
|
0
|
0
|
|
|
|
0
|
push(@fields, "N$opt_ne") if (defined $opt_ne); |
|
384
|
0
|
0
|
|
|
|
0
|
say '#', join( $opt_separator, @fields ) if ( !defined $opt_noheader ); |
|
385
|
|
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
0
|
foreach my $r ( sort $sort_function keys %output_object ) { |
|
387
|
0
|
|
|
|
|
0
|
print $r, $opt_separator; |
|
388
|
|
|
|
|
|
|
|
|
389
|
0
|
|
|
|
|
0
|
for ( my $i = 1 ; $i <= $#fields ; $i++ ) { |
|
390
|
0
|
0
|
|
|
|
0
|
if ($opt_thousand_separator) { |
|
391
|
0
|
0
|
0
|
|
|
0
|
print '"' if ( $opt_format eq 'csv' or $opt_separator eq ',' ); |
|
392
|
|
|
|
|
|
|
print thousands( $output_object{$r}{ $fields[$i] } ) |
|
393
|
0
|
0
|
|
|
|
0
|
if ( defined $output_object{$r}{ $fields[$i] } ); |
|
394
|
0
|
0
|
0
|
|
|
0
|
print '"' if ( $opt_format eq 'csv' or $opt_separator eq ',' ); |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
else { |
|
397
|
|
|
|
|
|
|
print $output_object{$r}{ $fields[$i] } |
|
398
|
0
|
0
|
|
|
|
0
|
if ( defined $output_object{$r}{ $fields[$i] } ); |
|
399
|
|
|
|
|
|
|
} |
|
400
|
0
|
0
|
0
|
|
|
0
|
if ( ( $i == $#fields ) and ( not $opt_nonewline ) ) { |
|
401
|
0
|
|
|
|
|
0
|
print "\n"; |
|
402
|
|
|
|
|
|
|
} |
|
403
|
|
|
|
|
|
|
else { |
|
404
|
0
|
|
|
|
|
0
|
print $opt_separator; |
|
405
|
|
|
|
|
|
|
} |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
} |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
} |
|
410
|
|
|
|
|
|
|
elsif ( $opt_format eq 'custom' ) { |
|
411
|
0
|
|
|
|
|
0
|
my @fields = ( 'seqs', 'size', 'N50', 'min', 'max','N75', 'N90','auN' ); |
|
412
|
0
|
0
|
|
|
|
0
|
push(@fields, "N$opt_ne") if (defined $opt_ne); |
|
413
|
|
|
|
|
|
|
# Format: custom (use tags {new}{tab} {path} ...) |
|
414
|
0
|
|
|
|
|
0
|
foreach my $r ( sort $sort_function keys %output_object ) { |
|
415
|
0
|
|
|
|
|
0
|
my $output_string = ''; |
|
416
|
0
|
0
|
|
|
|
0
|
$output_string .= $opt_template if ( defined $opt_template ); |
|
417
|
|
|
|
|
|
|
|
|
418
|
0
|
0
|
|
|
|
0
|
$output_string =~ s/(\{new\}|\{n\}|\\n)/$new/g |
|
419
|
|
|
|
|
|
|
if ( $output_string =~ /(\{new\}|\{n\}|\\n)/ ); |
|
420
|
0
|
0
|
|
|
|
0
|
$output_string =~ s/(\{tab\}|\{t\}|\\t)/$tab/g |
|
421
|
|
|
|
|
|
|
if ( $output_string =~ /(\{tab\}|{t}|\\t)/ ); |
|
422
|
0
|
0
|
|
|
|
0
|
$output_string =~ s/\{path\}/$r/g if ( $output_string =~ /\{path\}/ ); |
|
423
|
0
|
|
|
|
|
0
|
foreach my $f (@fields) { |
|
424
|
0
|
|
|
|
|
0
|
$output_string =~ s/\{$f\}/$output_print{$r}{$f}/g; |
|
425
|
|
|
|
|
|
|
} |
|
426
|
0
|
|
|
|
|
0
|
print $output_string; |
|
427
|
|
|
|
|
|
|
} |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
elsif ( $opt_format eq 'screen' ) { |
|
430
|
|
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
0
|
my @fields = ( 'path', 'seqs', 'size', 'N50', 'min', 'max', 'N75', 'N90','auN' ); |
|
432
|
0
|
0
|
|
|
|
0
|
push(@fields, "N$opt_ne") if (defined $opt_ne); |
|
433
|
|
|
|
|
|
|
#my $field = 'N50'; |
|
434
|
|
|
|
|
|
|
|
|
435
|
0
|
|
|
|
|
0
|
foreach my $r ( sort $sort_function keys %output_object ) { |
|
436
|
0
|
|
|
|
|
0
|
my @array; |
|
437
|
0
|
|
|
|
|
0
|
push( @array, $r ); |
|
438
|
0
|
|
|
|
|
0
|
for ( my $i = 1 ; $i <= $#fields ; $i++ ) { |
|
439
|
0
|
0
|
|
|
|
0
|
if ( defined $output_print{$r}{ $fields[$i] } ) { |
|
440
|
0
|
|
|
|
|
0
|
push( @array, $output_print{$r}{ $fields[$i] } ) |
|
441
|
|
|
|
|
|
|
} else { |
|
442
|
0
|
|
|
|
|
0
|
say Dumper $output_print{$r}; |
|
443
|
0
|
|
|
|
|
0
|
die "$fields[$i] not defined"; |
|
444
|
|
|
|
|
|
|
} |
|
445
|
|
|
|
|
|
|
} |
|
446
|
|
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
0
|
$t->addRow(@array); |
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
|
|
450
|
0
|
|
|
|
|
0
|
print $t; |
|
451
|
|
|
|
|
|
|
} |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# Print debug information |
|
454
|
|
|
|
|
|
|
sub debug { |
|
455
|
4
|
50
|
|
4
|
|
8
|
return unless defined $opt_debug; |
|
456
|
0
|
|
|
|
|
0
|
my ( $message, $title ) = @_; |
|
457
|
0
|
0
|
|
|
|
0
|
$title = 'INFO' unless defined $title; |
|
458
|
0
|
|
|
|
|
0
|
$title = uc($title); |
|
459
|
0
|
|
|
|
|
0
|
printMessage( $message, $title, 'green', 'yellow' ); |
|
460
|
0
|
|
|
|
|
0
|
return 1; |
|
461
|
|
|
|
|
|
|
} |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
# Print message with colors unless --nocolor |
|
464
|
|
|
|
|
|
|
sub printMessage { |
|
465
|
3
|
|
|
3
|
|
6
|
my ( $message, $title, $title_color, $message_color ) = @_; |
|
466
|
|
|
|
|
|
|
|
|
467
|
3
|
100
|
|
|
|
6
|
if (! defined $title_color) { |
|
|
|
50
|
|
|
|
|
|
|
468
|
2
|
|
|
|
|
3
|
$title_color = 'reset'; |
|
469
|
|
|
|
|
|
|
} elsif (! colorvalid($title_color)) { |
|
470
|
0
|
|
|
|
|
0
|
say STDERR "$title_color not valid title color"; |
|
471
|
0
|
|
|
|
|
0
|
$title_color = 'reset'; |
|
472
|
|
|
|
|
|
|
} |
|
473
|
|
|
|
|
|
|
|
|
474
|
3
|
50
|
|
|
|
19
|
if (! defined $message_color) { |
|
|
|
50
|
|
|
|
|
|
|
475
|
0
|
|
|
|
|
0
|
$message_color = 'reset'; |
|
476
|
|
|
|
|
|
|
} elsif (! colorvalid($message_color)) { |
|
477
|
0
|
|
|
|
|
0
|
say STDERR "$message_color not valid title color"; |
|
478
|
0
|
|
|
|
|
0
|
$message_color = 'reset'; |
|
479
|
|
|
|
|
|
|
} |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
|
|
482
|
3
|
50
|
|
|
|
42
|
print STDERR colored( "$title", $title_color ), "\t" if ($title); |
|
483
|
3
|
|
|
|
|
10
|
say colored( "$message", $message_color ); |
|
484
|
3
|
|
|
|
|
85
|
return 1; |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
sub version { |
|
488
|
|
|
|
|
|
|
|
|
489
|
1
|
|
|
1
|
|
7
|
printMessage( "$program{NAME}, ver. $program{VERSION}", |
|
490
|
|
|
|
|
|
|
'', undef, 'bold blue' ); |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
|
|
494
|
1
|
|
|
|
|
3
|
printMessage("Program to calculate N50 from multiple FASTA/FASTQ files,\n" . |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
"https://metacpan.org/pod/distribution/Proch-N50/bin/n50", '', 'blue', 'blue' |
|
497
|
|
|
|
|
|
|
); |
|
498
|
1
|
|
|
|
|
3
|
printMessage("Using Proch::N50 $Proch::N50::VERSION", '', undef, 'red'); |
|
499
|
|
|
|
|
|
|
|
|
500
|
1
|
|
|
|
|
2
|
return $program{VERSION}; |
|
501
|
|
|
|
|
|
|
} |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# Calculate N50 from a hash of contig lengths and their counts |
|
504
|
|
|
|
|
|
|
sub n50fromHash { |
|
505
|
0
|
|
|
0
|
|
|
my ( $hash_ref, $total ) = @_; |
|
506
|
0
|
|
|
|
|
|
my $tlen = 0; |
|
507
|
0
|
|
|
|
|
|
foreach my $s ( sort { $a <=> $b } keys %{$hash_ref} ) { |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
508
|
0
|
|
|
|
|
|
$tlen += $s * ${$hash_ref}{$s}; |
|
|
0
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
# In my original implementation it was >=, here > to comply with 'seqkit' |
|
511
|
0
|
0
|
|
|
|
|
return $s if ( $tlen > ( $total / 2 ) ); |
|
512
|
|
|
|
|
|
|
} |
|
513
|
0
|
|
|
|
|
|
return 0; |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
} |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
sub thousands { |
|
518
|
0
|
|
|
0
|
|
|
my ($number) = @_; |
|
519
|
0
|
|
|
|
|
|
my ($int, $dec) = split /\./, $number; |
|
520
|
0
|
|
|
|
|
|
$int =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g; |
|
521
|
0
|
0
|
|
|
|
|
$dec = $dec ? ".$dec" : ''; |
|
522
|
0
|
|
|
|
|
|
return "$int$dec"; |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# Heng Li's subroutine (edited) |
|
526
|
|
|
|
|
|
|
sub readfq { |
|
527
|
0
|
|
|
0
|
|
|
my ( $fh, $aux ) = @_; |
|
528
|
0
|
0
|
|
|
|
|
@$aux = [ undef, 0 ] if ( !(@$aux) ); |
|
529
|
0
|
0
|
|
|
|
|
return if ( $aux->[1] ); |
|
530
|
0
|
0
|
|
|
|
|
if ( !defined( $aux->[0] ) ) { |
|
531
|
0
|
|
|
|
|
|
while (<$fh>) { |
|
532
|
0
|
|
|
|
|
|
chomp; |
|
533
|
0
|
0
|
0
|
|
|
|
if ( substr( $_, 0, 1 ) eq '>' || substr( $_, 0, 1 ) eq '@' ) { |
|
534
|
0
|
|
|
|
|
|
$aux->[0] = $_; |
|
535
|
0
|
|
|
|
|
|
last; |
|
536
|
|
|
|
|
|
|
} |
|
537
|
|
|
|
|
|
|
} |
|
538
|
0
|
0
|
|
|
|
|
if ( !defined( $aux->[0] ) ) { |
|
539
|
0
|
|
|
|
|
|
$aux->[1] = 1; |
|
540
|
0
|
|
|
|
|
|
return; |
|
541
|
|
|
|
|
|
|
} |
|
542
|
|
|
|
|
|
|
} |
|
543
|
|
|
|
|
|
|
|
|
544
|
0
|
|
|
|
|
|
my $name = ''; |
|
545
|
0
|
0
|
|
|
|
|
if ( defined $_ ) { |
|
546
|
0
|
0
|
|
|
|
|
$name = /^.(\S+)/ ? $1 : ''; |
|
547
|
|
|
|
|
|
|
} |
|
548
|
|
|
|
|
|
|
|
|
549
|
0
|
|
|
|
|
|
my $seq = ''; |
|
550
|
0
|
|
|
|
|
|
my $c; |
|
551
|
0
|
|
|
|
|
|
$aux->[0] = undef; |
|
552
|
0
|
|
|
|
|
|
while (<$fh>) { |
|
553
|
0
|
|
|
|
|
|
chomp; |
|
554
|
0
|
|
|
|
|
|
$c = substr( $_, 0, 1 ); |
|
555
|
0
|
0
|
0
|
|
|
|
last if ( $c eq '>' || $c eq '@' || $c eq '+' ); |
|
|
|
|
0
|
|
|
|
|
|
556
|
0
|
|
|
|
|
|
$seq .= $_; |
|
557
|
|
|
|
|
|
|
} |
|
558
|
0
|
|
|
|
|
|
$aux->[0] = $_; |
|
559
|
0
|
0
|
|
|
|
|
$aux->[1] = 1 if ( !defined( $aux->[0] ) ); |
|
560
|
0
|
0
|
|
|
|
|
return ( $name, $seq ) if ( $c ne '+' ); |
|
561
|
0
|
|
|
|
|
|
my $qual = ''; |
|
562
|
0
|
|
|
|
|
|
while (<$fh>) { |
|
563
|
0
|
|
|
|
|
|
chomp; |
|
564
|
0
|
|
|
|
|
|
$qual .= $_; |
|
565
|
0
|
0
|
|
|
|
|
if ( length($qual) >= length($seq) ) { |
|
566
|
0
|
|
|
|
|
|
$aux->[0] = undef; |
|
567
|
0
|
|
|
|
|
|
return ( $name, $seq, $qual ); |
|
568
|
|
|
|
|
|
|
} |
|
569
|
|
|
|
|
|
|
} |
|
570
|
0
|
|
|
|
|
|
$aux->[1] = 1; |
|
571
|
0
|
|
|
|
|
|
return ( $name, $seq ); |
|
572
|
|
|
|
|
|
|
} |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
__END__ |