line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2015-02-06 16:29:26 mtw> |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::UCSC; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
1377
|
use Exporter; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
47
|
|
7
|
1
|
|
|
1
|
|
7
|
use version; our $VERSION = qv('0.12_15'); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
8
|
1
|
|
|
1
|
|
88
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
9
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
10
|
1
|
|
|
1
|
|
732
|
use Template; |
|
1
|
|
|
|
|
24690
|
|
|
1
|
|
|
|
|
38
|
|
11
|
1
|
|
|
1
|
|
11
|
use Cwd; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
82
|
|
12
|
1
|
|
|
1
|
|
6
|
use File::Basename; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
72
|
|
13
|
1
|
|
|
1
|
|
7
|
use IPC::Cmd qw(can_run run); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
65
|
|
14
|
1
|
|
|
1
|
|
6
|
use File::Share ':all'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
208
|
|
15
|
1
|
|
|
1
|
|
7
|
use Path::Class; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
72
|
|
16
|
1
|
|
|
1
|
|
6
|
use Data::Dumper; |
|
1
|
|
|
|
|
121
|
|
|
1
|
|
|
|
|
67
|
|
17
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
71
|
|
18
|
1
|
|
|
1
|
|
6
|
use Bio::ViennaNGS::Util qw(fetch_chrom_sizes); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6403
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our @EXPORT_OK = qw( make_assembly_hub make_track_hub ); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our @EXPORT = (); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
27
|
|
|
|
|
|
|
#^^^^^^^^^^^ Subroutines ^^^^^^^^^^# |
28
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub make_assembly_hub{ |
31
|
0
|
|
|
0
|
1
|
|
my ($fasta_path, $filesdir, $basedir, $baseURL, $big_wig_ids, $log) = @_; |
32
|
0
|
|
|
|
|
|
my ($basename,$dir,$ext); |
33
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
34
|
|
|
|
|
|
|
#check arguments |
35
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function] \$fasta_path does not exist\n") |
36
|
|
|
|
|
|
|
unless (-e $fasta_path); |
37
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function] \$basedir does not exist\n") |
38
|
|
|
|
|
|
|
unless (-d $basedir); |
39
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function]: no URL (network location for upload to UCSC) provided") |
40
|
|
|
|
|
|
|
unless(defined $baseURL); |
41
|
|
|
|
|
|
|
|
42
|
0
|
0
|
|
|
|
|
if (defined $log){ |
43
|
0
|
0
|
|
|
|
|
open(LOG, ">>", $log) or croak "$!"; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
0
|
0
|
|
|
|
|
unless ($baseURL =~ /\/$/) { $baseURL .= "/"; } |
|
0
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
48
|
0
|
|
|
|
|
|
my $tmp_path = dist_file('Bio-ViennaNGS', "hub.txt" ); |
49
|
0
|
|
|
|
|
|
($basename,$dir,$ext) = fileparse($tmp_path,qr/\..*/); |
50
|
0
|
|
|
|
|
|
my $template_path = dir($dir,"template-AssemblyHub"); |
51
|
|
|
|
|
|
|
|
52
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function] template directory not found\n") |
53
|
|
|
|
|
|
|
unless (-d $template_path); |
54
|
0
|
0
|
|
|
|
|
my $faToTwoBit = can_run('faToTwoBit') or |
55
|
|
|
|
|
|
|
croak ("ERROR [$this_function] faToTwoBit is not installed!"); |
56
|
|
|
|
|
|
|
|
57
|
0
|
0
|
|
|
|
|
my $bedToBigBed = can_run('bedToBigBed') or |
58
|
|
|
|
|
|
|
croak ("ERROR [$this_function] bedToBigBed is not installed!"); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# bedfiles path |
61
|
0
|
|
|
|
|
|
my $parsedHeaderRef = parse_fasta_header($fasta_path,$this_function); |
62
|
0
|
|
|
|
|
|
my @parsedHeader = @$parsedHeaderRef; |
63
|
0
|
|
|
|
|
|
my $unchecked_accession = $parsedHeader[0]; |
64
|
0
|
|
|
|
|
|
my $scientificName = $parsedHeader[1]; |
65
|
0
|
|
|
|
|
|
my $accession = valid_ncbi_accession($unchecked_accession); |
66
|
|
|
|
|
|
|
# create assembly hub directory structure |
67
|
0
|
|
|
|
|
|
my $assembly_hub_name = "assemblyHub"; |
68
|
0
|
|
|
|
|
|
my $assembly_hub_directory = dir($basedir, $assembly_hub_name); |
69
|
0
|
|
|
|
|
|
my $genome_assembly_name = $accession; |
70
|
0
|
|
|
|
|
|
my $genome_assembly_directory = dir($assembly_hub_directory,$genome_assembly_name); |
71
|
0
|
|
|
|
|
|
mkdir $assembly_hub_directory; |
72
|
0
|
|
|
|
|
|
mkdir $genome_assembly_directory; |
73
|
0
|
0
|
|
|
|
|
if (defined $log){ |
74
|
0
|
|
|
|
|
|
print LOG "LOG Base directory: $assembly_hub_directory\n"; |
75
|
0
|
|
|
|
|
|
print LOG "LOG Assembly Hub directory: $genome_assembly_directory\n"; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
#2-bit fasta file conversion |
79
|
0
|
|
|
|
|
|
my $fa_modified = file($assembly_hub_directory, $accession.".fa"); |
80
|
0
|
|
|
|
|
|
modify_fasta_header($fasta_path,$fa_modified,$accession); |
81
|
0
|
|
|
|
|
|
my $twoBit = file($genome_assembly_directory, $accession.".2bit"); |
82
|
0
|
|
|
|
|
|
my $fastaToTwobit_cmd = "$faToTwoBit $fa_modified $twoBit"; |
83
|
0
|
0
|
|
|
|
|
if (defined $log){ print LOG "LOG [$this_function] $fastaToTwobit_cmd\n";} |
|
0
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
0
|
|
|
|
|
|
my( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = |
86
|
|
|
|
|
|
|
run( command => $fastaToTwobit_cmd, verbose => 0 ); |
87
|
0
|
0
|
|
|
|
|
if( !$success ) { |
88
|
0
|
|
|
|
|
|
print STDERR "ERROR [$this_function] External command call unsuccessful\n"; |
89
|
0
|
|
|
|
|
|
print STDERR "ERROR: this is what the command printed:\n"; |
90
|
0
|
|
|
|
|
|
print join "", @$full_buf; |
91
|
0
|
|
|
|
|
|
croak $!; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
#template definition |
95
|
0
|
|
|
|
|
|
my $template = Template->new({ |
96
|
|
|
|
|
|
|
INCLUDE_PATH => ["$template_path"], |
97
|
|
|
|
|
|
|
RELATIVE=>1, |
98
|
|
|
|
|
|
|
}); |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
#construct hub.txt |
101
|
0
|
|
|
|
|
|
my $hubtxt_path = file($assembly_hub_directory,"hub.txt")->stringify; |
102
|
0
|
|
|
|
|
|
my $hubtxt_file = "hub.txt"; |
103
|
0
|
|
|
|
|
|
my $hubtxt_vars = |
104
|
|
|
|
|
|
|
{ |
105
|
|
|
|
|
|
|
hubName => $accession, |
106
|
|
|
|
|
|
|
shortLabel => $accession, |
107
|
|
|
|
|
|
|
longLabel => $accession, |
108
|
|
|
|
|
|
|
genomesFile => "genome.txt", |
109
|
|
|
|
|
|
|
email => 'email', |
110
|
|
|
|
|
|
|
descriptionURL => "$baseURL" . "description.html" |
111
|
|
|
|
|
|
|
}; |
112
|
0
|
0
|
|
|
|
|
$template->process($hubtxt_file,$hubtxt_vars,"$hubtxt_path") || |
113
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
#construct genome.txt |
116
|
0
|
|
|
|
|
|
my $genometxt_path = file($assembly_hub_directory, "genome.txt")->stringify; |
117
|
0
|
|
|
|
|
|
my $genometxt_file = "genome.txt"; |
118
|
0
|
|
|
|
|
|
my $genometxt_vars = |
119
|
|
|
|
|
|
|
{ |
120
|
|
|
|
|
|
|
genome => $accession, |
121
|
|
|
|
|
|
|
trackDb => file($accession, "trackDb.txt"), |
122
|
|
|
|
|
|
|
groups => file($accession, "groups.txt"), |
123
|
|
|
|
|
|
|
description => "$accession", |
124
|
|
|
|
|
|
|
twoBitPath => file($accession,$accession.".2bit"), |
125
|
|
|
|
|
|
|
organism => "organism", |
126
|
|
|
|
|
|
|
defaultPos => $accession, |
127
|
|
|
|
|
|
|
orderKey => "10", |
128
|
|
|
|
|
|
|
scientificName => "$scientificName", |
129
|
|
|
|
|
|
|
htmlPath => file($accession,"description.html") |
130
|
|
|
|
|
|
|
}; |
131
|
0
|
0
|
|
|
|
|
$template->process($genometxt_file,$genometxt_vars,$genometxt_path) or |
132
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
#construct description.html |
135
|
0
|
|
|
|
|
|
my $description_html_path = file($genome_assembly_directory, "description.html")->stringify; |
136
|
0
|
|
|
|
|
|
my $description_html_file = "description.html"; |
137
|
0
|
|
|
|
|
|
my $description_html_vars = |
138
|
|
|
|
|
|
|
{ |
139
|
|
|
|
|
|
|
imageLink => "imageLink", |
140
|
|
|
|
|
|
|
imageSource => "imageSource", |
141
|
|
|
|
|
|
|
imageAlternative => "imageAlternative", |
142
|
|
|
|
|
|
|
taxonomicName => "taxonomicName", |
143
|
|
|
|
|
|
|
imageOrigin => "imageOrigin", |
144
|
|
|
|
|
|
|
imageOriginDescription => "imageOriginDescription", |
145
|
|
|
|
|
|
|
ucscId => "ucscId", |
146
|
|
|
|
|
|
|
sequencingId => "sequencingId", |
147
|
|
|
|
|
|
|
assemblyDate => "assemblyDate", |
148
|
|
|
|
|
|
|
genbankAccessionId => "genbankAccessionId", |
149
|
|
|
|
|
|
|
ncbiGenomeInformationLink => "ncbiGenomeInformationLink", |
150
|
|
|
|
|
|
|
ncbiGenomeInformationDescription => "ncbiGenomeInformationDescription", |
151
|
|
|
|
|
|
|
ncbiAssemblyInformationLink => "ncbiAssemblyInformationLink", |
152
|
|
|
|
|
|
|
ncbiAssemblyInformationDescription => "ncbiAssemblyInformationDescription", |
153
|
|
|
|
|
|
|
bioProjectInformationLink => "bioProjectInformationLink", |
154
|
|
|
|
|
|
|
bioProjectInformationDescription => "bioProjectInformationDescription", |
155
|
|
|
|
|
|
|
sequenceAnnotationLink => "sequenceAnnotationLink" |
156
|
|
|
|
|
|
|
}; |
157
|
0
|
0
|
|
|
|
|
$template->process($description_html_file,$description_html_vars,$description_html_path) or |
158
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
159
|
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
|
my $groups = make_group("annotation", "Annotation", "1", "0"); |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
#construct group.txt |
163
|
0
|
|
|
|
|
|
my $group_txt_path = file($genome_assembly_directory, "groups.txt")->stringify; |
164
|
0
|
|
|
|
|
|
my $group_txt_file = 'groups.txt'; |
165
|
0
|
|
|
|
|
|
my $group_txt_vars = |
166
|
|
|
|
|
|
|
{ |
167
|
|
|
|
|
|
|
groups => "$groups", |
168
|
|
|
|
|
|
|
}; |
169
|
0
|
0
|
|
|
|
|
$template->process($group_txt_file,$group_txt_vars,$group_txt_path) or |
170
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
#construct big bed |
173
|
0
|
|
|
|
|
|
my $tracksList; |
174
|
|
|
|
|
|
|
#Bigbeds are only created from infolder |
175
|
0
|
0
|
|
|
|
|
unless($filesdir =~ /-/){ |
176
|
0
|
|
|
|
|
|
my $chromosome_size = retrieve_chromosome_size($fasta_path); |
177
|
0
|
|
|
|
|
|
my $chromosome_size_filepath = file($genome_assembly_directory,"$accession.chrom.sizes"); |
178
|
0
|
|
|
|
|
|
write_chromosome_size_file($chromosome_size_filepath,$accession,$chromosome_size); |
179
|
0
|
|
|
|
|
|
convert_tracks($filesdir, $genome_assembly_directory, $accession, $bedToBigBed, $chromosome_size_filepath); |
180
|
0
|
|
|
|
|
|
my @trackfiles = retrieve_tracks($genome_assembly_directory, $baseURL, $assembly_hub_name, $accession); |
181
|
|
|
|
|
|
|
|
182
|
0
|
|
|
|
|
|
foreach my $track (@trackfiles){ |
183
|
0
|
|
|
|
|
|
my $trackString = make_track(@$track); |
184
|
0
|
|
|
|
|
|
$tracksList .= $trackString; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
#big wigs |
189
|
0
|
|
|
|
|
|
my $bigwig_tracks_string = ""; |
190
|
0
|
0
|
|
|
|
|
unless($big_wig_ids=~/^-$/){ |
191
|
0
|
|
|
|
|
|
$bigwig_tracks_string = retrieve_bigwig_tracks($genome_assembly_directory, $baseURL, $assembly_hub_name, $accession, $big_wig_ids); |
192
|
|
|
|
|
|
|
} |
193
|
0
|
|
|
|
|
|
$tracksList .= $bigwig_tracks_string; |
194
|
|
|
|
|
|
|
#construct trackDb.txt |
195
|
0
|
|
|
|
|
|
my $trackDb_txt_path = file($genome_assembly_directory, "trackDb.txt")->stringify; |
196
|
0
|
|
|
|
|
|
my $trackDb_txt_file = 'trackDb.txt'; |
197
|
0
|
|
|
|
|
|
my $trackDb_txt_vars = |
198
|
|
|
|
|
|
|
{ |
199
|
|
|
|
|
|
|
tracks => "$tracksList" |
200
|
|
|
|
|
|
|
}; |
201
|
0
|
0
|
|
|
|
|
$template->process($trackDb_txt_file,$trackDb_txt_vars,$trackDb_txt_path) or |
202
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
203
|
|
|
|
|
|
|
|
204
|
0
|
0
|
|
|
|
|
if (defined $log){ |
205
|
0
|
|
|
|
|
|
print LOG "LOG Assembly Hub created\n"; |
206
|
0
|
|
|
|
|
|
close(LOG); |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub make_track_hub{ |
211
|
0
|
|
|
0
|
1
|
|
my ($species, $filesdir, $basedir, $baseURL, $chrom_sizes_file, $big_wig_ids, $log) = @_; |
212
|
0
|
|
|
|
|
|
my ($basename,$dir,$ext); |
213
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
#check arguments |
216
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function] \no species provided\n") |
217
|
|
|
|
|
|
|
unless ($species); |
218
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function] \$basedir does not exist\n") |
219
|
|
|
|
|
|
|
unless (-d $basedir); |
220
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function]: no URL (network location for upload to UCSC) provided") |
221
|
|
|
|
|
|
|
unless(defined $baseURL); |
222
|
|
|
|
|
|
|
|
223
|
0
|
0
|
|
|
|
|
if (defined $log){ |
224
|
0
|
0
|
|
|
|
|
open(LOG, ">>", $log) or croak "$!"; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
0
|
0
|
|
|
|
|
unless ($baseURL =~ /\/$/) { $baseURL .= "/"; } |
|
0
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
|
229
|
0
|
|
|
|
|
|
my $tmp_path = dist_file('Bio-ViennaNGS', "hub.txt" ); |
230
|
0
|
|
|
|
|
|
($basename,$dir,$ext) = fileparse($tmp_path,qr/\..*/); |
231
|
0
|
|
|
|
|
|
my $template_path = dir($dir,"template-TrackHub"); |
232
|
|
|
|
|
|
|
|
233
|
0
|
0
|
|
|
|
|
croak ("ERROR [$this_function] template directory not found\n") |
234
|
|
|
|
|
|
|
unless (-d $template_path); |
235
|
0
|
0
|
|
|
|
|
my $faToTwoBit = can_run('faToTwoBit') or |
236
|
|
|
|
|
|
|
croak ("ERROR [$this_function] faToTwoBit is not installed!"); |
237
|
|
|
|
|
|
|
|
238
|
0
|
0
|
|
|
|
|
my $bedToBigBed = can_run('bedToBigBed') or |
239
|
|
|
|
|
|
|
croak ("ERROR [$this_function] bedToBigBed is not installed!"); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# create track hub directory structure |
242
|
0
|
|
|
|
|
|
my $track_hub_name = "trackHub"; |
243
|
0
|
|
|
|
|
|
my $track_hub_directory = dir($basedir, $track_hub_name); |
244
|
0
|
|
|
|
|
|
my $genome_assembly_name = $species; |
245
|
0
|
|
|
|
|
|
my $genome_assembly_directory = dir($track_hub_directory,$genome_assembly_name); |
246
|
0
|
|
|
|
|
|
mkdir $track_hub_directory; |
247
|
0
|
|
|
|
|
|
mkdir $genome_assembly_directory; |
248
|
0
|
0
|
|
|
|
|
if (defined $log){ |
249
|
0
|
|
|
|
|
|
print LOG "LOG Base directory: $track_hub_directory\n"; |
250
|
0
|
|
|
|
|
|
print LOG "LOG Track Hub directory: $genome_assembly_directory\n"; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
#template definition |
254
|
0
|
|
|
|
|
|
my $template = Template->new({ |
255
|
|
|
|
|
|
|
INCLUDE_PATH => ["$template_path"], |
256
|
|
|
|
|
|
|
RELATIVE=>1, |
257
|
|
|
|
|
|
|
}); |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
#construct hub.txt |
260
|
0
|
|
|
|
|
|
my $hubtxt_path = file($track_hub_directory,"hub.txt")->stringify; |
261
|
0
|
|
|
|
|
|
my $hubtxt_file = "hub.txt"; |
262
|
0
|
|
|
|
|
|
my $hubtxt_vars = |
263
|
|
|
|
|
|
|
{ |
264
|
|
|
|
|
|
|
hubName => $species, |
265
|
|
|
|
|
|
|
shortLabel => $species, |
266
|
|
|
|
|
|
|
longLabel => $species, |
267
|
|
|
|
|
|
|
genomesFile => "genome.txt", |
268
|
|
|
|
|
|
|
email => 'email', |
269
|
|
|
|
|
|
|
descriptionURL => "$baseURL" . "description.html" |
270
|
|
|
|
|
|
|
}; |
271
|
0
|
0
|
|
|
|
|
$template->process($hubtxt_file,$hubtxt_vars,"$hubtxt_path") || |
272
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
#construct genome.txt |
275
|
0
|
|
|
|
|
|
my $genometxt_path = file($track_hub_directory, "genome.txt")->stringify; |
276
|
0
|
|
|
|
|
|
my $genometxt_file = "genome.txt"; |
277
|
0
|
|
|
|
|
|
my $genometxt_vars = |
278
|
|
|
|
|
|
|
{ |
279
|
|
|
|
|
|
|
genome => $species, |
280
|
|
|
|
|
|
|
trackDb => file($species, "trackDb.txt"), |
281
|
|
|
|
|
|
|
groups => file($species, "groups.txt"), |
282
|
|
|
|
|
|
|
description => "$species", |
283
|
|
|
|
|
|
|
twoBitPath => file($species,$species.".2bit"), |
284
|
|
|
|
|
|
|
organism => "organism", |
285
|
|
|
|
|
|
|
defaultPos => $species, |
286
|
|
|
|
|
|
|
orderKey => "10", |
287
|
|
|
|
|
|
|
scientificName => "scientificName", |
288
|
|
|
|
|
|
|
htmlPath => file($species,"description.html") |
289
|
|
|
|
|
|
|
}; |
290
|
0
|
0
|
|
|
|
|
$template->process($genometxt_file,$genometxt_vars,$genometxt_path) or |
291
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
292
|
|
|
|
|
|
|
|
293
|
0
|
|
|
|
|
|
my $tracksList; |
294
|
|
|
|
|
|
|
#Bigbeds are only created from infolder |
295
|
0
|
0
|
|
|
|
|
unless($filesdir =~ /-/){ |
296
|
0
|
0
|
|
|
|
|
if(-e $chrom_sizes_file){ |
297
|
0
|
|
|
|
|
|
convert_tracks($filesdir, $genome_assembly_directory, $species, $bedToBigBed, $chrom_sizes_file); |
298
|
|
|
|
|
|
|
}else{ |
299
|
0
|
|
|
|
|
|
my $chromosome_sizes = fetch_chrom_sizes($species); |
300
|
0
|
|
|
|
|
|
my $chromosome_size_filepath = file($genome_assembly_directory,"$species.chrom.sizes"); |
301
|
0
|
|
|
|
|
|
write_chromosome_sizes_file($chromosome_size_filepath,$chromosome_sizes); |
302
|
0
|
|
|
|
|
|
convert_tracks($filesdir, $genome_assembly_directory, $species, $bedToBigBed, $chromosome_size_filepath); |
303
|
|
|
|
|
|
|
} |
304
|
0
|
|
|
|
|
|
my @trackfiles = retrieve_tracks($genome_assembly_directory, $baseURL, $track_hub_name, $species); |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
#big beds |
307
|
0
|
|
|
|
|
|
foreach my $track (@trackfiles){ |
308
|
0
|
|
|
|
|
|
my $trackString = make_track(@$track); |
309
|
0
|
|
|
|
|
|
$tracksList .= $trackString; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
#big wigs |
313
|
0
|
|
|
|
|
|
my $bigwig_tracks_string = ""; |
314
|
0
|
0
|
|
|
|
|
unless($big_wig_ids=~/^-$/){ |
315
|
0
|
|
|
|
|
|
$bigwig_tracks_string = retrieve_bigwig_tracks($genome_assembly_directory, $baseURL, $track_hub_name, $species, $big_wig_ids); |
316
|
|
|
|
|
|
|
} |
317
|
0
|
|
|
|
|
|
$tracksList .= $bigwig_tracks_string; |
318
|
|
|
|
|
|
|
#construct trackDb.txt |
319
|
0
|
|
|
|
|
|
my $trackDb_txt_path = file($genome_assembly_directory, "trackDb.txt")->stringify; |
320
|
0
|
|
|
|
|
|
my $trackDb_txt_file = 'trackDb.txt'; |
321
|
0
|
|
|
|
|
|
my $trackDb_txt_vars = |
322
|
|
|
|
|
|
|
{ |
323
|
|
|
|
|
|
|
tracks => "$tracksList" |
324
|
|
|
|
|
|
|
}; |
325
|
0
|
0
|
|
|
|
|
$template->process($trackDb_txt_file,$trackDb_txt_vars,$trackDb_txt_path) or |
326
|
|
|
|
|
|
|
croak "Template process failed: ", $template->error(), "\n"; |
327
|
|
|
|
|
|
|
|
328
|
0
|
0
|
|
|
|
|
if (defined $log){ |
329
|
0
|
|
|
|
|
|
print LOG "LOG Track Hub created\n"; |
330
|
0
|
|
|
|
|
|
close(LOG); |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub convert_tracks{ |
335
|
0
|
|
|
0
|
0
|
|
my ($filesdir,$genome_assembly_directory,$accession,$bedToBigBed,$chromosome_size_filepath) = @_; |
336
|
0
|
|
|
|
|
|
my $currentDirectory = getcwd; |
337
|
0
|
0
|
|
|
|
|
chdir $filesdir or croak $!; |
338
|
0
|
|
|
|
|
|
my @bedfiles = <*.bed>; |
339
|
0
|
|
|
|
|
|
foreach my $bedfile (@bedfiles){ |
340
|
0
|
|
|
|
|
|
my $bed_file_path = file($filesdir,$bedfile); |
341
|
0
|
|
|
|
|
|
my $filename = $bedfile; |
342
|
0
|
|
|
|
|
|
$filename =~ s/.bed$//; |
343
|
0
|
|
|
|
|
|
my $bigbed_file = $filename . ".bb"; |
344
|
0
|
|
|
|
|
|
my $bigbed_file_path = file($genome_assembly_directory,$bigbed_file); |
345
|
0
|
|
|
|
|
|
`$bedToBigBed $bed_file_path $chromosome_size_filepath $bigbed_file_path`; |
346
|
|
|
|
|
|
|
} |
347
|
0
|
0
|
|
|
|
|
chdir $currentDirectory or croak $!; |
348
|
0
|
|
|
|
|
|
return 1; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub retrieve_tracks{ |
352
|
0
|
|
|
0
|
0
|
|
my ($directoryPath,$baseURL,$assembly_hub_name,$accession) = @_; |
353
|
0
|
|
|
|
|
|
my $currentDirectory = getcwd; |
354
|
0
|
0
|
|
|
|
|
chdir $directoryPath or croak $!; |
355
|
0
|
|
|
|
|
|
my @trackfiles = <*.bb>; |
356
|
0
|
|
|
|
|
|
my @tracks; |
357
|
0
|
|
|
|
|
|
my $counter = 0; |
358
|
0
|
|
|
|
|
|
foreach my $trackfile (@trackfiles){ |
359
|
0
|
|
|
|
|
|
my $filename = $trackfile; |
360
|
0
|
|
|
|
|
|
$filename =~ s/.bb$//; |
361
|
0
|
|
|
|
|
|
my @filenameSplit = split(/\./, $filename); |
362
|
0
|
|
|
|
|
|
my $fileaccession = $filenameSplit[0]; |
363
|
0
|
|
|
|
|
|
my $tag = $filenameSplit[1]; |
364
|
0
|
|
|
|
|
|
my $id = lc($tag); |
365
|
0
|
|
|
|
|
|
my $track = "refseq_" . $id; |
366
|
0
|
|
|
|
|
|
my $dir = dir($baseURL,$assembly_hub_name,$accession); |
367
|
|
|
|
|
|
|
#my $bigDataUrl = file($dir, $trackfile); |
368
|
0
|
|
|
|
|
|
my $bigDataUrl = file($trackfile); |
369
|
0
|
|
|
|
|
|
my $shortLabel = "RefSeq " . $tag; |
370
|
0
|
|
|
|
|
|
my $longLabel = "RefSeq " . $tag; |
371
|
0
|
|
|
|
|
|
my $type = "bigBed 12 ."; |
372
|
0
|
|
|
|
|
|
my $autoScale = "off"; |
373
|
0
|
|
|
|
|
|
my $bedNameLabel = "Gene Id"; |
374
|
0
|
|
|
|
|
|
my $searchIndex = "name"; |
375
|
0
|
|
|
|
|
|
my $colorByStrand = retrieve_color($counter); |
376
|
0
|
|
|
|
|
|
my $visibility = "pack"; |
377
|
0
|
|
|
|
|
|
my $group = "annotation"; |
378
|
0
|
|
|
|
|
|
my $priority = "10"; |
379
|
0
|
|
|
|
|
|
my @track = ($tag,$track,$bigDataUrl,$shortLabel,$longLabel,$type,$autoScale,$bedNameLabel,$searchIndex,$colorByStrand,$visibility,$group,$priority); |
380
|
0
|
|
|
|
|
|
my $trackreference = \@track; |
381
|
0
|
|
|
|
|
|
push(@tracks, $trackreference); |
382
|
0
|
|
|
|
|
|
$counter++; |
383
|
|
|
|
|
|
|
} |
384
|
0
|
0
|
|
|
|
|
chdir $currentDirectory or croak $!; |
385
|
0
|
|
|
|
|
|
return @tracks; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
sub retrieve_bigwig_tracks{ |
390
|
0
|
|
|
0
|
0
|
|
my ($directoryPath,$baseURL,$assembly_hub_name,$accession,$bigwigpaths) = @_; |
391
|
0
|
|
|
|
|
|
my $currentDirectory = getcwd; |
392
|
0
|
0
|
|
|
|
|
chdir $directoryPath or croak $!; |
393
|
0
|
|
|
|
|
|
my @big_wig_array = split('\#', $bigwigpaths); |
394
|
0
|
|
|
|
|
|
my $bigwigtracks = ""; |
395
|
0
|
|
|
|
|
|
foreach my $bigwig_entry (@big_wig_array){ |
396
|
0
|
0
|
|
|
|
|
if($bigwig_entry=~/,/){ |
397
|
|
|
|
|
|
|
#multi bigwig container detected |
398
|
0
|
|
|
|
|
|
my @big_wig_pair = split(',', $bigwig_entry); |
399
|
0
|
|
|
|
|
|
my $pos_wig; |
400
|
|
|
|
|
|
|
my $neg_wig; |
401
|
0
|
0
|
|
|
|
|
if ($big_wig_pair[0] =~ /\.pos\./){ |
|
|
0
|
|
|
|
|
|
402
|
0
|
|
|
|
|
|
$pos_wig = $big_wig_pair[0]; |
403
|
|
|
|
|
|
|
}elsif($big_wig_pair[1] =~ /\.pos\./){ |
404
|
0
|
|
|
|
|
|
$pos_wig = $big_wig_pair[1]; |
405
|
|
|
|
|
|
|
}else{ |
406
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
407
|
0
|
|
|
|
|
|
croak ("ERROR [$this_function] \no positive big wig file for container provided\n"); |
408
|
|
|
|
|
|
|
} |
409
|
0
|
0
|
|
|
|
|
if ($big_wig_pair[0] =~ /\.neg\./){ |
|
|
0
|
|
|
|
|
|
410
|
0
|
|
|
|
|
|
$neg_wig = $big_wig_pair[0]; |
411
|
|
|
|
|
|
|
}elsif($big_wig_pair[1] =~ /\.neg\./){ |
412
|
0
|
|
|
|
|
|
$neg_wig = $big_wig_pair[1]; |
413
|
|
|
|
|
|
|
}else{ |
414
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
415
|
0
|
|
|
|
|
|
croak ("ERROR [$this_function] \no negative big wig file for container provided\n"); |
416
|
|
|
|
|
|
|
} |
417
|
0
|
|
|
|
|
|
my ($basename1,$dir1,$ext1) = fileparse($pos_wig,qr/\..*/); |
418
|
0
|
|
|
|
|
|
my ($basename2,$dir2,$ext2) = fileparse($pos_wig,qr/\..*/); |
419
|
|
|
|
|
|
|
#construct container |
420
|
0
|
|
|
|
|
|
my $id = lc($basename1); |
421
|
0
|
|
|
|
|
|
my $tag = $id; |
422
|
0
|
|
|
|
|
|
my $track = $id; |
423
|
0
|
|
|
|
|
|
my $shortLabel = $tag; |
424
|
0
|
|
|
|
|
|
my $longLabel = $tag; |
425
|
0
|
|
|
|
|
|
my $type = "bigWig"; |
426
|
0
|
|
|
|
|
|
my $autoScale = "on"; |
427
|
0
|
|
|
|
|
|
my $visibility = "full"; |
428
|
0
|
|
|
|
|
|
my $priority = "1500"; |
429
|
0
|
|
|
|
|
|
my $container_string = make_multi_bigwig_container_track($tag, $track, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority); |
430
|
0
|
|
|
|
|
|
$bigwigtracks .= $container_string; |
431
|
|
|
|
|
|
|
#construct positive track |
432
|
0
|
|
|
|
|
|
my $track1 = $track . "_pos"; |
433
|
0
|
|
|
|
|
|
my $bigDataUrl1 = $pos_wig; |
434
|
0
|
|
|
|
|
|
my $shortLabel1 = $track1; |
435
|
0
|
|
|
|
|
|
my $longLabel1 = $track1; |
436
|
0
|
|
|
|
|
|
my $type1 = "bigWig"; |
437
|
0
|
|
|
|
|
|
my $parent1 = $track; |
438
|
0
|
|
|
|
|
|
my $color1 = "133,154,0"; |
439
|
0
|
|
|
|
|
|
my $track1_string = make_bigwig_container_track($track1, $bigDataUrl1, $shortLabel1, $longLabel1, $type1, $parent1, $color1); |
440
|
0
|
|
|
|
|
|
$bigwigtracks .= $track1_string; |
441
|
|
|
|
|
|
|
#construct negative track |
442
|
0
|
|
|
|
|
|
my $track2 = $track . "_neg"; |
443
|
0
|
|
|
|
|
|
my $bigDataUrl2 = $neg_wig; |
444
|
0
|
|
|
|
|
|
my $shortLabel2 = $track2; |
445
|
0
|
|
|
|
|
|
my $longLabel2 = $track2; |
446
|
0
|
|
|
|
|
|
my $type2 = "bigWig"; |
447
|
0
|
|
|
|
|
|
my $parent2 = $track; |
448
|
0
|
|
|
|
|
|
my $color2 = "220,51,47"; |
449
|
0
|
|
|
|
|
|
my $track2_string = make_bigwig_container_track($track2, $bigDataUrl2, $shortLabel2, $longLabel2, $type2, $parent2, $color2); |
450
|
0
|
|
|
|
|
|
$bigwigtracks .= $track2_string; |
451
|
|
|
|
|
|
|
}else{ |
452
|
|
|
|
|
|
|
#construct single wig |
453
|
0
|
|
|
|
|
|
my ($basename,$dir,$ext) = fileparse($bigwig_entry,qr/\..*/); |
454
|
0
|
|
|
|
|
|
my $id = lc($basename); |
455
|
0
|
|
|
|
|
|
my $tag = $id; |
456
|
0
|
|
|
|
|
|
my $track = $id; |
457
|
0
|
|
|
|
|
|
my $bigDataUrl = $bigwig_entry; |
458
|
0
|
|
|
|
|
|
my $shortLabel = $tag; |
459
|
0
|
|
|
|
|
|
my $longLabel = $tag; |
460
|
0
|
|
|
|
|
|
my $type = "bigWig"; |
461
|
0
|
|
|
|
|
|
my $autoScale = "on"; |
462
|
0
|
|
|
|
|
|
my $visibility = "full"; |
463
|
0
|
|
|
|
|
|
my $priority = "1500"; |
464
|
0
|
|
|
|
|
|
my $color = "38,140,210"; |
465
|
0
|
|
|
|
|
|
my $track_string = make_bigwig_track($tag, $track, $bigDataUrl, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority, $color); |
466
|
0
|
|
|
|
|
|
$bigwigtracks .= $track_string; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
} |
469
|
0
|
0
|
|
|
|
|
chdir $currentDirectory or croak $!; |
470
|
0
|
|
|
|
|
|
return $bigwigtracks; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
sub retrieve_color{ |
475
|
0
|
|
|
0
|
0
|
|
my $counter = shift; |
476
|
0
|
|
|
|
|
|
my $digitnumber = length($counter); |
477
|
0
|
|
|
|
|
|
my $colorcode; |
478
|
0
|
0
|
|
|
|
|
if($digitnumber>1){ |
479
|
0
|
|
|
|
|
|
$colorcode = $counter % 10; |
480
|
|
|
|
|
|
|
}else{ |
481
|
0
|
|
|
|
|
|
$colorcode = $counter; |
482
|
|
|
|
|
|
|
} |
483
|
0
|
|
|
|
|
|
my @color; |
484
|
|
|
|
|
|
|
#green |
485
|
0
|
|
|
|
|
|
$color[0] = "133,154,0 133,154,0"; |
486
|
|
|
|
|
|
|
#cyan |
487
|
0
|
|
|
|
|
|
$color[1] = "42,162,152 42,162,152"; |
488
|
|
|
|
|
|
|
#blue |
489
|
0
|
|
|
|
|
|
$color[2] = "38,140,210 38,140,210"; |
490
|
|
|
|
|
|
|
#violet |
491
|
0
|
|
|
|
|
|
$color[3] = "108,114,196 108,114,196"; |
492
|
|
|
|
|
|
|
#magenta |
493
|
0
|
|
|
|
|
|
$color[4] = "211,55,130 211,55,130"; |
494
|
|
|
|
|
|
|
#red |
495
|
0
|
|
|
|
|
|
$color[5] = "220,51,47 220,51,47"; |
496
|
|
|
|
|
|
|
#orange |
497
|
0
|
|
|
|
|
|
$color[6] = "203,76,22 203,76,22"; |
498
|
|
|
|
|
|
|
#yellow |
499
|
0
|
|
|
|
|
|
$color[7] = "181,138,0 181,138,0"; |
500
|
|
|
|
|
|
|
#black |
501
|
0
|
|
|
|
|
|
$color[8] = "0,44,54 0,44,54"; |
502
|
|
|
|
|
|
|
#grey |
503
|
0
|
|
|
|
|
|
$color[9] = "88,111,117 88,111,117"; |
504
|
|
|
|
|
|
|
|
505
|
0
|
|
|
|
|
|
return $color[$colorcode]; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub make_group{ |
509
|
0
|
|
|
0
|
0
|
|
my ($name, $label, $priority, $defaultIsClosed) = @_; |
510
|
0
|
|
|
|
|
|
my $group ="name $name\nlabel $label\npriority $priority\ndefaultIsClosed $defaultIsClosed\n"; |
511
|
0
|
|
|
|
|
|
return $group; |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
sub make_track{ |
515
|
0
|
|
|
0
|
0
|
|
my ($tag, $track, $bigDataUrl, $shortLabel, $longLabel, $type, $autoScale, $bedNameLabel, $searchIndex, $colorByStrand, $visibility, $group, $priority) = @_; |
516
|
0
|
|
|
|
|
|
my $trackEntry ="#$tag\ntrack $track\nbigDataUrl $bigDataUrl\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nautoScale $autoScale\nbedNameLabel $bedNameLabel\nsearchIndex $searchIndex\ncolorByStrand $colorByStrand\nvisibility $visibility\ngroup $group\npriority $priority\n\n"; |
517
|
0
|
|
|
|
|
|
return $trackEntry; |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
sub make_multi_bigwig_container_track{ |
521
|
0
|
|
|
0
|
0
|
|
my ($tag, $track, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority) = @_; |
522
|
0
|
|
|
|
|
|
my $trackEntry ="#$tag\ntrack $track\ncontainer multiWig\nnoInherit on\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nconfigureable on\nvisibility $visibility\naggregate transparentOverlay\nshowSubtrackColorOnUi on\nautoScale $autoScale\nwindowingFunction maximum\npriority $priority\nalwaysZero on\nyLineMark 0\nyLineOnOff on\nmaxHeightPixels 125:125:11\n\n"; |
523
|
0
|
|
|
|
|
|
return $trackEntry; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
sub make_bigwig_container_track{ |
527
|
0
|
|
|
0
|
0
|
|
my ($track, $bigDataUrl, $shortLabel, $longLabel, $type, $parent, $color) = @_; |
528
|
0
|
|
|
|
|
|
my $trackEntry = "track $track\nbigDataUrl $bigDataUrl\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nparent $parent\ncolor $color\n\n"; |
529
|
0
|
|
|
|
|
|
return $trackEntry; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
sub make_bigwig_track{ |
533
|
0
|
|
|
0
|
0
|
|
my ($tag, $track, $bigDataUrl, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority, $color) = @_; |
534
|
0
|
|
|
|
|
|
my $trackEntry ="#$tag\ntrack $track\nbigDataUrl $bigDataUrl\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nvisibility $visibility\nautoScale $autoScale\npriority $priority\nalwaysZero on\nyLineMark 0\nyLineOnOff on\nmaxHeightPixels 125:125:11\ncolor $color\n\n"; |
535
|
0
|
|
|
|
|
|
return $trackEntry; |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
sub valid_ncbi_accession{ |
539
|
|
|
|
|
|
|
# receives a NCBI accession ID, with or without version number |
540
|
|
|
|
|
|
|
# returns NCBI accession ID without version number |
541
|
0
|
|
|
0
|
0
|
|
my $acc = shift; |
542
|
0
|
0
|
|
|
|
|
if ($acc =~ /^(N[CST]\_\d{6})\.\d+?$/){ |
|
|
0
|
|
|
|
|
|
543
|
0
|
|
|
|
|
|
return $acc; # NCBI accession ID without version |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
elsif ($acc =~ /^(N[CST]\_\d{6})$/){ |
546
|
0
|
|
|
|
|
|
return $1; # NCBI accession ID without version |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
else { |
549
|
0
|
|
|
|
|
|
return 0; |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
sub parse_fasta_header{ |
554
|
0
|
|
|
0
|
0
|
|
my $filepath = shift; |
555
|
0
|
|
|
|
|
|
my $this_function = shift; |
556
|
0
|
|
|
|
|
|
open my $file, '<', "$filepath"; |
557
|
0
|
|
|
|
|
|
my $fastaheader = <$file>; |
558
|
0
|
|
|
|
|
|
chomp $fastaheader; |
559
|
0
|
|
|
|
|
|
close $file; |
560
|
|
|
|
|
|
|
#>gi|556503834|ref|NC_000913.3| Escherichia coli str. K-12 substr. MG1655 |
561
|
0
|
0
|
|
|
|
|
if($fastaheader=~/^>gi/){ |
562
|
0
|
|
|
|
|
|
print LOG "#NCBI fasta header detected\n"; |
563
|
0
|
|
|
|
|
|
my @headerfields = split(/\|/, $fastaheader); |
564
|
0
|
|
|
|
|
|
my $accession = $headerfields[3]; |
565
|
0
|
|
|
|
|
|
my $scientificName = $headerfields[4]; |
566
|
0
|
|
|
|
|
|
my @ids; |
567
|
0
|
|
|
|
|
|
push(@ids,$accession); |
568
|
0
|
|
|
|
|
|
push(@ids,$scientificName); |
569
|
0
|
|
|
|
|
|
return \@ids; |
570
|
|
|
|
|
|
|
}else{ |
571
|
0
|
|
|
|
|
|
$fastaheader=~s/^>//; |
572
|
0
|
0
|
|
|
|
|
if(valid_ncbi_accession($fastaheader)){ |
573
|
0
|
|
|
|
|
|
print LOG "#Header contains just valid NCBI accession number\n"; |
574
|
0
|
|
|
|
|
|
my @ids; |
575
|
0
|
|
|
|
|
|
push(@ids,$fastaheader); |
576
|
0
|
|
|
|
|
|
push(@ids,"scientific name not set"); |
577
|
0
|
|
|
|
|
|
return \@ids; |
578
|
|
|
|
|
|
|
}else{ |
579
|
0
|
|
|
|
|
|
print LOG "#No valid accession/ ncbi header\n"; |
580
|
0
|
|
|
|
|
|
croak ("ERROR [$this_function] \$fasta_path does not contain a valid accession/ ncbi header\n"); |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
sub write_chromosome_size_file{ |
586
|
0
|
|
|
0
|
0
|
|
my $filepath = shift; |
587
|
0
|
|
|
|
|
|
my $chromosome_name = shift; |
588
|
0
|
|
|
|
|
|
my $chromosome_size = shift; |
589
|
0
|
|
|
|
|
|
my $entry = $chromosome_name . "\t" . $chromosome_size . "\n"; |
590
|
0
|
|
|
|
|
|
open CHROMFILE, '>', "$filepath"; |
591
|
0
|
|
|
|
|
|
print CHROMFILE $entry; |
592
|
0
|
|
|
|
|
|
close CHROMFILE; |
593
|
0
|
|
|
|
|
|
return 1; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
sub write_chromosome_sizes_file{ |
597
|
0
|
|
|
0
|
0
|
|
my $filepath = shift; |
598
|
0
|
|
|
|
|
|
my $chromosome_sizes_reference = shift; |
599
|
0
|
|
|
|
|
|
my %chromosome_sizes = %{$chromosome_sizes_reference}; |
|
0
|
|
|
|
|
|
|
600
|
0
|
|
|
|
|
|
open CHROMFILE, '>', "$filepath"; |
601
|
0
|
|
|
|
|
|
foreach my $chromosome_name ( keys %chromosome_sizes){ |
602
|
0
|
|
|
|
|
|
my $chromosome_size = $chromosome_sizes{$chromosome_name}; |
603
|
0
|
|
|
|
|
|
my $entry = $chromosome_name . "\t" . $chromosome_size . "\n"; |
604
|
0
|
|
|
|
|
|
print CHROMFILE $entry; |
605
|
|
|
|
|
|
|
} |
606
|
0
|
|
|
|
|
|
close CHROMFILE; |
607
|
0
|
|
|
|
|
|
return 1; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
sub retrieve_chromosome_size{ |
611
|
0
|
|
|
0
|
0
|
|
my $inputFilepath = shift; |
612
|
0
|
|
|
|
|
|
open INFILE, '<', $inputFilepath; |
613
|
0
|
|
|
|
|
|
my @newfasta; |
614
|
0
|
|
|
|
|
|
my $chromosome_size = 0; |
615
|
0
|
|
|
|
|
|
my $header_skipped = 0; |
616
|
0
|
|
|
|
|
|
while (<INFILE>) { |
617
|
0
|
0
|
|
|
|
|
if($header_skipped){ |
618
|
0
|
|
|
|
|
|
chomp; |
619
|
0
|
|
|
|
|
|
$chromosome_size += length($_); |
620
|
|
|
|
|
|
|
}else{ |
621
|
0
|
|
|
|
|
|
$header_skipped = 1; |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
} |
624
|
0
|
|
|
|
|
|
close INFILE; |
625
|
0
|
|
|
|
|
|
return $chromosome_size; |
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
sub modify_fasta_header{ |
629
|
0
|
|
|
0
|
0
|
|
my $inputFilepath = shift; |
630
|
0
|
|
|
|
|
|
my $outputFilepath = shift; |
631
|
0
|
|
|
|
|
|
my $header = shift; |
632
|
|
|
|
|
|
|
|
633
|
0
|
|
|
|
|
|
open INFILE, '<', "$inputFilepath"; |
634
|
0
|
|
|
|
|
|
my @newfasta; |
635
|
0
|
|
|
|
|
|
while (<INFILE>) { |
636
|
0
|
|
|
|
|
|
push(@newfasta, $_); |
637
|
|
|
|
|
|
|
} |
638
|
0
|
|
|
|
|
|
close INFILE; |
639
|
|
|
|
|
|
|
#@newfasta = @newfasta[ 1 .. $#newfasta ]; |
640
|
0
|
|
|
|
|
|
shift @newfasta; |
641
|
0
|
|
|
|
|
|
unshift(@newfasta,">".$header."\n"); |
642
|
|
|
|
|
|
|
|
643
|
0
|
|
|
|
|
|
open OUTFILE, '>', "$outputFilepath"; |
644
|
0
|
|
|
|
|
|
foreach my $line (@newfasta) { |
645
|
0
|
|
|
|
|
|
print OUTFILE $line; |
646
|
|
|
|
|
|
|
} |
647
|
0
|
|
|
|
|
|
close OUTFILE; |
648
|
0
|
|
|
|
|
|
return 1; |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
1; |
652
|
|
|
|
|
|
|
__END__ |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head1 NAME |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
Bio::ViennaNGS::UCSC - Perl extension for easy UCSC Genome Browser |
657
|
|
|
|
|
|
|
integration. |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=head1 SYNOPSIS |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
use Bio::ViennaNGS::UCSC; |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=head1 DESCRIPTION |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
ViennaNGS::UCSC is a Perl extension for managing routine tasks with the |
666
|
|
|
|
|
|
|
UCSC Genome Browser. It comes with a set of utilities that serve as |
667
|
|
|
|
|
|
|
reference implementation of the routines implemented in the library. All |
668
|
|
|
|
|
|
|
utilities are located in the 'scripts' folder. |
669
|
|
|
|
|
|
|
The main functionality is provided by the make_assembly_hub function. |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=head2 EXPORT |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
Routines: |
674
|
|
|
|
|
|
|
make_assembly_hub, make_track_hub |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
Variables: |
677
|
|
|
|
|
|
|
none |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head3 make_assembly_hub() |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Build assembly hubs for the UCSC genome browser. |
682
|
|
|
|
|
|
|
This function takes 4 parameters: |
683
|
|
|
|
|
|
|
1. absolute path of input fasta file(e.g. /home/user/input.fa) |
684
|
|
|
|
|
|
|
2. path to the ouput directory (e.g. /home/user/assemblyhubs/) |
685
|
|
|
|
|
|
|
3. base URL where the output folder will be placed for upload to the UCSC genome browser (e.g. http://www.foo.com/folder/) |
686
|
|
|
|
|
|
|
4. path for the log file (/home/user/logs/assemblyhubconstructionlog) |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
=head3 make_track_hub() |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
Build track hubs for the UCSC genome browser. |
691
|
|
|
|
|
|
|
This function takes 4 parameters: |
692
|
|
|
|
|
|
|
1. chromosome id as used in existing ucsc assembly hub (e.g. chr1) |
693
|
|
|
|
|
|
|
2. path to the ouput directory (e.g. /home/user/assemblyhubs/) |
694
|
|
|
|
|
|
|
3. base URL where the output folder will be placed for upload to the UCSC genome browser (e.g. http://www.foo.com/folder/) |
695
|
|
|
|
|
|
|
4. path for the log file (/home/user/logs/assemblyhubconstructionlog) |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=head1 SEE ALSO |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=over |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=item L<Bio::ViennaNGS> |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=back |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=head1 AUTHORS |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=over |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=item Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt> |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=item Florian Eggenhofer, E<lt>florian.eggenhofer@univie.ac.atE<gt> |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
=back |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
Copyright (C) 2015 Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt> |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
721
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.0 or, |
722
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
725
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
726
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
=cut |