| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::FASTASequence; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ABSTRACT: Parsing sequence informations in FASTA format. |
|
4
|
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
23254
|
use 5.006; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
35
|
|
|
6
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
28
|
|
|
7
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
6
|
|
|
|
1
|
|
|
|
|
2605
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.07'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# new creates a new instance of Bio::FASTASequence |
|
13
|
|
|
|
|
|
|
# |
|
14
|
|
|
|
|
|
|
sub new{ |
|
15
|
0
|
|
|
0
|
1
|
|
my ($class, $string_text) = @_; |
|
16
|
0
|
|
|
|
|
|
my ($description,$accession_nr); |
|
17
|
0
|
|
|
|
|
|
my %f_db_nr = (); |
|
18
|
0
|
0
|
|
|
|
|
unless($string_text){ |
|
19
|
0
|
|
|
|
|
|
$string_text = ""; |
|
20
|
|
|
|
|
|
|
} |
|
21
|
0
|
|
|
|
|
|
my $self = {}; |
|
22
|
0
|
|
|
|
|
|
bless($self,$class); |
|
23
|
0
|
|
|
|
|
|
my $sequencer; |
|
24
|
|
|
|
|
|
|
#print STDERR "Object doesn't contain fasta-sequence" if($string_text && !_is_fasta($string_text)); |
|
25
|
0
|
0
|
0
|
|
|
|
die "Object doesn't contain fasta-sequence" if($string_text && !_is_fasta($string_text)); |
|
26
|
0
|
0
|
|
|
|
|
if($string_text){ |
|
27
|
0
|
|
|
|
|
|
$string_text =~ s/^(\r?\n)+//; |
|
28
|
0
|
|
|
|
|
|
$string_text =~ s/^>+/>/; |
|
29
|
0
|
|
|
|
|
|
my ($description_line,$sequence) = split(/\n/,$string_text,2); |
|
30
|
0
|
|
|
|
|
|
$description_line =~ s/\s+$//; |
|
31
|
0
|
|
|
|
|
|
$description_line =~ s/\r?\n/\n/g; |
|
32
|
0
|
|
|
|
|
|
$sequence =~ s/\r?\n//g; |
|
33
|
0
|
0
|
|
|
|
|
unless($description_line =~ /^>/){ |
|
34
|
0
|
|
|
|
|
|
$self->{accession_nr} = ""; |
|
35
|
|
|
|
|
|
|
} |
|
36
|
|
|
|
|
|
|
else{ |
|
37
|
|
|
|
|
|
|
# parsing the description line |
|
38
|
0
|
0
|
0
|
|
|
|
if($description_line =~ /^>gi\|/){ |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
39
|
0
|
|
|
|
|
|
my ($gi,$number,$db); |
|
40
|
0
|
|
|
|
|
|
$description_line =~ s/^>//; |
|
41
|
0
|
|
|
|
|
|
($gi,$number,$db,$accession_nr,$description) = split(/\|/,$description_line); |
|
42
|
|
|
|
|
|
|
} |
|
43
|
|
|
|
|
|
|
elsif($description_line =~ /^>sp\|/ || $description_line =~ /^>sptrembl\|/){ |
|
44
|
0
|
|
|
|
|
|
$description_line =~ s/^>//; |
|
45
|
0
|
|
|
|
|
|
my $desc; |
|
46
|
0
|
|
|
|
|
|
($desc,$description) = split(/\s/,$description_line,2); |
|
47
|
0
|
|
|
|
|
|
$accession_nr = (split(/\|/,$desc))[1]; |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
elsif($description_line =~ /^>tr\|/){ |
|
50
|
0
|
|
|
|
|
|
$description_line =~ s/^>//; |
|
51
|
0
|
|
|
|
|
|
my $desc; |
|
52
|
0
|
|
|
|
|
|
($desc,$description) = split(/\s/,$description_line,2); |
|
53
|
0
|
|
|
|
|
|
$accession_nr = (split(/\|/,$desc))[1]; |
|
54
|
|
|
|
|
|
|
} |
|
55
|
|
|
|
|
|
|
elsif($description_line =~ /^>[XY\d+]/){ |
|
56
|
0
|
|
|
|
|
|
$description_line =~ s/>//; |
|
57
|
0
|
|
|
|
|
|
chomp $description_line; |
|
58
|
0
|
|
|
|
|
|
$description = (split(/\s/,$description_line,3))[-1]; |
|
59
|
0
|
|
|
|
|
|
$accession_nr = (split(/\s/,$description_line,3))[0]; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
elsif($description_line =~ /^>[0-9A-Za-z_]+\s?/){ |
|
62
|
0
|
|
|
|
|
|
$description_line =~ s/^>//; |
|
63
|
|
|
|
|
|
|
#-------------------------------------------------# |
|
64
|
|
|
|
|
|
|
# IPI-Sequences # |
|
65
|
|
|
|
|
|
|
#-------------------------------------------------# |
|
66
|
0
|
0
|
|
|
|
|
if($description_line =~ /^IPI:/){ |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# split only at first whitespace and take first element |
|
68
|
0
|
|
|
|
|
|
my $foreign_numbers = (split(/\s/,$description_line,2))[0]; |
|
69
|
0
|
|
|
|
|
|
$description = (split(/\s/,$description_line,3))[2]; |
|
70
|
0
|
|
|
|
|
|
my @foreign_acs = split(/\|/,$foreign_numbers); |
|
71
|
|
|
|
|
|
|
# cross-references to other databases |
|
72
|
0
|
|
|
|
|
|
foreach my $f_ac(@foreign_acs){ |
|
73
|
0
|
|
|
|
|
|
my ($key, $value) = split(/:/,$f_ac); |
|
74
|
0
|
|
|
|
|
|
$f_db_nr{$key} = $value; |
|
75
|
|
|
|
|
|
|
} |
|
76
|
0
|
0
|
|
|
|
|
unless($f_db_nr{'SWISS-PROT'}){ |
|
77
|
0
|
|
|
|
|
|
$f_db_nr{'SWISS-PROT'} = "NULL"; |
|
78
|
|
|
|
|
|
|
} |
|
79
|
0
|
0
|
|
|
|
|
unless($f_db_nr{'ENSEMBL'}){ |
|
80
|
0
|
|
|
|
|
|
$f_db_nr{'ENSEMBL'} = "NULL"; |
|
81
|
|
|
|
|
|
|
} |
|
82
|
0
|
0
|
|
|
|
|
unless($f_db_nr{'REFSEQ_XP'}){ |
|
83
|
0
|
|
|
|
|
|
$f_db_nr{'REFSEQ_XP'} = "NULL"; |
|
84
|
|
|
|
|
|
|
} |
|
85
|
0
|
0
|
|
|
|
|
unless($f_db_nr{'TREMBL'}){ |
|
86
|
0
|
|
|
|
|
|
$f_db_nr{'TREMBL'} = "NULL"; |
|
87
|
|
|
|
|
|
|
} |
|
88
|
0
|
|
|
|
|
|
$accession_nr = $f_db_nr{'IPI'}; |
|
89
|
0
|
|
|
|
|
|
delete $f_db_nr{IPI}; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
#-----------------------------------------# |
|
92
|
|
|
|
|
|
|
# format begins with accession-nr # |
|
93
|
|
|
|
|
|
|
#-----------------------------------------# |
|
94
|
|
|
|
|
|
|
elsif($description_line =~ /^[A-Za-z][0-9][A-Z0-9a-z]{3}[0-9][\s\|]/){ |
|
95
|
0
|
0
|
|
|
|
|
if($description_line =~ /\|/){ |
|
96
|
0
|
|
|
|
|
|
($accession_nr, $description) = split(/\|/,$description_line,2); |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
else{ |
|
99
|
0
|
|
|
|
|
|
($accession_nr, $description) = split(/\s/,$description_line,2); |
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
elsif($description_line =~ /^[A-Za-z0-9_]+\s*?$/){ |
|
103
|
0
|
|
|
|
|
|
chomp $description_line; |
|
104
|
0
|
|
|
|
|
|
$accession_nr = $description_line; |
|
105
|
0
|
|
|
|
|
|
$description_line = ''; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
|
|
|
|
|
|
else{ |
|
108
|
0
|
|
|
|
|
|
($accession_nr,$description)= split(/\s/,$description_line,2); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
|
$accession_nr =~ s/^>//; |
|
114
|
0
|
|
|
|
|
|
$accession_nr =~ s/[^\w\d]*?$//; |
|
115
|
0
|
|
|
|
|
|
$accession_nr =~ s/\.\d$//; |
|
116
|
0
|
|
|
|
|
|
$sequence =~ s/[^A-Z]//g; |
|
117
|
0
|
|
|
|
|
|
$sequencer = $sequence; |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
$self->{text} = $sequencer; |
|
121
|
0
|
|
|
|
|
|
$self->{accession_nr} = $accession_nr; |
|
122
|
0
|
|
|
|
|
|
$self->{description} = $description; |
|
123
|
0
|
|
|
|
|
|
$self->{seq_length} = length($sequencer); |
|
124
|
0
|
|
|
|
|
|
$self->{dbrefs} = \%f_db_nr; |
|
125
|
0
|
|
|
|
|
|
$self->{crc64} = $self->_crc64(); |
|
126
|
|
|
|
|
|
|
|
|
127
|
0
|
|
|
|
|
|
return $self; |
|
128
|
|
|
|
|
|
|
}#end new |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# |
|
131
|
|
|
|
|
|
|
# getSequence returns the sequence itself. |
|
132
|
|
|
|
|
|
|
# |
|
133
|
|
|
|
|
|
|
sub getSequence{ |
|
134
|
0
|
|
|
0
|
1
|
|
my ($class) = @_; |
|
135
|
0
|
|
|
|
|
|
return $class->{text}; |
|
136
|
|
|
|
|
|
|
}# end getText |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# |
|
139
|
|
|
|
|
|
|
# _is_fasta checks whether the given Sequence is in fasta-format or not |
|
140
|
|
|
|
|
|
|
# |
|
141
|
|
|
|
|
|
|
sub _is_fasta{ |
|
142
|
0
|
|
|
0
|
|
|
my ($sequence) = @_; |
|
143
|
0
|
|
|
|
|
|
my @lines = split(/\r?\n/,$sequence); |
|
144
|
0
|
|
|
|
|
|
my $desc = shift(@lines); |
|
145
|
0
|
|
|
|
|
|
my $seq = join("",@lines); |
|
146
|
0
|
|
|
|
|
|
$seq =~ s/\s+//g; |
|
147
|
0
|
0
|
0
|
|
|
|
if($desc =~ /^>/ && $seq !~ /[^A-NP-Z\*\-]/i && length($seq) > 0){ |
|
|
|
|
0
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
return 1; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
0
|
|
|
|
|
|
return 0; |
|
151
|
|
|
|
|
|
|
}# end _is_fasta |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# |
|
154
|
|
|
|
|
|
|
# getSequenceLength returns how man aminoacids the sequence contains |
|
155
|
|
|
|
|
|
|
# |
|
156
|
|
|
|
|
|
|
sub getSequenceLength{ |
|
157
|
0
|
|
|
0
|
1
|
|
my ($class) = @_; |
|
158
|
0
|
|
|
|
|
|
return $class->{seq_length}; |
|
159
|
|
|
|
|
|
|
}# end getSequenceLength |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# |
|
162
|
|
|
|
|
|
|
# getAccessionNr returns the parsed accession number |
|
163
|
|
|
|
|
|
|
# |
|
164
|
|
|
|
|
|
|
sub getAccessionNr{ |
|
165
|
0
|
|
|
0
|
1
|
|
my ($class) = @_; |
|
166
|
0
|
|
|
|
|
|
return $class->{accession_nr}; |
|
167
|
|
|
|
|
|
|
}# end of getAccessionNr |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# |
|
170
|
|
|
|
|
|
|
# getDescription returns the description |
|
171
|
|
|
|
|
|
|
# |
|
172
|
|
|
|
|
|
|
sub getDescription{ |
|
173
|
0
|
|
|
0
|
1
|
|
my ($class) = @_; |
|
174
|
0
|
|
|
|
|
|
return $class->{description} |
|
175
|
|
|
|
|
|
|
}# end getDescription |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# |
|
178
|
|
|
|
|
|
|
# getCrc64 returns the crc64-checksum of the sequence |
|
179
|
|
|
|
|
|
|
# |
|
180
|
|
|
|
|
|
|
sub getCrc64{ |
|
181
|
0
|
|
|
0
|
1
|
|
my ($class) = @_; |
|
182
|
0
|
|
|
|
|
|
return $class->{crc64}; |
|
183
|
|
|
|
|
|
|
}# end getCrc64 |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# |
|
186
|
|
|
|
|
|
|
# getDBRefs returns an anonymous hash containing all references to foreign databases |
|
187
|
|
|
|
|
|
|
# |
|
188
|
|
|
|
|
|
|
sub getDBRefs{ |
|
189
|
0
|
|
|
0
|
1
|
|
my ($class) = @_; |
|
190
|
0
|
|
|
|
|
|
return $class->{dbrefs}; |
|
191
|
|
|
|
|
|
|
}# end getDBRefs |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# |
|
194
|
|
|
|
|
|
|
# allIndexesOf returns a reference to an array containing all positions of the requested Substring |
|
195
|
|
|
|
|
|
|
# |
|
196
|
|
|
|
|
|
|
sub allIndexesOf{ |
|
197
|
0
|
|
|
0
|
1
|
|
my ($self,$search) = @_; |
|
198
|
0
|
|
|
|
|
|
my $i = 1; |
|
199
|
0
|
|
|
|
|
|
my $index = 0; |
|
200
|
0
|
|
|
|
|
|
my @indices = (); |
|
201
|
0
|
|
|
|
|
|
while($i != -1){ |
|
202
|
0
|
|
|
|
|
|
$index = index($self->{text},$search,$index); |
|
203
|
0
|
0
|
|
|
|
|
push(@indices,$index) unless ($index == -1); |
|
204
|
0
|
|
|
|
|
|
$i = $index; |
|
205
|
0
|
|
|
|
|
|
$index++; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
0
|
|
|
|
|
|
return \@indices; |
|
208
|
|
|
|
|
|
|
}# end allIndicesOf |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# |
|
211
|
|
|
|
|
|
|
# _crc64 calculates the crc64-checksum of the sequence. It's the crc64-checksum like at swiss-prot |
|
212
|
|
|
|
|
|
|
# the code is mainly adapted from SWISS::CRC64 |
|
213
|
|
|
|
|
|
|
# |
|
214
|
|
|
|
|
|
|
sub _crc64 { |
|
215
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
|
216
|
0
|
|
|
|
|
|
my $text = $self->{text}; |
|
217
|
1
|
|
|
1
|
|
9
|
use constant EXP => 0xd8000000; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
911
|
|
|
218
|
0
|
|
|
|
|
|
my @highCrcTable = 256; |
|
219
|
0
|
|
|
|
|
|
my @lowCrcTable = 256; |
|
220
|
0
|
|
|
|
|
|
my $initialized = (); |
|
221
|
0
|
|
|
|
|
|
my $low = 0; |
|
222
|
0
|
|
|
|
|
|
my $high = 0; |
|
223
|
|
|
|
|
|
|
|
|
224
|
0
|
0
|
|
|
|
|
unless($initialized) { |
|
225
|
0
|
|
|
|
|
|
$initialized = 1; |
|
226
|
0
|
|
|
|
|
|
for my $i(0..255) { |
|
227
|
0
|
|
|
|
|
|
my $low_part = $i; |
|
228
|
0
|
|
|
|
|
|
my $high_part = 0; |
|
229
|
0
|
|
|
|
|
|
for my $j(0..7) { |
|
230
|
0
|
|
|
|
|
|
my $flag = $low_part & 1; # rflag ist für alle ungeraden zahlen 1 |
|
231
|
0
|
|
|
|
|
|
$low_part >>= 1;# um ein bit nach rechts verschieben |
|
232
|
0
|
0
|
|
|
|
|
$low_part |= (1 << 31) if $high_part & 1; # bitweises oder mit 2147483648 (), wenn $parth ungerade |
|
233
|
0
|
|
|
|
|
|
$high_part >>= 1; # um ein bit nach rechtsverschieben |
|
234
|
0
|
0
|
|
|
|
|
$high_part ^= EXP if $flag; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
0
|
|
|
|
|
|
$highCrcTable[$i] = $high_part; |
|
237
|
0
|
|
|
|
|
|
$lowCrcTable[$i] = $low_part; |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
|
|
241
|
0
|
|
|
|
|
|
foreach (split '', $text) { |
|
242
|
0
|
|
|
|
|
|
my $shr = ($high & 0xFF) << 24; |
|
243
|
0
|
|
|
|
|
|
my $tmph = $high >> 8; |
|
244
|
0
|
|
|
|
|
|
my $tmpl = ($low >> 8) | $shr; |
|
245
|
0
|
|
|
|
|
|
my $index = ($low ^ (unpack "C", $_)) & 0xFF; |
|
246
|
0
|
|
|
|
|
|
$high = $tmph ^ $highCrcTable[$index]; |
|
247
|
0
|
|
|
|
|
|
$low = $tmpl ^ $lowCrcTable[$index]; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
0
|
|
|
|
|
|
return sprintf("%08X%08X", $high, $low); |
|
250
|
|
|
|
|
|
|
}# end crc64 |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# |
|
253
|
|
|
|
|
|
|
# seq2file prints the sequence into a file in fasta-file |
|
254
|
|
|
|
|
|
|
# |
|
255
|
|
|
|
|
|
|
sub seq2file{ |
|
256
|
0
|
|
|
0
|
1
|
|
my ($self,$file,$args_ref) = @_; |
|
257
|
|
|
|
|
|
|
# open the file to write |
|
258
|
0
|
0
|
|
|
|
|
open(W_SEQUENCE,">$file") or die "Can't open $file: $!\n"; |
|
259
|
0
|
|
|
|
|
|
print W_SEQUENCE ">",$self->{accession_nr}; |
|
260
|
|
|
|
|
|
|
# add the references |
|
261
|
0
|
|
|
|
|
|
foreach my $dbkey(keys(%{$self->{dbrefs}})){ |
|
|
0
|
|
|
|
|
|
|
|
262
|
0
|
0
|
|
|
|
|
print W_SEQUENCE "|".$dbkey.":".$self->{dbrefs}->{$dbkey} if($self->{dbrefs}->{$dbkey} ne 'NULL'); |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
# add description |
|
265
|
0
|
|
|
|
|
|
print W_SEQUENCE " ",$self->{description},"\n"; |
|
266
|
|
|
|
|
|
|
# add the sequence |
|
267
|
0
|
|
|
|
|
|
print W_SEQUENCE $self->{text},"\n"; |
|
268
|
0
|
|
|
|
|
|
close W_SEQUENCE; |
|
269
|
|
|
|
|
|
|
}# end seq2file |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# |
|
272
|
|
|
|
|
|
|
# getFASTA return a string in fasta-format |
|
273
|
|
|
|
|
|
|
# |
|
274
|
|
|
|
|
|
|
sub getFASTA{ |
|
275
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
|
276
|
0
|
|
|
|
|
|
my $fasta = ">".$self->{accession_nr}; |
|
277
|
|
|
|
|
|
|
# add the references to foreign databases |
|
278
|
0
|
|
|
|
|
|
foreach my $dbkey(keys(%{$self->{dbrefs}})){ |
|
|
0
|
|
|
|
|
|
|
|
279
|
0
|
0
|
|
|
|
|
$fasta .= "|".$dbkey.":".$self->{dbrefs}->{$dbkey} if($self->{dbrefs}->{$dbkey} ne "NULL"); |
|
280
|
|
|
|
|
|
|
} |
|
281
|
|
|
|
|
|
|
# add description |
|
282
|
0
|
0
|
|
|
|
|
$fasta .= " ".$self->{description} if($self->{description}); |
|
283
|
0
|
|
|
|
|
|
$fasta .= "\n"; |
|
284
|
|
|
|
|
|
|
# add sequence |
|
285
|
0
|
|
|
|
|
|
$fasta .= $self->{text}."\n"; |
|
286
|
0
|
|
|
|
|
|
return $fasta; |
|
287
|
|
|
|
|
|
|
}# end getFASTA |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# |
|
290
|
|
|
|
|
|
|
# addDBRef adds a reference to a foreign database to the anonymous hash |
|
291
|
|
|
|
|
|
|
# |
|
292
|
|
|
|
|
|
|
sub addDBRef{ |
|
293
|
0
|
|
|
0
|
1
|
|
my ($self,$db,$dbref) = @_; |
|
294
|
|
|
|
|
|
|
# if a reference to the requested database already exists, append the new reference |
|
295
|
0
|
0
|
0
|
|
|
|
if($self->{dbrefs}->{$db} && ($self->{dbrefs}->{$db} ne 'NULL')){ |
|
296
|
0
|
|
|
|
|
|
$self->{dbrefs}->{$db} .= ";".$dbref; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
# if no reference exists, add the reference to the hash |
|
299
|
|
|
|
|
|
|
else{ |
|
300
|
0
|
|
|
|
|
|
$self->{dbrefs}->{$db} = $dbref; |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
}# end addDBRef |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# |
|
305
|
|
|
|
|
|
|
# seq2xml creates an xml-string containing all information about the given sequence. |
|
306
|
|
|
|
|
|
|
# |
|
307
|
|
|
|
|
|
|
sub seq2xml{ |
|
308
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
|
309
|
|
|
|
|
|
|
# create the tags representing the references to foreign databases, e.g. SWISS-Prot |
|
310
|
0
|
|
|
|
|
|
my $dbrefs = " "; |
|
311
|
0
|
|
|
|
|
|
foreach(keys(%{$self->{dbrefs}})){ |
|
|
0
|
|
|
|
|
|
|
|
312
|
0
|
|
|
|
|
|
my $key = uc($_); |
|
313
|
0
|
0
|
|
|
|
|
$dbrefs .= "\n<".$key.'>'.${$self->{dbrefs}}->{$_}.''.$key.'>' if(${$self->{dbrefs}}->{$_} ne 'NULL'); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
} |
|
315
|
0
|
0
|
|
|
|
|
$dbrefs = "\n$dbrefs" if($dbrefs ne " "); |
|
316
|
|
|
|
|
|
|
# create the xml-string |
|
317
|
0
|
|
|
|
|
|
my $xml = qq~ |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
$self->{description}$dbrefs |
|
320
|
|
|
|
|
|
|
$self->{seq_length} |
|
321
|
|
|
|
|
|
|
$self->{crc64} |
|
322
|
|
|
|
|
|
|
$self->{text} |
|
323
|
|
|
|
|
|
|
~; |
|
324
|
0
|
|
|
|
|
|
return $xml; |
|
325
|
|
|
|
|
|
|
}# end seq2xml |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
1; |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
__END__ |