| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package BioX::CLPM::Engine; |
|
2
|
1
|
|
|
1
|
|
5025
|
use base qw(BioX::CLPM::Base); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
81
|
|
|
3
|
|
|
|
|
|
|
use BioX::CLPM::Sequence; |
|
4
|
|
|
|
|
|
|
use BioX::CLPM::Enzyme; |
|
5
|
|
|
|
|
|
|
use BioX::CLPM::Linker; |
|
6
|
|
|
|
|
|
|
use BioX::CLPM::Fragments; |
|
7
|
|
|
|
|
|
|
use Bio::Perl qw(read_sequence); |
|
8
|
|
|
|
|
|
|
use Class::Std; |
|
9
|
|
|
|
|
|
|
use Class::Std::Utils; |
|
10
|
|
|
|
|
|
|
use Switch; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use warnings; |
|
13
|
|
|
|
|
|
|
use strict; |
|
14
|
|
|
|
|
|
|
use Carp; |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.0.1'); |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
{ |
|
19
|
|
|
|
|
|
|
my %sequences_of :ATTR( :get :set :default<[]> :init_arg ); |
|
20
|
|
|
|
|
|
|
my %enzyme_of :ATTR( :get :set :default<''> :init_arg ); |
|
21
|
|
|
|
|
|
|
my %linker_of :ATTR( :get :set :default<''> :init_arg ); |
|
22
|
|
|
|
|
|
|
my %peaks_of :ATTR( :get :set :default<''> :init_arg ); |
|
23
|
|
|
|
|
|
|
my %matches_of :ATTR( :get :set :default<''> :init_arg ); |
|
24
|
|
|
|
|
|
|
my %fragments_of :ATTR( :get :set :default<''> :init_arg ); |
|
25
|
|
|
|
|
|
|
my %tolerance_of :ATTR( :get :set :default<''> :init_arg ); |
|
26
|
|
|
|
|
|
|
my %missed_clvg_of :ATTR( :get :set :default<''> :init_arg ); |
|
27
|
|
|
|
|
|
|
my %var_mod_of :ATTR( :get :set :default<''> :init_arg ); |
|
28
|
|
|
|
|
|
|
my %stat_mod_of :ATTR( :get :set :default<''> :init_arg ); |
|
29
|
|
|
|
|
|
|
#my %attribute_of :ATTR( :get :set :default<''> :init_arg ); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# PRIV |
|
32
|
|
|
|
|
|
|
sub BUILD { |
|
33
|
|
|
|
|
|
|
my ( $self, $ident, $arg_ref ) = @_; |
|
34
|
|
|
|
|
|
|
$self->db_trunc(); |
|
35
|
|
|
|
|
|
|
return; |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# PRIV |
|
39
|
|
|
|
|
|
|
sub START { |
|
40
|
|
|
|
|
|
|
my ( $self, $ident, $arg_ref ) = @_; |
|
41
|
|
|
|
|
|
|
if ( $arg_ref ) { $self->_run( $arg_ref ); } |
|
42
|
|
|
|
|
|
|
return; |
|
43
|
|
|
|
|
|
|
} |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# PRIV |
|
46
|
|
|
|
|
|
|
sub _run { |
|
47
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# Add or replace sequences from id or name |
|
50
|
|
|
|
|
|
|
my @sequences; |
|
51
|
|
|
|
|
|
|
if ( defined $arg_ref->{sequences} ) { |
|
52
|
|
|
|
|
|
|
if ( defined $arg_ref->{sequences}->{files} ) { |
|
53
|
|
|
|
|
|
|
foreach my $file ( @{ $arg_ref->{sequences}->{files} } ) { |
|
54
|
|
|
|
|
|
|
push @sequences, $self->sequence({ file => $file }); |
|
55
|
|
|
|
|
|
|
} |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
if ( defined $arg_ref->{sequences}->{ids} ) { |
|
58
|
|
|
|
|
|
|
foreach my $file ( @{ $arg_ref->{sequences}->{ids} } ) { |
|
59
|
|
|
|
|
|
|
#push @sequences, $self->sequence({ file => $arg_ref->{sequence_id} }); |
|
60
|
|
|
|
|
|
|
# TODO retrieve by id |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
# TODO retrieve by accn |
|
65
|
|
|
|
|
|
|
# TODO make sure there two, otherwise raise error |
|
66
|
|
|
|
|
|
|
if ( @sequences ) { $self->set_sequences( \@sequences ); } |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Add or replace enzyme from id or name |
|
69
|
|
|
|
|
|
|
if ( defined $arg_ref->{enzyme_id} or defined $arg_ref->{enzyme_name} ) { |
|
70
|
|
|
|
|
|
|
my $enzyme = $self->enzyme({ id => $arg_ref->{enzyme_id}, |
|
71
|
|
|
|
|
|
|
name => $arg_ref->{enzyme_name} }); |
|
72
|
|
|
|
|
|
|
if ( $enzyme ) { $self->set_enzyme( $enzyme ); } |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Add or replace linker from id or name |
|
76
|
|
|
|
|
|
|
if ( defined $arg_ref->{linker_id} or defined $arg_ref->{linker_name} ) { |
|
77
|
|
|
|
|
|
|
my $linker = $self->linker({ id => $arg_ref->{linker_id}, |
|
78
|
|
|
|
|
|
|
name => $arg_ref->{linker_name} }); |
|
79
|
|
|
|
|
|
|
if ( $linker ) { $self->set_linker( $linker ); } |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Mark linking aa's |
|
83
|
|
|
|
|
|
|
$self->mark_links(); |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Cleave sequence into fragments |
|
86
|
|
|
|
|
|
|
$self->cleave(); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# Calculate masses |
|
89
|
|
|
|
|
|
|
$self->masses(); |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# Cross link |
|
92
|
|
|
|
|
|
|
$self->cross_link(); |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# Match |
|
95
|
|
|
|
|
|
|
#$self->match(); |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Show results |
|
98
|
|
|
|
|
|
|
#$self->results(); |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
return; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# API READ ONLY |
|
104
|
|
|
|
|
|
|
sub sequences { my ( $self ) = @_; return @{ $self->get_sequences() }; } |
|
105
|
|
|
|
|
|
|
sub seq_one { my ( $self ) = @_; my @sequences = $self->sequences(); return $sequences[0]; } |
|
106
|
|
|
|
|
|
|
sub seq_two { my ( $self ) = @_; my @sequences = $self->sequences(); return $sequences[1]; } |
|
107
|
|
|
|
|
|
|
sub enzyme_id { my ( $self ) = @_; return $self->get_enzyme->get_enzyme_id(); } |
|
108
|
|
|
|
|
|
|
sub linker_id { my ( $self ) = @_; return $self->get_linker->get_linker_id(); } |
|
109
|
|
|
|
|
|
|
sub var_mods { my ( $self ) = @_; return %{ $self->get_var_mod() }; } |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# API |
|
112
|
|
|
|
|
|
|
sub run { |
|
113
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
114
|
|
|
|
|
|
|
if ( $arg_ref ) { $self->_run( $arg_ref ); } |
|
115
|
|
|
|
|
|
|
return; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# API |
|
119
|
|
|
|
|
|
|
sub sequence { |
|
120
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
121
|
|
|
|
|
|
|
my $sequence_id = $arg_ref->{id} ? $arg_ref->{id} : 0; |
|
122
|
|
|
|
|
|
|
my $file = $arg_ref->{file} ? $arg_ref->{file} : ''; |
|
123
|
|
|
|
|
|
|
if ( $sequence_id ) { |
|
124
|
|
|
|
|
|
|
return BioX::CLPM::Sequence->new({ sequence_id => $sequence_id }); |
|
125
|
|
|
|
|
|
|
} elsif ( $file ) { |
|
126
|
|
|
|
|
|
|
return BioX::CLPM::Sequence->new({ file => $file }); |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# API |
|
131
|
|
|
|
|
|
|
sub enzyme { |
|
132
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
133
|
|
|
|
|
|
|
my $enzyme_id = $arg_ref->{id} ? $arg_ref->{id} : 0; |
|
134
|
|
|
|
|
|
|
my $enzyme_name = $arg_ref->{name} ? $arg_ref->{name} : ''; |
|
135
|
|
|
|
|
|
|
if ( $enzyme_id || $enzyme_name ) { |
|
136
|
|
|
|
|
|
|
my $enzyme = BioX::CLPM::Enzyme->new({ enzyme_id => $enzyme_id, |
|
137
|
|
|
|
|
|
|
enzyme_name => $enzyme_name }); |
|
138
|
|
|
|
|
|
|
$self->set_enzyme( $enzyme ); |
|
139
|
|
|
|
|
|
|
return $enzyme; |
|
140
|
|
|
|
|
|
|
} else { |
|
141
|
|
|
|
|
|
|
return $self->get_enzyme(); |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
} |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# API |
|
146
|
|
|
|
|
|
|
sub linker { |
|
147
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
148
|
|
|
|
|
|
|
my $linker_id = $arg_ref->{id} ? $arg_ref->{id} : 0; |
|
149
|
|
|
|
|
|
|
my $linker_name = $arg_ref->{name} ? $arg_ref->{name} : ''; |
|
150
|
|
|
|
|
|
|
if ( $linker_id || $linker_name ) { |
|
151
|
|
|
|
|
|
|
my $linker = BioX::CLPM::Linker->new({ linker_id => $linker_id, |
|
152
|
|
|
|
|
|
|
linker_name => $linker_name }); |
|
153
|
|
|
|
|
|
|
$self->set_linker( $linker ); |
|
154
|
|
|
|
|
|
|
return $linker; |
|
155
|
|
|
|
|
|
|
} else { |
|
156
|
|
|
|
|
|
|
return $self->get_linker(); |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# API |
|
161
|
|
|
|
|
|
|
sub mark_links { |
|
162
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
163
|
|
|
|
|
|
|
my $linker = defined $arg_ref->{linker} ? $arg_ref->{linker} : $self->get_linker(); |
|
164
|
|
|
|
|
|
|
my @ends = $linker->ends(); |
|
165
|
|
|
|
|
|
|
my @sequences = defined $arg_ref->{sequences} ? @{ $arg_ref->{sequences} } : $self->sequences(); |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
$self->_mark_links({ sequence => $sequences[0], end => $ends[0] }); |
|
168
|
|
|
|
|
|
|
$self->_mark_links({ sequence => $sequences[1], end => $ends[1] }); |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
$self->set_sequences( \@sequences ); |
|
171
|
|
|
|
|
|
|
return \@sequences; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# API |
|
175
|
|
|
|
|
|
|
sub cleave { |
|
176
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
177
|
|
|
|
|
|
|
my $enzyme = $self->get_enzyme(); |
|
178
|
|
|
|
|
|
|
my $linker = $self->get_linker(); |
|
179
|
|
|
|
|
|
|
my $missed_clvg = $self->get_missed_clvg(); |
|
180
|
|
|
|
|
|
|
my @sequences = defined $arg_ref->{sequences} ? @{ $arg_ref->{sequences} } : $self->sequences(); |
|
181
|
|
|
|
|
|
|
my @fragments; |
|
182
|
|
|
|
|
|
|
warn "ENGINE cleave() \n"; |
|
183
|
|
|
|
|
|
|
my $last_index = 1; |
|
184
|
|
|
|
|
|
|
for ( my $i = 0; $i < @sequences; $i++ ) { |
|
185
|
|
|
|
|
|
|
@fragments = $self->_cleave({ sequence => $sequences[$i], enzyme => $enzyme }); |
|
186
|
|
|
|
|
|
|
@fragments = $self->_missed({ fragments => \@fragments, missed_clvg => $missed_clvg }); |
|
187
|
|
|
|
|
|
|
@fragments = $self->_filter({ fragments => \@fragments, index => $i }); |
|
188
|
|
|
|
|
|
|
#warn " setting fragments " . join( ', ', @fragments ) . "\n"; |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
my $fragments = BioX::CLPM::Fragments->new({ sequence_id => $sequences[$i]->get_sequence_id(), index => $last_index, type => 'simple' }); |
|
191
|
|
|
|
|
|
|
foreach my $fragment ( @fragments ) { $fragments->add({ sequence => $fragment }); } |
|
192
|
|
|
|
|
|
|
$sequences[$i]->set_fragments( $fragments->get_list() ); |
|
193
|
|
|
|
|
|
|
$last_index = $fragments->get_index(); |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
$self->set_sequences( \@sequences ); |
|
196
|
|
|
|
|
|
|
return \@sequences; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# API |
|
200
|
|
|
|
|
|
|
sub masses { |
|
201
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
202
|
|
|
|
|
|
|
my %var_mods = defined $arg_ref->{var_mod} ? %{ $arg_ref->{var_mod} } : $self->var_mods(); |
|
203
|
|
|
|
|
|
|
my @sequences = defined $arg_ref->{sequences} ? @{ $arg_ref->{sequences} } : $self->sequences(); |
|
204
|
|
|
|
|
|
|
my $aa_masses = $self->_stat_mod(); |
|
205
|
|
|
|
|
|
|
foreach my $sequence ( @sequences ) { |
|
206
|
|
|
|
|
|
|
my @fragments = $sequence->fragments(); |
|
207
|
|
|
|
|
|
|
for ( my $i = 0; $i < @fragments; $i++ ) { |
|
208
|
|
|
|
|
|
|
my $sequence = $fragments[$i]->get_sequence(); |
|
209
|
|
|
|
|
|
|
my @sequence = split( //, $sequence ); |
|
210
|
|
|
|
|
|
|
my $counts = {}; |
|
211
|
|
|
|
|
|
|
my $mass = 0; |
|
212
|
|
|
|
|
|
|
foreach my $aa ( @sequence ) { |
|
213
|
|
|
|
|
|
|
$aa = uc($aa); |
|
214
|
|
|
|
|
|
|
$mass += $aa_masses->{$aa}; |
|
215
|
|
|
|
|
|
|
$counts->{$aa}++; |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
# Add mass of 1 molecule of water |
|
218
|
|
|
|
|
|
|
$mass += 18.010565; |
|
219
|
|
|
|
|
|
|
$fragments[$i]->set_mass( $mass ); |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# Keep counts for aa's affected by var_mod |
|
222
|
|
|
|
|
|
|
my $keepers = {}; |
|
223
|
|
|
|
|
|
|
foreach my $var_mod ( keys %var_mods ) { |
|
224
|
|
|
|
|
|
|
$keepers->{$var_mod} = $counts->{$var_mod}; |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
$fragments[$i]->set_counts( $keepers ); |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
$sequence->set_fragments( \@fragments ); |
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
$self->set_sequences( \@sequences ); |
|
231
|
|
|
|
|
|
|
return \@sequences; |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# API |
|
235
|
|
|
|
|
|
|
sub cross_link { |
|
236
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
237
|
|
|
|
|
|
|
my $mass = defined $arg_ref->{mass} ? $arg_ref->{mass} : $self->linker()->get_mass(); |
|
238
|
|
|
|
|
|
|
#my @sequences = defined $arg_ref->{sequences} ? @{ $arg_ref->{sequences} } : $self->sequences(); |
|
239
|
|
|
|
|
|
|
warn "ENGINE cross_link() mass $mass\n"; |
|
240
|
|
|
|
|
|
|
#return \@sequences; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# PRIV |
|
244
|
|
|
|
|
|
|
sub _ffm { |
|
245
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
246
|
|
|
|
|
|
|
my $list1 = defined $arg_ref->{list1} ? $arg_ref->{list1} : []; |
|
247
|
|
|
|
|
|
|
my $list2 = defined $arg_ref->{list2} ? $arg_ref->{list2} : []; |
|
248
|
|
|
|
|
|
|
my $type = defined $arg_ref->{type} ? $arg_ref->{type} : ''; |
|
249
|
|
|
|
|
|
|
my $linker = defined $arg_ref->{linker} ? $arg_ref->{linker} : $self->get_linker(); |
|
250
|
|
|
|
|
|
|
foreach my $frag1 ( @$list1 ) { |
|
251
|
|
|
|
|
|
|
foreach my $frag2 ( @$list2 ) { |
|
252
|
|
|
|
|
|
|
my $fragments = BioX::CLPM::Fragments->new({ type => 'linked' }); |
|
253
|
|
|
|
|
|
|
$fragments->add({ fragment_id_1 => $frag1->get_fragment_id(), |
|
254
|
|
|
|
|
|
|
fragment_id_2 => $frag2->get_fragment_id(), |
|
255
|
|
|
|
|
|
|
mass => $frag1->{mass} + $frag2->{mass} + $linker->get_mass() }); |
|
256
|
|
|
|
|
|
|
} |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# PRIV |
|
261
|
|
|
|
|
|
|
sub _cleave { |
|
262
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
263
|
|
|
|
|
|
|
my $sequence = defined $arg_ref->{sequence} ? $arg_ref->{sequence} : ''; |
|
264
|
|
|
|
|
|
|
my $enzyme = defined $arg_ref->{enzyme} ? $arg_ref->{enzyme} : $self->get_enzyme(); |
|
265
|
|
|
|
|
|
|
my $clvg_position = $enzyme->get_clvg_position(); |
|
266
|
|
|
|
|
|
|
my ( $sgn, @chars ) = split( //, $enzyme->get_rule() ); |
|
267
|
|
|
|
|
|
|
my $length = @chars; |
|
268
|
|
|
|
|
|
|
my $rule = join( '', @chars ); |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
my $sequence_str = $sequence->get_cl_sequence(); |
|
271
|
|
|
|
|
|
|
my @sequence_chars = split( //, $sequence_str ); |
|
272
|
|
|
|
|
|
|
my $cut = 0; |
|
273
|
|
|
|
|
|
|
my ( $fragment, @fragments ); |
|
274
|
|
|
|
|
|
|
for ( my $i = 0; $i < @sequence_chars; ++$i ){ |
|
275
|
|
|
|
|
|
|
my $aa = $sequence_chars[$i]; |
|
276
|
|
|
|
|
|
|
$cut = 0; |
|
277
|
|
|
|
|
|
|
$fragment .= $aa; |
|
278
|
|
|
|
|
|
|
foreach my $clvg_site( $enzyme->clvg_sites() ){ |
|
279
|
|
|
|
|
|
|
if ( uc( $aa ) eq $clvg_site ){ |
|
280
|
|
|
|
|
|
|
my $next_chars = @sequence_chars[$i+1..$i+$length]; |
|
281
|
|
|
|
|
|
|
unless ( uc( $next_chars ) eq $rule ){ |
|
282
|
|
|
|
|
|
|
push( @fragments, $fragment ); |
|
283
|
|
|
|
|
|
|
$fragment=''; |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
$cut = 1; |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
} |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
if ( !$cut ) { push( @fragments, $fragment ); } |
|
290
|
|
|
|
|
|
|
return @fragments; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# PRIV |
|
294
|
|
|
|
|
|
|
sub _missed { |
|
295
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
296
|
|
|
|
|
|
|
my @fragments = defined $arg_ref->{fragments} ? @{ $arg_ref->{fragments} } : (); |
|
297
|
|
|
|
|
|
|
my $missed_clvg = defined $arg_ref->{missed_clvg} ? $arg_ref->{missed_clvg} : 0; |
|
298
|
|
|
|
|
|
|
my ( @results, $k ); |
|
299
|
|
|
|
|
|
|
for ( my $i = $missed_clvg + 1; $i > 1; $i-- ) { |
|
300
|
|
|
|
|
|
|
for ( my $j = 0; $j < @fragments - $i + 1; $j++ ) { |
|
301
|
|
|
|
|
|
|
my $new_fragment = $fragments[$j]; |
|
302
|
|
|
|
|
|
|
for ( $k = 0; $k < $i - 1; $k++ ) { |
|
303
|
|
|
|
|
|
|
$new_fragment .= $fragments[$j+$k+1]; |
|
304
|
|
|
|
|
|
|
} |
|
305
|
|
|
|
|
|
|
while ( $new_fragment =~ m/[a-z]$/ and $i == $missed_clvg + 1){ |
|
306
|
|
|
|
|
|
|
if (! $fragments[$j+$k+1] ) { last; } |
|
307
|
|
|
|
|
|
|
$new_fragment .= $fragments[$j+$k+1]; |
|
308
|
|
|
|
|
|
|
$k++; |
|
309
|
|
|
|
|
|
|
} |
|
310
|
|
|
|
|
|
|
push( @results, $new_fragment ); |
|
311
|
|
|
|
|
|
|
} |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
push( @fragments, @results ); |
|
314
|
|
|
|
|
|
|
return @fragments; |
|
315
|
|
|
|
|
|
|
} |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# PRIV |
|
318
|
|
|
|
|
|
|
sub _filter { |
|
319
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
320
|
|
|
|
|
|
|
my @fragments = defined $arg_ref->{fragments} ? @{ $arg_ref->{fragments} } : (); |
|
321
|
|
|
|
|
|
|
push @fragments, my $final_fragment = pop @fragments; |
|
322
|
|
|
|
|
|
|
my $linker = defined $arg_ref->{linker} ? $arg_ref->{linker} : $self->get_linker(); |
|
323
|
|
|
|
|
|
|
my $index = defined $arg_ref->{index} ? $arg_ref->{index} : 0; |
|
324
|
|
|
|
|
|
|
my @ends = $linker->ends(); |
|
325
|
|
|
|
|
|
|
my $end = $ends[$index]; |
|
326
|
|
|
|
|
|
|
my @results; |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
foreach my $fragment ( @fragments ) { |
|
329
|
|
|
|
|
|
|
if ( $end ) { if ( $self->_has_lc($fragment) ){ if ( $self->_has_uc_last($fragment) or ( $fragment =~ m/$final_fragment$/ ) ) { push @results, $fragment; } } } |
|
330
|
|
|
|
|
|
|
else { if ( $self->_has_uc_last($fragment) or ( $fragment =~ m/$final_fragment$/ ) ) { push @results, $fragment; } } |
|
331
|
|
|
|
|
|
|
} |
|
332
|
|
|
|
|
|
|
return @results; |
|
333
|
|
|
|
|
|
|
} |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# PRIV |
|
336
|
|
|
|
|
|
|
sub _stat_mod { |
|
337
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
338
|
|
|
|
|
|
|
my $aa_masses = defined $arg_ref->{aa_masses} ? $arg_ref->{aa_masses} : $self->load_masses(); |
|
339
|
|
|
|
|
|
|
switch( $self->get_stat_mod() ) { |
|
340
|
|
|
|
|
|
|
case 'carbamidomethylated' { $aa_masses->{'C'} = $aa_masses->{'C2'} } |
|
341
|
|
|
|
|
|
|
case 'carboxymethylated' { $aa_masses->{'C'} = $aa_masses->{'C3'} } |
|
342
|
|
|
|
|
|
|
case 'acrylamid adduct' { $aa_masses->{'C'} = $aa_masses->{'C4'} } |
|
343
|
|
|
|
|
|
|
case 'oxidized methionine' { $aa_masses->{'M'} = $aa_masses->{'M2'} } |
|
344
|
|
|
|
|
|
|
} |
|
345
|
|
|
|
|
|
|
return $aa_masses; |
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# PRIV |
|
349
|
|
|
|
|
|
|
sub _has_lc { |
|
350
|
|
|
|
|
|
|
my ( $self, $str ) = @_; |
|
351
|
|
|
|
|
|
|
if ( $str =~m/.*[a-z]+.*/ ) { return 1; } else { return 0; } |
|
352
|
|
|
|
|
|
|
} |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# PRIV |
|
355
|
|
|
|
|
|
|
sub _has_uc_last { |
|
356
|
|
|
|
|
|
|
my ( $self, $str ) = @_; |
|
357
|
|
|
|
|
|
|
if( $str =~ m/[A-Z]$/ ) { return 1; } else { return 0; } |
|
358
|
|
|
|
|
|
|
} |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# PRIV |
|
361
|
|
|
|
|
|
|
sub _mark_links { |
|
362
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
363
|
|
|
|
|
|
|
my $sequence = defined $arg_ref->{sequence} ? $arg_ref->{sequence} : $self->get_sequence(); |
|
364
|
|
|
|
|
|
|
my $sequence_str = $sequence->get_sequence(); |
|
365
|
|
|
|
|
|
|
my $end = defined $arg_ref->{end} ? $arg_ref->{end} : $self->get_end(); |
|
366
|
|
|
|
|
|
|
my @amino_acids = split( '', $end ); |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
foreach my $amino_acid ( @amino_acids ) { |
|
369
|
|
|
|
|
|
|
my $amino_acid_lc = lc($amino_acid); |
|
370
|
|
|
|
|
|
|
$amino_acid = uc($amino_acid); |
|
371
|
|
|
|
|
|
|
$sequence_str =~ s/$amino_acid/$amino_acid_lc/g; |
|
372
|
|
|
|
|
|
|
} |
|
373
|
|
|
|
|
|
|
$sequence->set_cl_sequence( $sequence_str ); |
|
374
|
|
|
|
|
|
|
return $sequence; |
|
375
|
|
|
|
|
|
|
} |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# UTIL |
|
378
|
|
|
|
|
|
|
sub insert_run { |
|
379
|
|
|
|
|
|
|
my ( $self ) = @_; |
|
380
|
|
|
|
|
|
|
my $enzyme_id = $self->get_enzyme->get_enzyme_id(); |
|
381
|
|
|
|
|
|
|
my $linker_id = $self->get_linker->get_linker_id(); |
|
382
|
|
|
|
|
|
|
my $tolerance = $self->get_tolerance(); |
|
383
|
|
|
|
|
|
|
my $missed_clvg = $self->get_missed_clvg(); |
|
384
|
|
|
|
|
|
|
my $stat_mod = $self->get_stat_mod(); |
|
385
|
|
|
|
|
|
|
my $var_mod = $self->get_var_mod(); |
|
386
|
|
|
|
|
|
|
my $sql = "insert into run_data ( enzyme_id, linker_id, tolerance, missed_clvg, stat_mod, var_mod) values ($enzyme_id, $linker_id, $tolerance, $missed_clvg, '$stat_mod', '$var_mod' )"; |
|
387
|
|
|
|
|
|
|
$self->sqlexec( $sql ); |
|
388
|
|
|
|
|
|
|
$sql = 'select LAST_INSERT_ID()'; |
|
389
|
|
|
|
|
|
|
my ( $run_id ) = $self->sqlexec( $sql, '\@@' ); |
|
390
|
|
|
|
|
|
|
return $run_id; |
|
391
|
|
|
|
|
|
|
} |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# UTIL |
|
394
|
|
|
|
|
|
|
sub db_trunc { |
|
395
|
|
|
|
|
|
|
my ( $self ) = @_; |
|
396
|
|
|
|
|
|
|
warn "ENGINE db_trunc() \n"; |
|
397
|
|
|
|
|
|
|
$self->sqlexec("truncate table sequences"); |
|
398
|
|
|
|
|
|
|
$self->sqlexec("truncate table fragments"); |
|
399
|
|
|
|
|
|
|
$self->sqlexec("truncate table final_fragment_masses"); |
|
400
|
|
|
|
|
|
|
$self->sqlexec("truncate table run_data"); |
|
401
|
|
|
|
|
|
|
$self->sqlexec("truncate table file_masses"); |
|
402
|
|
|
|
|
|
|
$self->sqlexec("truncate table results"); |
|
403
|
|
|
|
|
|
|
$self->sqlexec("truncate table precursor_masses"); |
|
404
|
|
|
|
|
|
|
} |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# UTIL |
|
407
|
|
|
|
|
|
|
sub get_seq { |
|
408
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
409
|
|
|
|
|
|
|
my $file = $arg_ref->{file} ? $arg_ref->{file} : ''; |
|
410
|
|
|
|
|
|
|
my $id = $arg_ref->{id} ? $arg_ref->{id} : 0; |
|
411
|
|
|
|
|
|
|
my $sequence; |
|
412
|
|
|
|
|
|
|
if ( -e $file ) { |
|
413
|
|
|
|
|
|
|
# Guess file format from extension with read_sequence() |
|
414
|
|
|
|
|
|
|
my $seq_object = read_sequence( $file ); |
|
415
|
|
|
|
|
|
|
$sequence = $seq_object->seq(); |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
elsif ( $id ) { |
|
418
|
|
|
|
|
|
|
# Get sequence from database |
|
419
|
|
|
|
|
|
|
# TODO |
|
420
|
|
|
|
|
|
|
} |
|
421
|
|
|
|
|
|
|
return $sequence; |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# sub add_sequence { |
|
425
|
|
|
|
|
|
|
# my ( $self, $arg_ref ) = @_; |
|
426
|
|
|
|
|
|
|
# my @sequences = @{$self->get_sequences()}; |
|
427
|
|
|
|
|
|
|
# push( @sequences, |
|
428
|
|
|
|
|
|
|
# BioX::CLPM::Sequence->new( { sequence => $arg_ref->{sequence} }, |
|
429
|
|
|
|
|
|
|
# { sequence_id => $arg_ref->{sequence_id} ? $arg_ref->{sequence_id} : (@$sequences + 1) } ); |
|
430
|
|
|
|
|
|
|
# $self->set_sequences(\@sequences); |
|
431
|
|
|
|
|
|
|
# } |
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
|
435
|
|
|
|
|
|
|
__END__ |