line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package BioX::CLPM::Engine; |
2
|
1
|
|
|
1
|
|
5025
|
use base qw(BioX::CLPM::Base); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
81
|
|
3
|
|
|
|
|
|
|
use BioX::CLPM::Sequence; |
4
|
|
|
|
|
|
|
use BioX::CLPM::Enzyme; |
5
|
|
|
|
|
|
|
use BioX::CLPM::Linker; |
6
|
|
|
|
|
|
|
use BioX::CLPM::Fragments; |
7
|
|
|
|
|
|
|
use Bio::Perl qw(read_sequence); |
8
|
|
|
|
|
|
|
use Class::Std; |
9
|
|
|
|
|
|
|
use Class::Std::Utils; |
10
|
|
|
|
|
|
|
use Switch; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use warnings; |
13
|
|
|
|
|
|
|
use strict; |
14
|
|
|
|
|
|
|
use Carp; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.0.1'); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
{ |
19
|
|
|
|
|
|
|
# Per-object attribute storage, declared with Class::Std's :ATTR marker.
# `sequences` defaults to an empty array reference; every other attribute
# defaults to the empty string. The rest of this file calls get_<name>() and
# set_<name>() for these attributes (e.g. get_linker, set_sequences);
# presumably those accessors are generated from the :get/:set flags --
# TODO confirm against Class::Std / BioX::CLPM::Base.
my %sequences_of :ATTR( :get :set :default<[]> :init_arg ); |
20
|
|
|
|
|
|
|
my %enzyme_of :ATTR( :get :set :default<''> :init_arg ); |
21
|
|
|
|
|
|
|
my %linker_of :ATTR( :get :set :default<''> :init_arg ); |
22
|
|
|
|
|
|
|
my %peaks_of :ATTR( :get :set :default<''> :init_arg ); |
23
|
|
|
|
|
|
|
my %matches_of :ATTR( :get :set :default<''> :init_arg ); |
24
|
|
|
|
|
|
|
my %fragments_of :ATTR( :get :set :default<''> :init_arg ); |
25
|
|
|
|
|
|
|
my %tolerance_of :ATTR( :get :set :default<''> :init_arg ); |
26
|
|
|
|
|
|
|
my %missed_clvg_of :ATTR( :get :set :default<''> :init_arg ); |
27
|
|
|
|
|
|
|
my %var_mod_of :ATTR( :get :set :default<''> :init_arg ); |
28
|
|
|
|
|
|
|
my %stat_mod_of :ATTR( :get :set :default<''> :init_arg ); |
29
|
|
|
|
|
|
|
#my %attribute_of :ATTR( :get :set :default<''> :init_arg ); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# PRIV |
32
|
|
|
|
|
|
|
# BUILD hook: empties the working database tables (see db_trunc) each time
# an engine object is built. The object id and argument hash that follow
# $self in the argument list are accepted but not used.
sub BUILD {
    my $self = shift;
    $self->db_trunc();
    return;
}
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# PRIV |
39
|
|
|
|
|
|
|
# START hook: runs the whole pipeline (_run) when the constructor was given
# arguments.
#
# Class::Std passes START a hash reference even for a bare new(), so a plain
# truth test on $arg_ref is always true; the pipeline would then run with
# nothing configured and die in mark_links() on the empty-string default
# linker. An empty argument hash is therefore treated like "no arguments".
#
#   $self    - the engine object
#   $ident   - object id supplied by Class::Std (unused)
#   $arg_ref - hash ref of constructor arguments
sub START {
    my ( $self, $ident, $arg_ref ) = @_;
    return if !$arg_ref;
    return if ref $arg_ref eq 'HASH' && !%{$arg_ref};
    $self->_run($arg_ref);
    return;
}
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# PRIV |
46
|
|
|
|
|
|
|
# Drives one complete run from an argument hash:
#   1. builds sequence objects from $arg_ref->{sequences}{files} and stores
#      them when at least one was produced (lookup by id is still a TODO);
#   2. resolves the enzyme from enzyme_id / enzyme_name;
#   3. resolves the linker from linker_id / linker_name;
#   4. calls mark_links, cleave, masses and cross_link, in that order.
# Matching and result reporting are not wired in yet.
sub _run {
    my ( $self, $arg_ref ) = @_;

    # Add or replace sequences from id or name
    my @sequences;
    if ( defined $arg_ref->{sequences} ) {
        my $spec = $arg_ref->{sequences};
        if ( defined $spec->{files} ) {
            for my $path ( @{ $spec->{files} } ) {
                push @sequences, $self->sequence( { file => $path } );
            }
        }
        if ( defined $spec->{ids} ) {
            for my $id ( @{ $spec->{ids} } ) {
                # TODO retrieve by id
            }
        }
    }
    # TODO retrieve by accn
    # TODO make sure there are two, otherwise raise error
    $self->set_sequences( \@sequences ) if @sequences;

    # Add or replace enzyme from id or name
    if ( defined $arg_ref->{enzyme_id} || defined $arg_ref->{enzyme_name} ) {
        my %lookup = (
            id   => $arg_ref->{enzyme_id},
            name => $arg_ref->{enzyme_name},
        );
        my $enzyme = $self->enzyme( \%lookup );
        $self->set_enzyme($enzyme) if $enzyme;
    }

    # Add or replace linker from id or name
    if ( defined $arg_ref->{linker_id} || defined $arg_ref->{linker_name} ) {
        my %lookup = (
            id   => $arg_ref->{linker_id},
            name => $arg_ref->{linker_name},
        );
        my $linker = $self->linker( \%lookup );
        $self->set_linker($linker) if $linker;
    }

    # Mark linking aa's, cleave into fragments, calculate masses, cross link.
    # A named loop variable is used so a stage that touches $_ cannot
    # interfere with the iteration.
    for my $stage (qw( mark_links cleave masses cross_link )) {
        $self->$stage();
    }

    # Match
    #$self->match();

    # Show results
    #$self->results();

    return;
}
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# API READ ONLY |
104
|
|
|
|
|
|
|
# Returns the stored sequences as a flat list (dereferences get_sequences()).
sub sequences {
    my ($self) = @_;
    my $list_ref = $self->get_sequences();
    return @{$list_ref};
}
105
|
|
|
|
|
|
|
# Returns the first stored sequence (undef when there is none).
sub seq_one {
    my ($self) = @_;
    my ($first) = $self->sequences();
    return $first;
}
106
|
|
|
|
|
|
|
# Returns the second stored sequence (undef when there are fewer than two).
sub seq_two {
    my ($self) = @_;
    my ( undef, $second ) = $self->sequences();
    return $second;
}
107
|
|
|
|
|
|
|
# Returns the id of the stored enzyme object.
sub enzyme_id {
    my ($self) = @_;
    my $enzyme = $self->get_enzyme;
    return $enzyme->get_enzyme_id();
}
108
|
|
|
|
|
|
|
# Returns the id of the stored linker object.
sub linker_id {
    my ($self) = @_;
    my $linker = $self->get_linker;
    return $linker->get_linker_id();
}
109
|
|
|
|
|
|
|
# Returns the variable modifications as a flat key/value list.
#
# The var_mod attribute defaults to the empty string, and dereferencing
# that as a hash is fatal under "use strict" -- which is what masses() hits
# when it is called without a var_mod argument. Anything that is not a hash
# reference is therefore reported as "no modifications" (empty list).
sub var_mods {
    my ($self) = @_;
    my $var_mod = $self->get_var_mod();
    return ref $var_mod eq 'HASH' ? %{$var_mod} : ();
}
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# API |
112
|
|
|
|
|
|
|
# Public entry point: hands $arg_ref to _run() when one was supplied and
# does nothing otherwise. Always returns nothing.
sub run {
    my ( $self, $arg_ref ) = @_;
    $self->_run($arg_ref) if $arg_ref;
    return;
}
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# API |
119
|
|
|
|
|
|
|
# Builds a BioX::CLPM::Sequence object from either a sequence id or a file.
#
#   $arg_ref->{id}   - sequence id; takes precedence when true
#   $arg_ref->{file} - file name, used when no id is given
#
# Returns the new object, or nothing (empty list / undef) when neither key
# holds a true value. The original fell off the end of the sub in that case,
# so the caller received the value of the last tested condition (an empty
# string), which _run() then pushed onto its sequence list.
sub sequence {
    my ( $self, $arg_ref ) = @_;
    my $sequence_id = $arg_ref->{id}   ? $arg_ref->{id}   : 0;
    my $file        = $arg_ref->{file} ? $arg_ref->{file} : '';

    if ($sequence_id) {
        return BioX::CLPM::Sequence->new( { sequence_id => $sequence_id } );
    }
    if ($file) {
        return BioX::CLPM::Sequence->new( { file => $file } );
    }
    return;
}
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# API |
131
|
|
|
|
|
|
|
# Combined getter/constructor for the enzyme.
#
# With a true $arg_ref->{id} or $arg_ref->{name}: creates a
# BioX::CLPM::Enzyme from them, stores it on the engine and returns it.
# Otherwise: returns whatever enzyme is currently stored.
sub enzyme {
    my ( $self, $arg_ref ) = @_;
    my $enzyme_id   = $arg_ref->{id}   || 0;
    my $enzyme_name = $arg_ref->{name} || '';

    # Nothing to look up: act as a plain getter.
    return $self->get_enzyme() if !$enzyme_id && !$enzyme_name;

    my %ctor_args = (
        enzyme_id   => $enzyme_id,
        enzyme_name => $enzyme_name,
    );
    my $enzyme = BioX::CLPM::Enzyme->new( \%ctor_args );
    $self->set_enzyme($enzyme);
    return $enzyme;
}
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# API |
146
|
|
|
|
|
|
|
# Combined getter/constructor for the linker.
#
# With a true $arg_ref->{id} or $arg_ref->{name}: creates a
# BioX::CLPM::Linker from them, stores it on the engine and returns it.
# Otherwise: returns whatever linker is currently stored.
sub linker {
    my ( $self, $arg_ref ) = @_;
    my $linker_id   = $arg_ref->{id}   || 0;
    my $linker_name = $arg_ref->{name} || '';

    # Nothing to look up: act as a plain getter.
    return $self->get_linker() if !$linker_id && !$linker_name;

    my %ctor_args = (
        linker_id   => $linker_id,
        linker_name => $linker_name,
    );
    my $linker = BioX::CLPM::Linker->new( \%ctor_args );
    $self->set_linker($linker);
    return $linker;
}
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# API |
161
|
|
|
|
|
|
|
# Marks the linkable residues on the first two sequences.
#
#   $arg_ref->{linker}    - linker to use (default: the stored linker)
#   $arg_ref->{sequences} - array ref of sequences (default: stored ones)
#
# Sequence N (N = 0, 1) is paired with the N-th value returned by the
# linker's ends() and passed to _mark_links(). The list is then stored back
# on the engine and returned as an array reference.
sub mark_links {
    my ( $self, $arg_ref ) = @_;

    my $linker;
    if ( defined $arg_ref->{linker} ) {
        $linker = $arg_ref->{linker};
    }
    else {
        $linker = $self->get_linker();
    }
    my @ends = $linker->ends();

    my @sequences =
        defined $arg_ref->{sequences}
        ? @{ $arg_ref->{sequences} }
        : $self->sequences();

    for my $slot ( 0, 1 ) {
        $self->_mark_links(
            { sequence => $sequences[$slot], end => $ends[$slot] } );
    }

    $self->set_sequences( \@sequences );
    return \@sequences;
}
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# API |
175
|
|
|
|
|
|
|
# Digests every sequence into fragments.
#
#   $arg_ref->{sequences} - array ref of sequences (default: stored ones)
#
# For each sequence, in order: _cleave() cuts it with the stored enzyme,
# _missed() adds the missed-cleavage combinations, _filter() keeps the
# fragments of interest for that sequence's position, and the survivors are
# added to a new 'simple' BioX::CLPM::Fragments collection whose list is
# stored on the sequence. The index reported by one collection is used as
# the starting index of the next. Stores and returns the sequence list
# (array reference).
sub cleave {
    my ( $self, $arg_ref ) = @_;
    my $enzyme      = $self->get_enzyme();
    my $linker      = $self->get_linker();    # fetched but not used below
    my $missed_clvg = $self->get_missed_clvg();
    my @sequences =
        defined $arg_ref->{sequences}
        ? @{ $arg_ref->{sequences} }
        : $self->sequences();

    warn "ENGINE cleave() \n";

    my $next_index = 1;
    foreach my $position ( 0 .. $#sequences ) {
        my $sequence = $sequences[$position];

        my @pieces = $self->_cleave( { sequence => $sequence, enzyme => $enzyme } );
        @pieces = $self->_missed( { fragments => \@pieces, missed_clvg => $missed_clvg } );
        @pieces = $self->_filter( { fragments => \@pieces, index => $position } );

        my $fragments = BioX::CLPM::Fragments->new(
            {   sequence_id => $sequence->get_sequence_id(),
                index       => $next_index,
                type        => 'simple',
            }
        );
        foreach my $piece (@pieces) {
            $fragments->add( { sequence => $piece } );
        }
        $sequence->set_fragments( $fragments->get_list() );
        $next_index = $fragments->get_index();
    }

    $self->set_sequences( \@sequences );
    return \@sequences;
}
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# API |
200
|
|
|
|
|
|
|
# Computes a mass and residue counts for every fragment of every sequence.
#
#   $arg_ref->{var_mod}   - hash ref of variable modifications
#                           (default: var_mods())
#   $arg_ref->{sequences} - array ref of sequences (default: stored ones)
#
# A fragment's mass is the sum of the per-residue masses from _stat_mod()
# (residues are upper-cased before lookup) plus 18.010565 for one water
# molecule. Residue counts are kept only for the keys of the variable
# modification hash. Stores and returns the sequence list (array reference).
sub masses {
    my ( $self, $arg_ref ) = @_;
    my %var_mods =
        defined $arg_ref->{var_mod}
        ? %{ $arg_ref->{var_mod} }
        : $self->var_mods();
    my @sequences =
        defined $arg_ref->{sequences}
        ? @{ $arg_ref->{sequences} }
        : $self->sequences();
    my $aa_masses = $self->_stat_mod();

    foreach my $sequence (@sequences) {
        my @fragments = $sequence->fragments();

        foreach my $fragment (@fragments) {
            my $residues = $fragment->get_sequence();
            my %count_of;
            my $mass = 0;

            foreach my $residue ( split( //, $residues ) ) {
                my $aa = uc($residue);
                $mass += $aa_masses->{$aa};
                $count_of{$aa}++;
            }

            # Add mass of 1 molecule of water
            $mass += 18.010565;
            $fragment->set_mass($mass);

            # Keep counts for aa's affected by var_mod
            my %keepers;
            foreach my $var_mod ( keys %var_mods ) {
                $keepers{$var_mod} = $count_of{$var_mod};
            }
            $fragment->set_counts( \%keepers );
        }

        $sequence->set_fragments( \@fragments );
    }

    $self->set_sequences( \@sequences );
    return \@sequences;
}
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# API |
235
|
|
|
|
|
|
|
# Placeholder for the cross-linking step: works out the linker mass
# ($arg_ref->{mass} when defined, otherwise the stored linker's get_mass())
# and reports it with warn. No sequences are touched yet. The warn call is
# deliberately the last statement, so its value remains the return value.
sub cross_link {
    my ( $self, $arg_ref ) = @_;

    my $mass;
    if ( defined $arg_ref->{mass} ) {
        $mass = $arg_ref->{mass};
    }
    else {
        $mass = $self->linker()->get_mass();
    }

    #my @sequences = defined $arg_ref->{sequences} ? @{ $arg_ref->{sequences} } : $self->sequences();
    #return \@sequences;
    warn "ENGINE cross_link() mass $mass\n";
}
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# PRIV |
244
|
|
|
|
|
|
|
# Creates one 'linked' BioX::CLPM::Fragments entry for every pairing of a
# fragment from list1 with a fragment from list2. The mass of a pairing is
# mass(fragment 1) + mass(fragment 2) + the linker's get_mass().
#
#   $arg_ref->{list1}, {list2} - array refs of fragment objects (default [])
#   $arg_ref->{linker}         - linker object (default: the stored linker)
#   $arg_ref->{type}           - accepted for compatibility; not used
#
# Returns nothing. The collections created here are not kept by this sub.
#
# Fix: the original read the mass with $frag->{mass}. Fragments are given
# their mass through set_mass() elsewhere in this file, and a Class::Std
# object is not a hash, so that dereference would die. The accessor is used
# when the object has one; the hash lookup remains only as a fallback.
# NOTE(review): get_mass() on fragment objects is not visible from this
# file -- confirm against the fragment class.
sub _ffm {
    my ( $self, $arg_ref ) = @_;
    my $list1  = defined $arg_ref->{list1}  ? $arg_ref->{list1}  : [];
    my $list2  = defined $arg_ref->{list2}  ? $arg_ref->{list2}  : [];
    my $linker = defined $arg_ref->{linker} ? $arg_ref->{linker} : $self->get_linker();

    my $mass_of = sub {
        my ($frag) = @_;
        return $frag->can('get_mass') ? $frag->get_mass() : $frag->{mass};
    };

    foreach my $frag1 ( @{$list1} ) {
        foreach my $frag2 ( @{$list2} ) {
            my $fragments = BioX::CLPM::Fragments->new( { type => 'linked' } );
            $fragments->add(
                {   fragment_id_1 => $frag1->get_fragment_id(),
                    fragment_id_2 => $frag2->get_fragment_id(),
                    mass          => $mass_of->($frag1)
                        + $mass_of->($frag2)
                        + $linker->get_mass(),
                }
            );
        }
    }
    return;
}
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# PRIV |
261
|
|
|
|
|
|
|
# Cuts one sequence into fragments at the enzyme's cleavage sites.
#
#   $arg_ref->{sequence} - sequence object; its get_cl_sequence() string is
#                          the text that gets cut
#   $arg_ref->{enzyme}   - enzyme object (default: the stored enzyme)
#
# A cut is made directly after every residue that equals (case-
# insensitively) one of the enzyme's clvg_sites(), unless the residues that
# follow it equal the enzyme rule. The first character of get_rule() is
# discarded and the remainder is the rule text.
# Returns the list of fragment strings in sequence order.
#
# Fixes relative to the original:
#   * the look-ahead was an array slice assigned to a scalar, which yields
#     only the LAST element of the slice, so rules longer than one residue
#     could never match; the slice is now joined into a string and clipped
#     at the end of the sequence;
#   * the trailing fragment was pushed based on a "site seen at the last
#     residue" flag, which dropped the tail when that site was not actually
#     cut and pushed undef for an empty sequence; the tail is now pushed
#     exactly when it is non-empty;
#   * unused locals (rule sign, cleavage position) are gone, and the
#     cleavage-site list is fetched once instead of once per residue.
#
# NOTE(review): with an empty rule text the comparison '' eq '' is true, so
# no cut is ever made (unchanged from the original). If an empty rule is
# supposed to mean "always cut", that needs a separate change -- confirm
# against the enzyme data.
sub _cleave {
    my ( $self, $arg_ref ) = @_;
    my $sequence = defined $arg_ref->{sequence} ? $arg_ref->{sequence} : '';
    my $enzyme   = defined $arg_ref->{enzyme}   ? $arg_ref->{enzyme}   : $self->get_enzyme();

    my ( undef, @rule_chars ) = split( //, $enzyme->get_rule() );
    my $length = @rule_chars;
    my $rule   = join( '', @rule_chars );

    my @clvg_sites     = $enzyme->clvg_sites();
    my @sequence_chars = split( //, $sequence->get_cl_sequence() );

    my $fragment = '';
    my @fragments;
    for my $i ( 0 .. $#sequence_chars ) {
        my $aa = $sequence_chars[$i];
        $fragment .= $aa;

        next if !grep { uc($aa) eq $_ } @clvg_sites;

        # Residues after the site, as a string, never reading past the end.
        my $last = $i + $length;
        $last = $#sequence_chars if $last > $#sequence_chars;
        my $next_chars = join( '', @sequence_chars[ $i + 1 .. $last ] );

        next if uc($next_chars) eq $rule;

        push( @fragments, $fragment );
        $fragment = '';
    }

    push( @fragments, $fragment ) if length $fragment;
    return @fragments;
}
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# PRIV |
294
|
|
|
|
|
|
|
# Adds missed-cleavage fragments to a fragment list.
#
#   $arg_ref->{fragments}   - array ref of fragment strings (default: none)
#   $arg_ref->{missed_clvg} - number of missed cleavages allowed (default 0)
#
# For every run length from missed_clvg + 1 down to 2, each run of that many
# neighbouring fragments is concatenated. At the longest run length only, a
# concatenation that ends in a lower-case letter keeps absorbing the
# following fragments until it ends otherwise or the list is exhausted.
# Returns the original fragments followed by the concatenations.
sub _missed {
    my ( $self, $arg_ref ) = @_;
    my @fragments   = defined $arg_ref->{fragments}   ? @{ $arg_ref->{fragments} } : ();
    my $missed_clvg = defined $arg_ref->{missed_clvg} ? $arg_ref->{missed_clvg}    : 0;

    my @joined;
    my $span = $missed_clvg + 1;    # how many fragments are merged
    while ( $span > 1 ) {
        my $start = 0;
        while ( $start < @fragments - $span + 1 ) {
            my $merged = $fragments[$start];

            # Append the next ($span - 1) neighbours.
            my $taken = 0;
            while ( $taken < $span - 1 ) {
                $merged .= $fragments[ $start + $taken + 1 ];
                $taken++;
            }

            # Longest span only: keep extending past a lower-case ending.
            if ( $span == $missed_clvg + 1 ) {
                while ( $merged =~ m/[a-z]$/ ) {
                    my $following = $fragments[ $start + $taken + 1 ];
                    last if !$following;
                    $merged .= $following;
                    $taken++;
                }
            }

            push( @joined, $merged );
            $start++;
        }
        $span--;
    }

    push( @fragments, @joined );
    return @fragments;
}
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# PRIV |
318
|
|
|
|
|
|
|
# Keeps only the fragments of interest for one sequence.
#
#   $arg_ref->{fragments} - array ref of fragment strings (default: none)
#   $arg_ref->{linker}    - linker object (default: the stored linker)
#   $arg_ref->{index}     - which of the linker's ends() applies (default 0)
#
# A fragment is kept when it ends in an upper-case letter or ends with the
# text of the last fragment in the list. When the selected linker end is
# true, the fragment must additionally contain a lower-case letter.
# Returns the kept fragments in their original order.
#
# Fixes relative to the original:
#   * the last fragment was interpolated into the pattern unescaped, so any
#     regex metacharacter in a sequence (e.g. '*') changed the match or
#     broke compilation; it is now quoted with \Q...\E;
#   * an empty fragment list used to be turned into a list holding one
#     undef (by the pop/push idiom) and could return that undef; it now
#     returns an empty list straight away.
sub _filter {
    my ( $self, $arg_ref ) = @_;
    my @fragments = defined $arg_ref->{fragments} ? @{ $arg_ref->{fragments} } : ();
    my @results;
    return @results if !@fragments;

    my $final_fragment = $fragments[-1];
    my $linker = defined $arg_ref->{linker} ? $arg_ref->{linker} : $self->get_linker();
    my $index  = defined $arg_ref->{index}  ? $arg_ref->{index}  : 0;
    my @ends   = $linker->ends();
    my $end    = $ends[$index];

    my $ends_with_final = qr/\Q$final_fragment\E$/;

    foreach my $fragment (@fragments) {
        # With a linker end in play, fragments without a marked residue are
        # of no interest.
        next if $end && !$self->_has_lc($fragment);

        if ( $self->_has_uc_last($fragment) or $fragment =~ $ends_with_final ) {
            push @results, $fragment;
        }
    }
    return @results;
}
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# PRIV |
336
|
|
|
|
|
|
|
# Applies the static modification to a residue-mass table.
#
#   $arg_ref->{aa_masses} - hash ref of masses keyed by residue code
#                           (default: $self->load_masses())
#
# Depending on get_stat_mod(), the mass stored under one residue code is
# overwritten in place with the mass stored under a variant key:
#   carbamidomethylated -> C  takes the value of C2
#   carboxymethylated   -> C  takes the value of C3
#   acrylamid adduct    -> C  takes the value of C4
#   oxidized methionine -> M  takes the value of M2
# Any other value leaves the table untouched. Returns the table (hash ref).
#
# The original used the Switch source filter for this dispatch; a plain
# lookup table gives the same result for string values without relying on
# source filtering.
sub _stat_mod {
    my ( $self, $arg_ref ) = @_;
    my $aa_masses = defined $arg_ref->{aa_masses} ? $arg_ref->{aa_masses} : $self->load_masses();

    # modification name => [ residue to overwrite, key holding the new mass ]
    my %substitution_for = (
        'carbamidomethylated' => [ 'C', 'C2' ],
        'carboxymethylated'   => [ 'C', 'C3' ],
        'acrylamid adduct'    => [ 'C', 'C4' ],
        'oxidized methionine' => [ 'M', 'M2' ],
    );

    my $stat_mod = $self->get_stat_mod();
    if ( defined $stat_mod and exists $substitution_for{$stat_mod} ) {
        my ( $residue, $variant ) = @{ $substitution_for{$stat_mod} };
        $aa_masses->{$residue} = $aa_masses->{$variant};
    }
    return $aa_masses;
}
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# PRIV |
349
|
|
|
|
|
|
|
# Returns 1 when $str contains at least one lower-case ASCII letter,
# 0 otherwise.
sub _has_lc {
    my ( $self, $str ) = @_;
    return $str =~ m/[a-z]/ ? 1 : 0;
}
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# PRIV |
355
|
|
|
|
|
|
|
# Returns 1 when $str ends in an upper-case ASCII letter, 0 otherwise.
sub _has_uc_last {
    my ( $self, $str ) = @_;
    return $str =~ m/[A-Z]$/ ? 1 : 0;
}
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# PRIV |
361
|
|
|
|
|
|
|
# Marks the linkable residues of one sequence by lower-casing them.
#
#   $arg_ref->{sequence} - sequence object (get_sequence / set_cl_sequence)
#   $arg_ref->{end}      - string whose characters are the residues to mark
#
# Every occurrence of the upper-cased form of each character of the end
# string is replaced by its lower-cased form in the text from
# get_sequence(); the result is stored with set_cl_sequence(). Returns the
# sequence object.
#
# Fix: each character is now quoted (\Q...\E) before being used as a
# pattern. Unquoted, a metacharacter in the end string was interpreted as
# regex syntax -- a '.' would have overwritten every residue.
#
# NOTE(review): the fallbacks $self->get_sequence() and $self->get_end() are
# kept as in the original; this file declares no 'sequence' or 'end'
# attribute, so they may not exist -- confirm against BioX::CLPM::Base.
sub _mark_links {
    my ( $self, $arg_ref ) = @_;
    my $sequence     = defined $arg_ref->{sequence} ? $arg_ref->{sequence} : $self->get_sequence();
    my $sequence_str = $sequence->get_sequence();
    my $end          = defined $arg_ref->{end} ? $arg_ref->{end} : $self->get_end();

    foreach my $amino_acid ( split( '', $end ) ) {
        my $upper = uc($amino_acid);
        my $lower = lc($amino_acid);
        $sequence_str =~ s/\Q$upper\E/$lower/g;
    }

    $sequence->set_cl_sequence($sequence_str);
    return $sequence;
}
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# UTIL |
378
|
|
|
|
|
|
|
# Records the current run settings in the run_data table and returns the id
# reported by "select LAST_INSERT_ID()".
#
# NOTE(review): the insert statement is assembled by string interpolation.
# stat_mod and var_mod are placed between single quotes without escaping,
# and var_mod is dereferenced as a hash elsewhere in this file, so it would
# be written as its stringified reference. Whether sqlexec() supports bind
# values cannot be seen from here -- confirm before changing.
sub insert_run {
    my ($self) = @_;

    # Gathered in the same order as before: enzyme, linker, then settings.
    my $enzyme      = $self->get_enzyme;
    my $enzyme_id   = $enzyme->get_enzyme_id();
    my $linker      = $self->get_linker;
    my $linker_id   = $linker->get_linker_id();
    my $tolerance   = $self->get_tolerance();
    my $missed_clvg = $self->get_missed_clvg();
    my $stat_mod    = $self->get_stat_mod();
    my $var_mod     = $self->get_var_mod();

    my $insert_sql = "insert into run_data ( enzyme_id, linker_id, tolerance, missed_clvg, stat_mod, var_mod) values ($enzyme_id, $linker_id, $tolerance, $missed_clvg, '$stat_mod', '$var_mod' )";
    $self->sqlexec($insert_sql);

    my $id_sql = 'select LAST_INSERT_ID()';
    my ($run_id) = $self->sqlexec( $id_sql, '\@@' );
    return $run_id;
}
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# UTIL |
394
|
|
|
|
|
|
|
# Empties the working tables by issuing one "truncate table" statement per
# table through sqlexec(), after announcing itself with warn. The statement
# for the last table is the final expression, so its result is what the sub
# returns.
sub db_trunc {
    my ($self) = @_;
    warn "ENGINE db_trunc() \n";

    my @tables = qw(
        sequences
        fragments
        final_fragment_masses
        run_data
        file_masses
        results
    );
    foreach my $table (@tables) {
        $self->sqlexec("truncate table $table");
    }
    return $self->sqlexec("truncate table precursor_masses");
}
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# UTIL |
407
|
|
|
|
|
|
|
# Returns the sequence string read from a file, or undef.
#
#   $arg_ref->{file} - path of a sequence file; read with read_sequence()
#                      when the path exists
#   $arg_ref->{id}   - sequence id; lookup is not implemented yet
sub get_seq {
    my ( $self, $arg_ref ) = @_;
    my $file = $arg_ref->{file} || '';
    my $id   = $arg_ref->{id}   || 0;

    my $sequence;
    if ( -e $file ) {
        # Guess file format from extension with read_sequence()
        $sequence = read_sequence($file)->seq();
    }
    elsif ($id) {
        # Get sequence from database
        # TODO
    }
    return $sequence;
}
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# sub add_sequence { |
425
|
|
|
|
|
|
|
# my ( $self, $arg_ref ) = @_; |
426
|
|
|
|
|
|
|
# my @sequences = @{$self->get_sequences()}; |
427
|
|
|
|
|
|
|
# push( @sequences, |
428
|
|
|
|
|
|
|
# BioX::CLPM::Sequence->new( { sequence => $arg_ref->{sequence} }, |
429
|
|
|
|
|
|
|
# { sequence_id => $arg_ref->{sequence_id} ? $arg_ref->{sequence_id} : (@$sequences + 1) } ); |
430
|
|
|
|
|
|
|
# $self->set_sequences(\@sequences); |
431
|
|
|
|
|
|
|
# } |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
435
|
|
|
|
|
|
|
__END__ |