line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Algorithm::MLCS;

use strict;
use warnings FATAL => 'all';

require Exporter;

# Package globals consumed by Exporter (@ISA, @EXPORT) and by tooling that
# reads $VERSION. Declared with `our` instead of the obsolete `use vars`.
our @ISA     = qw/ Exporter /;
our @EXPORT  = qw/ lcs /;
our $VERSION = '1.02';
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# lcs( \@seqs )
#
# Takes an arrayref of sequences (each sequence is itself an arrayref of
# tokens) and heuristically builds a common subsequence of all of them.
#
# Returns the subsequence as a list in list context, or its length in
# scalar context.
#
# Each sequence is first reduced to the tokens shared by every sequence
# (_get_dict) and then converted into a per-position lookup table
# (_build_seq): for position $i, the hash maps every token that still occurs
# at or after $i to "sequence length minus index of its next occurrence".
# A larger value therefore means the token appears sooner.
sub lcs {
    my ($seqs) = @_;

    my @seq = map { _build_seq($_) } _get_dict($seqs);
    my @lcs;

    # Continue while there is at least one sequence and none is exhausted.
    while ( @seq && !( grep { !@{$_} } @seq ) ) {

        # For every token still available in ALL sequences keep the
        # smallest "remaining" value, i.e. its worst-case position.
        my %dict = %{ $seq[0][0] };
        for my $s ( @seq[ 1 .. $#seq ] ) {
            my $head = $s->[0];
            %dict = map {
                $_ => ( $dict{$_} > $head->{$_} ? $head->{$_} : $dict{$_} )
            } grep { $head->{$_} } keys %dict;
        }

        last unless %dict;

        # Choose the token whose worst-case position is earliest. Ties are
        # broken by token name so the result does not depend on hash
        # iteration order and is reproducible between runs.
        my ($best) =
          sort { $dict{$b} <=> $dict{$a} or $a cmp $b } keys %dict;
        push @lcs, $best;

        # Advance every sequence to just past the chosen token. The entry
        # being removed is the token itself exactly when its stored value
        # equals the number of entries that were left before the shift.
        for my $s (@seq) {
            while ( @{$s} ) {
                my $left  = @{$s};
                my $state = shift @{$s};
                last if $left == $state->{$best};
            }
        }
    }

    return wantarray ? @lcs : scalar @lcs;
}
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# _build_seq( \@tokens )
#
# Auxiliary function: converts a single sequence (arrayref of tokens) into
# the lookup structure used by lcs().
#
# Returns an arrayref with one hashref per position of the input. The hash
# for position $i contains every token that occurs at index $i or later,
# mapped to (length of sequence - index of its first occurrence at or
# after $i). An empty input yields an empty arrayref.
sub _build_seq {
    my ($seq) = @_;

    my $len = @{$seq};
    my ( @seq_st, %next );

    # Walk right-to-left so %next always holds, for each token, the value
    # of its nearest occurrence at or after the current index.
    for my $i ( reverse 0 .. $len - 1 ) {
        $next{ $seq->[$i] } = $len - $i;

        # Store a snapshot: every position owns an independent hash.
        $seq_st[$i] = {%next};
    }

    return \@seq_st;
}
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# _get_dict( \@seqs )
#
# Auxiliary function: takes an arrayref of sequences (arrayrefs of tokens),
# determines the set of tokens present in every sequence, and returns a
# LIST of new arrayrefs (one per input sequence, same order) containing
# only those shared tokens. The input sequences are not modified.
#
# Returns an empty list when no sequences are given.
sub _get_dict {
    my ($seq) = @_;

    # Nothing to intersect: avoid dereferencing a missing first sequence.
    return if !defined($seq) || !@{$seq};

    my ( $first, @rest ) = @{$seq};
    my %common = map { $_ => 1 } @{$first};

    for my $other (@rest) {
        %common = map { $_ => 1 } grep { $common{$_} } @{$other};

        # Once the intersection is empty it can never grow again.
        last unless %common;
    }

    return map {
        my $tokens = $_;
        [ grep { $common{$_} } @{$tokens} ]
    } @{$seq};
}
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
1; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 NAME |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Algorithm::MLCS - Fast heuristic algorithm for finding Longest Common Subsequence |
84
|
|
|
|
|
|
|
of multiple sequences |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 VERSION |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Version 1.02 |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 SYNOPSIS |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
use Data::Dumper; |
93
|
|
|
|
|
|
|
use Algorithm::MLCS; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
my @seqs = ( |
96
|
|
|
|
|
|
|
[ qw/a b c d f g h j q z/ ], |
97
|
|
|
|
|
|
|
[ qw/a b c d f g h j q z/ ], |
98
|
|
|
|
|
|
|
[ qw/a b c x f h j q z/ ], |
99
|
|
|
|
|
|
|
[ qw/a b c f g j q z/ ], |
100
|
|
|
|
|
|
|
); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
my @lcs = lcs( \@seqs ); |
103
|
|
|
|
|
|
|
my $lcs_length = lcs( \@seqs ); |
104
|
|
|
|
|
|
|
print Dumper( \@lcs ); |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 ABSTRACT |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Finding the longest common subsequence (LCS) for the general case of an arbitrary |
109
|
|
|
|
|
|
|
number of input sequences is an NP-hard problem. Algorithm::MLCS implements a fast |
110
|
|
|
|
|
|
|
heuristic algorithm that addresses the general case of multiple sequences. |
111
|
|
|
|
|
|
|
It is able to extract a common subsequence that is close to the optimal one. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 METHODS |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 lcs ( \@seqs ) |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Finds a Longest Common Subsequence of multiple sequences given by @seqs arrayref. |
118
|
|
|
|
|
|
|
Each element of @seqs is an arrayref that represents one of the multiple sequences |
119
|
|
|
|
|
|
|
(e.g. [ ['a', 'b', 'c'], ['a', 'c', 'd', 'e'], ... ]). In list context it returns |
120
|
|
|
|
|
|
|
LCS array, in scalar - the length of LCS. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 SEE ALSO |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Algorithm::LCS |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head1 AUTHOR |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Slava Moiseev, C<< >> |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Copyright 2012 Slava Moiseev. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
135
|
|
|
|
|
|
|
under the same terms as Perl itself. |
136
|
|
|
|
|
|
|
|