File Coverage

blib/lib/Algorithm/MLCS.pm

Criterion	Covered	Total	%
statement	59	59	100.0
branch	12	14	85.7
condition	1	3	33.3
subroutine	6	6	100.0
pod	1	1	100.0
total	79	83	95.1

line	stmt	bran	cond	sub	pod	time	code
1							package Algorithm::MLCS;
2
3	2			2		53490	use strict;
	2					4
	2					83
4	2			2		11	use warnings FATAL => 'all';
	2					4
	2					93
5
6	2			2		11	use vars qw/ $VERSION @ISA @EXPORT /;
	2					15
	2					1540
7
8							require Exporter;
9
10							@ISA = qw/ Exporter /;
11							@EXPORT = qw/ lcs /;
12							$VERSION = '1.02';
13
14							# Gets arrayref of sequences (arrayrefs) and return LCS array in list context
15							# or length of LCS in scalar context
16							sub lcs {
17	6			6	1	329771	my ( @seq, @lcs ) = map { _build_seq($_) } _get_dict( $_[0] );
	110					498
18
19	6		33			2360	while ( @seq && !( grep { !@$_ } @seq ) ) {
	1600					2916
20	89					121	my %dict = ( %{ $seq[0][0] } );
	89					2387
21
22	89					446	for my $s ( @seq[ 1 .. $#seq ] ) {
23	20875	100				69074	%dict = map {
24	21114					35376	$_ => $dict{$_} > $s->[0]{$_}
25							? $s->[0]{$_} : $dict{$_}
26	1511					4935	} grep { $s->[0]{$_} } keys %dict;
27							}
28
29	89	100				351	last unless %dict;
30
31	83					461	push @lcs, ( sort { $dict{$b} <=> $dict{$a} } keys %dict )[0];
	3175					4141
32
33	83					245	for (@seq) {
34	1490	100				3216	while (@$_) { last if @$_ == ( shift @$_ )->{ $lcs[-1] } }
	18116					100563
35							}
36							}
37
38	6	50				5587	wantarray ? @lcs : scalar @lcs;
39							}
40
41							# Auxiliary function that gets single sequence arrayref and
42							# build specific data structure for further processing
43							# in order to find LCS
44							sub _build_seq {
45	110			110		328	my ( $seq, %dict, @seq_st ) = @_;
46
47	110					178	for ( 0 .. $#{$seq} ) { push @{ $dict{ $seq->[$_] } }, $_ }
	110					543
	22010					21599
	22010					52662
48
49	110					234	for my $i ( 0 .. $#{$seq} ) {
	110					308
50	22010					24150	my %tok;
51	22010					76579	for ( keys %dict ) {
52	326087					329331	$tok{$_} = @{$seq} - $dict{$_}[0];
	326087					735816
53	326087	100				883104	if ( $dict{$_}[0] == $i ) {
54	22010					22602	shift @{ $dict{$_} };
	22010					32431
55	22010	100				26520	delete $dict{$_} if !@{ $dict{$_} };
	22010					68860
56							}
57							}
58	22010					81438	$seq_st[$i] = \%tok;
59							}
60
61	110					896	return \@seq_st;
62							}
63
64							# Auxiliary function that gets arrayref of sequences (arrayrefs),
65							# builds dictionary of unique tokens presented in all given sequences
66							# and returns the arrayref of new sequences with only tokens from dictionary
67							sub _get_dict {
68	6			6		17	my $seq = shift;
69	6					12	my %dict = map { $_ => 1 } @{ $seq->[0] };
	1201					2339
	6					23
70
71	6					131	for ( @{$seq}[ 1 .. $#{$seq} ] ) {
	6					26
	6					26
72	104					252	%dict = map { $_ => 1 } grep { $dict{$_} } @$_;
	20809					38365
	20809					32146
73	104	50				3986	last unless %dict;
74							}
75
76	6					23	return map { [ grep { $dict{$_} } @$_ ] } @{$seq};
	110					279
	22010					36407
	6					19
77							}
78
79							1;
80
81							=head1 NAME
82
83							Algorithm::MLCS - Fast heuristic algorithm for finding Longest Common Subsequence
84							of multiple sequences
85
86							=head1 VERSION
87
88							Version 1.02
89
90							=head1 SYNOPSIS
91
92							use Data::Dumper;
93							use Algorithm::MLCS;
94
95							my @seqs = (
96							[ qw/a b c d f g h j q z/ ],
97							[ qw/a b c d f g h j q z/ ],
98							[ qw/a b c x f h j q z/ ],
99							[ qw/a b c f g j q z/ ],
100							);
101
102							my @lcs = lcs( \@seqs );
103							my $lcs_length = lcs( \@seqs );
104							print Dumper( \@lcs );
105
106							=head1 ABSTRACT
107
108							Finding the longest common subsequence (LCS) for the general case of an arbitrary
109							number of input sequences is an NP-hard problem. Algorithm::MLCS implements a fast
110							heuristic algorithm that addresses the general case of multiple sequences.
111							It is able to extract common subsequence that is close to the optimal ones.
112
113							=head1 METHODS
114
115							=head2 lcs ( \@seqs )
116
117							Finds a Longest Common Subsequence of multiple sequences given by @seqs arrayref.
118							Each element of @seqs is arrayref that represents the one of multiple sequences
119							(e.g. [ ['a', 'b', 'c'], ['a', 'c', 'd', 'e'], ... ]). In list context it returns
120							LCS array, in scalar - the length of LCS.
121
122							=head1 SEE ALSO
123
124							Algorithm::LCS
125
126							=head1 AUTHOR
127
128							Slava Moiseev, C<< >>
129
130							=head1 LICENSE AND COPYRIGHT
131
132							Copyright 2012 Slava Moiseev.
133
134							This program is free software; you can redistribute it and/or modify it
135							under the same terms as Perl itself.
136