File Coverage

blib/lib/Sort/Sub/by_similarity_using_editdist.pm

Criterion	Covered	Total	%
statement	8	27	29.6
branch	0	14	0.0
condition			n/a
subroutine	3	8	37.5
pod	0	2	0.0
total	11	51	21.5

line	stmt	bran	sub	pod	time	code
1						package Sort::Sub::by_similarity_using_editdist;
2
3	1		1		418007	use 5.010001;
	1				4
4	1		1		4	use strict;
	1				2
	1				21
5	1		1		4	use warnings;
	1				5
	1				460
6
7						our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8						our $DATE = '2024-01-12'; # DATE
9						our $DIST = 'Sort-SubBundle-BySimilarity'; # DIST
10						our $VERSION = '0.001'; # VERSION
11
12						sub __min(@) { ## no critic: Subroutines::ProhibitSubroutinePrototypes
13	0		0			my $m = $_[0];
14	0					for (@_) {
15	0	0				$m = $_ if $_ < $m;
16						}
17	0					$m;
18						}
19
20						sub __editdist {
21	0		0			my @a = split //, shift;
22	0					my @b = split //, shift;
23
24						# There is an extra row and column in the matrix. This is the distance from
25						# the empty string to a substring of the target.
26	0					my @d;
27	0					$d[$_][0] = $_ for 0 .. @a;
28	0					$d[0][$_] = $_ for 0 .. @b;
29
30	0					for my $i (1 .. @a) {
31	0					for my $j (1 .. @b) {
32	0	0				$d[$i][$j] = (
33						$a[$i-1] eq $b[$j-1]
34						? $d[$i-1][$j-1]
35						: 1 + __min(
36						$d[$i-1][$j],
37						$d[$i][$j-1],
38						$d[$i-1][$j-1]
39						)
40						);
41						}
42						}
43
44	0					$d[@a][@b];
45						}
46
47						sub meta {
48						return {
49	0		0	0		v => 1,
50						summary => 'Sort strings by similarity to target string (most similar first)',
51						description => <<'MARKDOWN',
52
53						MARKDOWN
54						args => {
55						string => {
56						schema => 'str*',
57						req => 1,
58						},
59						},
60						};
61						}
62
63						sub gen_sorter {
64	0		0	0		my ($is_reverse, $is_ci, $args) = @_;
65
66						sub {
67	0	0	0			my $dist_a = __editdist(($is_ci ? lc($_[0]) : $_[0]), ($is_ci ? lc($args->{string}) : $args->{string}));
		0
68	0	0				my $dist_b = __editdist(($is_ci ? lc($_[1]) : $_[1]), ($is_ci ? lc($args->{string}) : $args->{string}));
		0
69	0	0				my $cmp = ($is_reverse ? -1:1) * ($dist_a <=> $dist_b);
70	0					};
71						}
72
73						1;
74						# ABSTRACT: Sort strings by similarity to target string (most similar first)
75
76						__END__