| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Text::WagnerFischer; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 996 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 37 |  | 
| 4 | 1 |  |  | 1 |  | 5 | use Exporter; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 42 |  | 
| 5 | 1 |  |  | 1 |  | 6 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $REFC); | 
|  | 1 |  |  |  |  | 13 |  | 
|  | 1 |  |  |  |  | 1127 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | $VERSION     = '0.04'; | 
| 8 |  |  |  |  |  |  | @ISA         = qw(Exporter); | 
| 9 |  |  |  |  |  |  | @EXPORT      = (); | 
| 10 |  |  |  |  |  |  | @EXPORT_OK   = qw(&distance); | 
| 11 |  |  |  |  |  |  | %EXPORT_TAGS = (); | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | $REFC=[0,1,1]; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | sub _min { | 
| 16 |  |  |  |  |  |  |  | 
| 17 | 93 |  |  | 93 |  | 108 | my ($first,$second,$third)=@_; | 
| 18 | 93 |  |  |  |  | 98 | my $result=$first; | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 93 | 100 |  |  |  | 283 | $result=$second if ($second < $result); | 
| 21 | 93 | 100 |  |  |  | 141 | $result=$third if ($third < $result); | 
| 22 |  |  |  |  |  |  |  | 
| 23 | 93 |  |  |  |  | 353 | return $result | 
| 24 |  |  |  |  |  |  | } | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | sub _weight { | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | #the cost function | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 279 |  |  | 279 |  | 555 | my ($x,$y,$refc)=@_; | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 279 | 100 | 100 |  |  | 1142 | if ($x eq $y) { | 
|  |  | 100 |  |  |  |  |  | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 27 |  |  |  |  | 65 | return $refc->[0] #cost for letter match | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | } elsif (($x eq '-') or ($y eq '-')) { | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 186 |  |  |  |  | 525 | return $refc->[1] #cost for insertion/deletion operation | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | } else { | 
| 41 |  |  |  |  |  |  |  | 
| 42 | 66 |  |  |  |  | 243 | return $refc->[2] #cost for letter mismatch | 
| 43 |  |  |  |  |  |  | } | 
| 44 |  |  |  |  |  |  | } | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | sub distance { | 
| 47 |  |  |  |  |  |  |  | 
| 48 | 5 |  |  | 5 | 0 | 69 | my ($refc,$s,@t)=@_; | 
| 49 |  |  |  |  |  |  |  | 
| 50 | 5 | 100 |  |  |  | 21 | if (!@t) { | 
|  |  | 100 |  |  |  |  |  | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 2 | 50 |  |  |  | 8 | if (ref($refc) ne "ARRAY") { | 
| 53 |  |  |  |  |  |  |  | 
| 54 | 2 | 50 |  |  |  | 5 | if (ref($s) ne "ARRAY") { | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | #array cost missing: using default [0,1,1] | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 2 |  |  |  |  | 6 | $t[0]=$s; | 
| 59 | 2 |  |  |  |  | 4 | $s=$refc; | 
| 60 | 2 |  |  |  |  | 4 | $refc=$REFC; | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | } else { | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 0 |  |  |  |  | 0 | require Carp; | 
| 65 | 0 |  |  |  |  | 0 | Carp::croak("Text::WagnerFischer: second string is needed"); | 
| 66 |  |  |  |  |  |  | } | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | } else { | 
| 69 |  |  |  |  |  |  |  | 
| 70 | 0 |  |  |  |  | 0 | require Carp; | 
| 71 | 0 |  |  |  |  | 0 | Carp::croak("Text::WagnerFischer: second string is needed"); | 
| 72 |  |  |  |  |  |  | } | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | } elsif (ref($refc) ne "ARRAY") { | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | #array cost missing: using default [0,1,1] | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 1 |  |  |  |  | 4 | unshift @t,$s; | 
| 79 | 1 |  |  |  |  | 1 | $s=$refc; | 
| 80 | 1 |  |  |  |  | 2 | $refc=$REFC; | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  |  | 
| 83 | 5 |  |  |  |  | 9 | my $n=length($s); | 
| 84 | 5 |  |  |  |  | 7 | my @result; | 
| 85 |  |  |  |  |  |  |  | 
| 86 | 5 |  |  |  |  | 9 | foreach my $t (@t) { | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 9 |  |  |  |  | 10 | my @d; | 
| 89 |  |  |  |  |  |  |  | 
| 90 | 9 |  |  |  |  | 13 | my $m=length($t); | 
| 91 | 9 | 50 |  |  |  | 19 | if(!$n) {push @result,$m*$refc->[1];next} | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 92 | 9 | 50 |  |  |  | 16 | if(!$m) {push @result,$n*$refc->[1];next} | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 9 |  |  |  |  | 24 | $d[0][0]=0; | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | # original algorithm should be: | 
| 97 |  |  |  |  |  |  | # foreach my $i (1 .. $n) { | 
| 98 |  |  |  |  |  |  | # | 
| 99 |  |  |  |  |  |  | #	my $dist_tmp=0; | 
| 100 |  |  |  |  |  |  | #	foreach my $k (1 .. $i) {$dist_tmp+=_weight(substr($s,$i,1),'-',$refc)} | 
| 101 |  |  |  |  |  |  | #	$d[$i][0]=$dist_tmp; | 
| 102 |  |  |  |  |  |  | # } | 
| 103 |  |  |  |  |  |  | # | 
| 104 |  |  |  |  |  |  | # foreach my $j (1 .. $m) { | 
| 105 |  |  |  |  |  |  | # | 
| 106 |  |  |  |  |  |  | #	my $dist_tmp=0; | 
| 107 |  |  |  |  |  |  | #	foreach my $k (1 .. $j) {$dist_tmp+=_weight('-',substr($t,$j,1),$refc)} | 
| 108 |  |  |  |  |  |  | #	$d[0][$j]=$dist_tmp; | 
| 109 |  |  |  |  |  |  | # } | 
| 110 |  |  |  |  |  |  | # that is: | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 9 |  |  |  |  | 21 | foreach my $i (1 .. $n) {$d[$i][0]=$i*$refc->[1];} | 
|  | 27 |  |  |  |  | 66 |  | 
| 113 | 9 |  |  |  |  | 18 | foreach my $j (1 .. $m) {$d[0][$j]=$j*$refc->[1];} | 
|  | 31 |  |  |  |  | 344 |  | 
| 114 |  |  |  |  |  |  |  | 
| 115 | 9 |  |  |  |  | 17 | foreach my $i (1 .. $n) { | 
| 116 | 27 |  |  |  |  | 45 | my $s_i=substr($s,$i-1,1); | 
| 117 | 27 |  |  |  |  | 45 | foreach my $j (1 .. $m) { | 
| 118 |  |  |  |  |  |  |  | 
| 119 | 93 |  |  |  |  | 280 | my $t_i=substr($t,$j-1,1); | 
| 120 |  |  |  |  |  |  |  | 
| 121 | 93 |  |  |  |  | 182 | $d[$i][$j]=_min($d[$i-1][$j]+_weight($s_i,'-',$refc), | 
| 122 |  |  |  |  |  |  | $d[$i][$j-1]+_weight('-',$t_i,$refc), | 
| 123 |  |  |  |  |  |  | $d[$i-1][$j-1]+_weight($s_i,$t_i,$refc)) | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 9 |  |  |  |  | 38 | push @result,$d[$n][$m]; | 
| 128 |  |  |  |  |  |  | } | 
| 129 |  |  |  |  |  |  |  | 
| 130 | 5 | 100 |  |  |  | 13 | if (wantarray) {return @result} else {return $result[0]} | 
|  | 2 |  |  |  |  | 10 |  | 
|  | 3 |  |  |  |  | 11 |  | 
| 131 |  |  |  |  |  |  | } | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | 1; | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | __END__ |