| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Lingua::EN::SimilarNames::Levenshtein; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 2029 | use MooseX::Declare; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  | use Text::LevenshteinXS qw(distance); | 
| 5 |  |  |  |  |  |  | use Math::Combinatorics; | 
| 6 |  |  |  |  |  |  | use strict; | 
| 7 |  |  |  |  |  |  | use warnings; | 
| 8 |  |  |  |  |  |  | use 5.010; | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | our $VERSION = '0.10'; | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | =head1 Name | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | Lingua::EN::SimilarNames::Levenshtein - Compare people's first and last names. | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | =head1 Synopsis | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | my $people = [ | 
| 19 |  |  |  |  |  |  | [ 'John',     'Wayne' ], | 
| 20 |  |  |  |  |  |  | [ 'Sundance', 'Kid' ], | 
| 21 |  |  |  |  |  |  | [ 'Jose',     'Wales' ], | 
| 22 |  |  |  |  |  |  | [ 'John',     'Wall' ], | 
| 23 |  |  |  |  |  |  | ]; | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | my @people_objects = map { | 
| 26 |  |  |  |  |  |  | Person->new( | 
| 27 |  |  |  |  |  |  | first_name => $_->[0], | 
| 28 |  |  |  |  |  |  | last_name  => $_->[1], | 
| 29 |  |  |  |  |  |  | ) | 
| 30 |  |  |  |  |  |  | } @{$people}; | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | # Build list of name pairs within 5 character edits of each other | 
| 33 |  |  |  |  |  |  | my $similar_people = SimilarNames->new( | 
| 34 |  |  |  |  |  |  | list_of_people   => \@people_objects, | 
| 35 |  |  |  |  |  |  | maximum_distance => 5 | 
| 36 |  |  |  |  |  |  | ); | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | # Get the pairs of similar names as an ArrayRef[ArrayRef[ArrayRef[Str]]] | 
| 39 |  |  |  |  |  |  | print Dumper $similar_people->list_of_similar_name_pairs; | 
| 40 |  |  |  |  |  |  | # which results in: | 
| 41 |  |  |  |  |  |  | [ | 
| 42 |  |  |  |  |  |  | [ [ "Jose", "Wales" ], [ "John", "Wall" ] ], | 
| 43 |  |  |  |  |  |  | [ [ "Jose", "Wales" ], [ "John", "Wayne" ] ], | 
| 44 |  |  |  |  |  |  | [ [ "John", "Wall" ],  [ "John", "Wayne" ] ] | 
| 45 |  |  |  |  |  |  | ] | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | =head1 Description | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | Given a list of Person objects, find the pairs of people whose names are | 
| 50 |  |  |  |  |  |  | within a specified edit distance of each other. | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | =cut | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | =head1 Classes | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | =head2 Person | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | This class defines people objects with first and last name attributes. | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | =cut | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | class Person { | 
| 63 |  |  |  |  |  |  | has 'first_name' => (isa => 'Str', is => 'ro', default => ''); | 
| 64 |  |  |  |  |  |  | has 'last_name'  => (isa => 'Str', is => 'ro', default => ''); | 
| 65 |  |  |  |  |  |  | has 'full_name'  => ( | 
| 66 |  |  |  |  |  |  | isa        => 'Str', | 
| 67 |  |  |  |  |  |  | is         => 'ro', | 
| 68 |  |  |  |  |  |  | lazy_build => 1, | 
| 69 |  |  |  |  |  |  | ); | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | method say_name() { | 
| 72 |  |  |  |  |  |  | say $self->full_name; | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | method _build_full_name { | 
| 76 |  |  |  |  |  |  | return $self->first_name . ' ' . $self->last_name; | 
| 77 |  |  |  |  |  |  | } | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | =head2 CompareTwoNames | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | This class defines comparator objects.  Given two Person objects, it computes | 
| 83 |  |  |  |  |  |  | the sum of the edit distances between their first names and their last names. | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | =cut | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | class CompareTwoNames { | 
| 88 |  |  |  |  |  |  | has 'one_person'     => (isa => 'Person', is => 'rw'); | 
| 89 |  |  |  |  |  |  | has 'another_person' => (isa => 'Person', is => 'rw'); | 
| 90 |  |  |  |  |  |  | has 'distance_between' => ( | 
| 91 |  |  |  |  |  |  | isa        => 'Int', | 
| 92 |  |  |  |  |  |  | is         => 'ro', | 
| 93 |  |  |  |  |  |  | lazy_build => 1, | 
| 94 |  |  |  |  |  |  | ); | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | method _build_distance_between() { | 
| 97 |  |  |  |  |  |  | return Text::LevenshteinXS::distance($self->one_person->first_name, | 
| 98 |  |  |  |  |  |  | $self->another_person->first_name) + | 
| 99 |  |  |  |  |  |  | Text::LevenshteinXS::distance($self->one_person->last_name, | 
| 100 |  |  |  |  |  |  | $self->another_person->last_name); | 
| 101 |  |  |  |  |  |  | }; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =head2 SimilarNames | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | This class takes a list of Person objects and uses CompareTwoNames to find the | 
| 107 |  |  |  |  |  |  | pairs whose distance is between C<minimum_distance> and C<maximum_distance>, inclusive. | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | One can get at the list of Person object pairs with similar names via the | 
| 110 |  |  |  |  |  |  | C<list_of_people_with_similar_names> attribute.  Alternatively, one can | 
| 111 |  |  |  |  |  |  | get at the list of the name pairs themselves (no Person objects) via the | 
| 112 |  |  |  |  |  |  | C<list_of_similar_name_pairs> attribute. | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | =cut | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | class SimilarNames { | 
| 117 |  |  |  |  |  |  | has 'list_of_people' => ( | 
| 118 |  |  |  |  |  |  | isa        => 'ArrayRef[Person]', | 
| 119 |  |  |  |  |  |  | is         => 'ro', | 
| 120 |  |  |  |  |  |  | lazy_build => 1 | 
| 121 |  |  |  |  |  |  | ); | 
| 122 |  |  |  |  |  |  | has 'minimum_distance' => (isa => 'Int', is => 'rw', default => 1); | 
| 123 |  |  |  |  |  |  | has 'maximum_distance' => (isa => 'Int', is => 'rw', default => 3); | 
| 124 |  |  |  |  |  |  | has 'list_of_people_with_similar_names' => ( | 
| 125 |  |  |  |  |  |  | isa        => 'ArrayRef[ArrayRef[Person]]', | 
| 126 |  |  |  |  |  |  | is         => 'ro', | 
| 127 |  |  |  |  |  |  | lazy_build => 1 | 
| 128 |  |  |  |  |  |  | ); | 
| 129 |  |  |  |  |  |  | has 'list_of_similar_name_pairs' => ( | 
| 130 |  |  |  |  |  |  | isa        => 'ArrayRef[ArrayRef[ArrayRef[Str]]]', | 
| 131 |  |  |  |  |  |  | is         => 'ro', | 
| 132 |  |  |  |  |  |  | lazy_build => 1 | 
| 133 |  |  |  |  |  |  | ); | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | method _build_list_of_people_with_similar_names() { | 
| 136 |  |  |  |  |  |  | my $people_tuples = Math::Combinatorics->new( | 
| 137 |  |  |  |  |  |  | count => 2,                       # This could be abstracted | 
| 138 |  |  |  |  |  |  | data  => $self->list_of_people, | 
| 139 |  |  |  |  |  |  | ); | 
| 140 |  |  |  |  |  |  | my @list_of_people_with_similar_names; | 
| 141 |  |  |  |  |  |  | while (my ($first_person, $second_person) = $people_tuples->next_combination()) { | 
| 142 |  |  |  |  |  |  | my $name_comparison = CompareTwoNames->new( | 
| 143 |  |  |  |  |  |  | one_person     => $first_person, | 
| 144 |  |  |  |  |  |  | another_person => $second_person, | 
| 145 |  |  |  |  |  |  | ); | 
| 146 |  |  |  |  |  |  | my $distance_between_names = $name_comparison->distance_between(); | 
| 147 |  |  |  |  |  |  | if (   ($distance_between_names >= $self->minimum_distance) | 
| 148 |  |  |  |  |  |  | && ($distance_between_names <= $self->maximum_distance)) | 
| 149 |  |  |  |  |  |  | { | 
| 150 |  |  |  |  |  |  | push @list_of_people_with_similar_names, [ $first_person, $second_person ]; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  | } | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | return \@list_of_people_with_similar_names | 
| 155 |  |  |  |  |  |  | }; | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | method _build_list_of_similar_name_pairs() { | 
| 158 |  |  |  |  |  |  | my @list_of_similar_name_pairs; | 
| 159 |  |  |  |  |  |  | foreach my $pair_of_people (@{ $self->list_of_people_with_similar_names }) { | 
| 160 |  |  |  |  |  |  | push @list_of_similar_name_pairs, | 
| 161 |  |  |  |  |  |  | [ | 
| 162 |  |  |  |  |  |  | [ $pair_of_people->[0]->first_name, $pair_of_people->[0]->last_name ], | 
| 163 |  |  |  |  |  |  | [ $pair_of_people->[1]->first_name, $pair_of_people->[1]->last_name ] | 
| 164 |  |  |  |  |  |  | ]; | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  | return \@list_of_similar_name_pairs | 
| 167 |  |  |  |  |  |  | }; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | __END__ | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | =head1 Accessors | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | =head2 list_of_similar_name_pairs | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | Called on a SimilarNames object, this returns the list of similar name pairs | 
| 177 |  |  |  |  |  |  | for the Person objects passed in; each name is a [first_name, last_name] array reference. | 
| 178 |  |  |  |  |  |  | It uses the Levenshtein edit distance, so paired names are close to one another in spelling. | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | =head2 list_of_people_with_similar_names | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | This accessor is similar to C<list_of_similar_name_pairs> but returns a | 
| 183 |  |  |  |  |  |  | list of Person object pairs instead of the name strings. | 
| 184 |  |  |  |  |  |  |  | 
| 185 |  |  |  |  |  |  | =head1 Authors | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | Mateu X. Hunter C<hunter@missoula.org> | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | =head1 Copyright | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | Copyright 2010, Mateu X. Hunter | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | =head1 License | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | You may distribute this code under the same terms as Perl itself. | 
| 196 |  |  |  |  |  |  |  | 
| 197 |  |  |  |  |  |  | =head1 Code Repository | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | http://github.com/mateu/Lingua-EN-SimilarNames-Levenshtein | 
| 200 |  |  |  |  |  |  |  | 
| 201 |  |  |  |  |  |  | =cut | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | 1 |