File Coverage

blib/lib/Algorithm/SixDegrees.pm
Criterion Covered Total %
statement 169 189 89.4
branch 96 124 77.4
condition 35 54 64.8
subroutine 12 12 100.0
pod 6 6 100.0
total 318 385 82.6


line stmt bran cond sub pod time code
1             package Algorithm::SixDegrees;
2              
3             require 5.006;
4 5     5   112786 use warnings;
  5         11  
  5         156  
5 5     5   27 use strict;
  5         11  
  5         178  
6 5     5   5559 use UNIVERSAL qw(isa);
  5         81  
  5         33  
7              
8             =head1 NAME
9              
10             Algorithm::SixDegrees - Find a path through linked elements in a set
11              
12             =head1 VERSION
13              
14             Version 0.03
15              
16             =cut
17              
18             our $VERSION = '0.03';
19             our $ERROR = '';
20              
21             =head1 SYNOPSIS
22              
23             use Algorithm::SixDegrees;
24              
25             my $sd1 = Algorithm::SixDegrees->new();
26             $sd1->data_source( actors => \&starred_in );
27             $sd1->data_source( movies => \&stars_of );
28             @elems = $sd1->make_link('actors', 'Tom Cruise', 'Kevin Bacon');
29              
30             my $sd2 = Algorithm::SixDegrees->new();
31             $sd2->forward_data_source( friends => \&friends, @args );
32             $sd2->reverse_data_source( friends => \&friend_of, @args );
33             @elems = $sd2->make_link('friends', 'Bob', 'Mark');
34              
35             =head1 DESCRIPTION
36              
37             C is a Perl implementation of a breadth-first
38             search through a set of linked elements in order to find the shortest
39             possible chain linking two specific elements together.
40              
41             In simpler terms, this module will take a bunch of related items and
42             attempt to find a relationship between two of them. It looks for the
43             shortest (and generally, simplest) relationship it can find.
44              
45             =head1 CONSTRUCTOR
46              
47             =head2 new()
48              
49             C requires use as an object; it can't (yet) be used
50             as a stand-alone module. C takes no arguments, however.
51              
52             =cut
53              
54             sub new {
55 10     10 1 90 my $class = shift;
56 10         56 my $self = {
57             _source_left => {},
58             _source_right => {},
59             _sources => [],
60             _investigated => {},
61             };
62 10         46 return bless $self,$class;
63             }
64              
65             =head1 FUNCTIONS
66              
67             =head2 forward_data_source( name => \&sub, @args );
68              
69             Tells C that all items in the data set relating to
70             C can be retrieved by calling C. See L.
71              
72             In our friends example above, if Bob considers Mark a friend, but Mark
73             doesn't consider Bob a friend, calling the sub with "Bob" as an argument
74             should return "Mark", but calling the sub with "Mark" as an argument
75             should not return "Bob".
76              
77             =cut
78              
79             sub forward_data_source {
80 23     23 1 5653 my ($self, $name, $sub, @args) = @_;
81 23 100       120 die "Data sources must be named\n" unless defined($name);
82 19 100       70 die "Data sources must have code supplied\n" unless defined($sub);
83 17 100 100     166 die "Data sources must have a coderef argument\n" unless ref($sub) && isa($sub,'CODE');
84 13         53 $self->{'_source_left'}{$name}{'sub'} = $sub;
85 13         36 $self->{'_source_left'}{$name}{'args'} = \@args;
86 13         17 foreach my $source (@{$self->{'_sources'}}) {
  13         35  
87 6 100       24 return if $source eq $name;
88             }
89 12         23 push(@{$self->{'_sources'}},$name);
  12         27  
90 12         25 return;
91             }
92              
93             =head2 reverse_data_source( name => \&sub, @args );
94              
95             Tells C that all items in the data set related to
96             by C can be retrieved by calling C. See L.
97              
98             In the same friends example, calling the sub with "Bob" as an argument
99             should not return "Mark", but calling the sub with "Mark" as an argument
100             should return "Bob".
101              
102             =cut
103              
104             sub reverse_data_source {
105 18     18 1 5106 my ($self, $name, $sub, @args) = @_;
106 18 100       83 die "Data sources must be named\n" unless defined($name);
107 16 100       48 die "Data sources must have code supplied\n" unless defined($sub);
108 15 100 100     108 die "Data sources must have a coderef argument\n" unless ref($sub) && isa($sub,'CODE');
109 13         42 $self->{'_source_right'}{$name}{'sub'} = $sub;
110 13         29 $self->{'_source_right'}{$name}{'args'} = \@args;
111 13         20 foreach my $source (@{$self->{'_sources'}}) {
  13         28  
112 17 100       54 return if $source eq $name;
113             }
114 1         2 push(@{$self->{'_sources'}},$name);
  1         4  
115 1         3 return;
116             }
117              
118             =head2 data_source( name => \&sub, @args );
119              
120             Sets up a data source as both forward and reverse. This is useful if
121             the data source is mutually relational; that is, in our actors/movies
122             example, Kevin Bacon is always in Mystic River, and Mystic River always
123             has Kevin Bacon in it.
124              
125             =cut
126              
127             sub data_source {
128 15     15 1 4903 my ($self, $name, $sub, @args) = @_;
129 15         40 $self->forward_data_source($name,$sub,@args);
130 10         28 $self->reverse_data_source($name,$sub,@args);
131 10         22 return;
132             }
133              
134             =head2 make_link
135              
136             Does the work of making the link. Returns a list or arrayref, based
137             on calling context.
138              
139             =cut
140              
141             sub make_link {
142 35     35 1 564 my ($self, $mainsource, $start, $end) = @_;
143 35         48 $ERROR = undef;
144              
145 35 100 66     219 unless (ref($self) && isa($self,__PACKAGE__)) {
146 2         3 $ERROR = 'Invalid object reference used to call make_link';
147 2         10 return;
148             }
149 33 100       69 unless (defined($mainsource)) {
150 1         3 $ERROR = 'Data set name is not defined';
151 1         5 return;
152             }
153 32 100       64 unless (defined($start)) {
154 1         3 $ERROR = 'Starting identifier is not defined';
155 1         7 return;
156             }
157 31 100       67 unless (defined($end)) {
158 1         2 $ERROR = 'Ending identifier is not defined';
159 1         5 return;
160             }
161              
162             # Assume working from "left to right"; therefore, links leading
163             # from the starting identifier are on the "left side", and links
164             # leading to the ending identifier are on the "right side".
165 30         55 my %leftside = ();
166 30         41 my %rightside = ();
167              
168             # If $altsource gets defined, that means there are two sources used.
169 30         34 my $altsource;
170              
171 30 100 100     184 unless (exists($self->{'_sources'}) && isa($self->{'_sources'},'ARRAY')) {
172 2         4 $ERROR = 'No data sources defined';
173 2         14 return;
174             }
175 28         37 my @sources = @{$self->{'_sources'}};
  28         72  
176 28         40 my $source_exists = 0;
177 28         66 foreach my $source (@sources) {
178 39 100       87 if ($mainsource eq $source) {
179 26         32 $source_exists = 1;
180 26         154 $leftside{$source} = {$start, undef};
181 26         70 $rightside{$source} = {$end, undef};
182             } else {
183 13         17 $altsource = $source;
184 13         25 $leftside{$source} = {};
185 13         64 $rightside{$source} = {};
186             }
187 39 100 100     533 unless (ref($self->{'_source_left'}) &&
      66        
188             ref($self->{'_source_left'}{$source}) &&
189             isa($self->{'_source_left'}{$source}{'sub'},'CODE')) {
190 2         5 $ERROR = "Source '$source' does not have a valid forward subroutine";
191 2         13 return;
192             }
193 37 100 100     856 unless (ref($self->{'_source_right'}) &&
      66        
194             ref($self->{'_source_right'}{$source}) &&
195             isa($self->{'_source_right'}{$source}{'sub'},'CODE')) {
196 2         6 $ERROR = "Source '$source' does not have a valid reverse subroutine";
197 2         15 return;
198             }
199 35         121 $self->{'_investigated'}{$source} = {};
200             }
201 24 100       241 unless ($source_exists) {
202 2         17 $ERROR = "Source '$mainsource' was not defined";
203 2         11 return;
204             }
205 22 100       56 if (scalar(keys(%leftside)) > 2) {
206 1         2 $ERROR = 'Too many defined data sources; maximum is 2';
207 1         7 return;
208             }
209              
210              
211 21 100       47 if ($start eq $end) {
212             # Only one element if the start and end are the same.
213 5 100       46 return wantarray ? ($start) : [$start];
214             }
215              
216 16         22 my $leftcount = 1;
217 16         22 my $rightcount = 1;
218              
219             # If altsource exists, pull the left side main, then pull the right side main,
220             # and check for middle matches. This reduces database hits as opposed to
221             # where it's pulled left main - left alt; left alt >= 1 at that point, whereas
222             # right main on the first loop == 1. Following that, pull the left alt and
223             # then the right alt, which gets the CHAINLOOP back in synch.
224              
225 16 100       45 if (defined($altsource)) {
226 8         29 my ($count,$id,$err) = $self->_match('left',$mainsource,$altsource,\%leftside,\%rightside);
227 8 50       40 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
228 8 50       18 if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
  0         0  
  0         0  
229 8 50 33     38 return if !defined($count) || $count == 0;
230              
231 8         23 ($count,$id,$err) = $self->_match('right',$mainsource,$altsource,\%rightside,\%leftside);
232 8 50       21 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
233 8 100       20 if (defined($id)) {
234 2         8 my @abc = ($leftside{$altsource}{$id},$id,$rightside{$altsource}{$id});
235 2 50       28 return wantarray ? @abc : \@abc;
236             };
237 6 100 66     49 return if !defined($count) || $count == 0;
238              
239 4         12 ($leftcount,$id,$err) = $self->_match('left',$altsource,$mainsource,\%leftside,\%rightside);
240 4 50       17 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
241 4 50       11 if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
  0         0  
  0         0  
242 4 50 33     27 return if !defined($leftcount) || $leftcount == 0;
243              
244 4         14 ($rightcount,$id,$err) = $self->_match('right',$altsource,$mainsource,\%rightside,\%leftside);
245 4 50       14 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
246 4 100       20 if (defined($id)) {
247 2         7 my $la = $leftside{$mainsource}{$id};
248 2         5 my $lm = $leftside{$altsource}{$la};
249 2         5 my $ra = $rightside{$mainsource}{$id};
250 2         4 my $rm = $rightside{$altsource}{$ra};
251 2 50 33     59 unless (defined($la) && defined($lm) && defined($ra) && defined($rm)) {
      33        
      33        
252 0         0 $ERROR = 'Internal error, identifier not defined';
253 0         0 return;
254             }
255 2 50       31 return wantarray ? ($lm,$la,$id,$ra,$rm) : [$lm,$la,$id,$ra,$rm];
256             };
257 2 50 33     17 return if !defined($rightcount) || $rightcount == 0;
258              
259             }
260              
261             # There is bias here, but the tie needs to be broken, so in the
262             # event of a tie, move left to right in the chain.
263              
264             CHAINLOOP: {
265 10         13 my $id;
  45         66  
266             my $err;
267 45 100       101 if ($leftcount <= $rightcount) {
268 42 100       75 if (defined($altsource)) {
269 4         16 ($leftcount,$id,$err) = $self->_match_two('left',$mainsource,$altsource,\%leftside,\%rightside);
270             } else {
271 38         101 ($leftcount,$id,$err) = $self->_match_one('left',$mainsource,\%leftside,\%rightside);
272             }
273             } else {
274 3 100       10 if (defined($altsource)) {
275 2         10 ($rightcount,$id,$err) = $self->_match_two('right',$mainsource,$altsource,\%rightside,\%leftside);
276             } else {
277 1         3 ($rightcount,$id,$err) = $self->_match_one('right',$mainsource,\%rightside,\%leftside);
278             }
279             }
280 45 50       124 if(defined($err)) {
281 0         0 $ERROR = $err;
282 0         0 return;
283             }
284 45 100       84 if(defined($id)) {
285             # If _match returns an id, that means a match was found.
286             # To get it, we simply have to trace out from the "middle"
287             # to get the full link.
288 7         16 my @match = ($id);
289             # middle, building to left.
290 7         18 while($match[0] ne $start) {
291 33         77 unshift(@match,$leftside{$mainsource}{$match[0]});
292 33 100       160 unshift(@match,$leftside{$altsource}{$match[0]}) if defined($altsource);
293 33 50       102 if (!defined($match[0])) {
294 0         0 $ERROR = 'Internal error, left identifier was not defined';
295 0         0 return;
296             }
297             }
298             # middle building to right
299 7         22 while($match[-1] ne $end) {
300 4         10 push(@match,$rightside{$mainsource}{$match[-1]});
301 4 50       12 push(@match,$rightside{$altsource}{$match[-1]}) if defined($altsource);
302 4 50       16 if (!defined($match[-1])) {
303 0         0 $ERROR = 'Internal error, right identifier was not defined';
304 0         0 return;
305             }
306             }
307 7 100       107 return wantarray ? @match : \@match;
308             }
309 38 100 100     170 if ($leftcount == 0 || $rightcount == 0) {
310 3         8 last CHAINLOOP;
311             }
312 35         57 redo CHAINLOOP;
313             }
314              
315 3 50       34 return wantarray ? () : [];
316             }
317              
318             =head2 error
319              
320             Returns the current value of C<$Algorithm::SixDegrees::ERROR>. See
321             L.
322              
323             =cut
324              
325             sub error {
326 20     20 1 104 return $ERROR;
327             }
328              
329             sub _match_two {
330 6     6   13 my ($self,$side,$mainsource,$altsource,$thisside,$thatside) = @_;
331             # Assume $self is OK since this is an internal function
332 6         15 my ($count,$id,$err) = $self->_match($side,$mainsource,$altsource,$thisside,$thatside);
333 6 50       16 return (undef,undef,$err) if defined($err);
334 6 50       15 return ($count,$id,$err) if defined($id);
335 6 50 33     41 return (0,undef,undef) if !defined($count) || $count == 0;
336             # mental note: this should never return an id
337             # after all, you can't have two mains together in a true
338             # alternating chain
339 6         15 return $self->_match($side,$altsource,$mainsource,$thisside,$thatside);
340             }
341              
342             sub _match_one {
343 39     39   65 my ($self,$side,$source,$thisside,$thatside) = @_;
344             # Assume $self is OK since this is an internal function
345 39         81 return $self->_match($side,$source,$source,$thisside,$thatside);
346             }
347              
348             sub _match {
349 75     75   122 my ($self,$side,$fromsource,$tosource,$thisside,$thatside) = @_;
350             # Assume $self is OK since this is an internal function
351 75 50       301 return (undef,undef,'Internal error: missing code') unless isa($self->{"_source_$side"}{$fromsource}{'sub'},'CODE');
352 75 50       214 return (undef,undef,'Internal error: missing side (1)') unless isa($thisside,'HASH');
353 75 50       631 return (undef,undef,'Internal error: missing side (2)') unless exists($thisside->{$fromsource});
354 75 50       194 return (undef,undef,'Internal error: missing side (3)') unless isa($thatside,'HASH');
355 75 50       621 return (undef,undef,'Internal error: missing side (4)') unless exists($thatside->{$tosource});
356              
357 75         89 my $newsidecount = 0;
358 75         79 foreach my $id (keys %{$thisside->{$fromsource}}) {
  75         301  
359 212 100       1078 next if exists($self->{"_investigated"}{$fromsource}{$id});
360 80         161 $self->{"_investigated"}{$fromsource}{$id} = 1;
361              
362 80 50       292 my $use_args = isa($self->{"_source_$side"}{$fromsource}{'args'},'ARRAY') ? 1 : 0;
363              
364 80 50       150 my @ids = &{$self->{"_source_$side"}{$fromsource}{'sub'}}($id,($use_args?@{$self->{"_source_$side"}{$fromsource}{'args'}}:()));
  80         337  
  80         196  
365 80 50 66     1943 return (undef,undef,$ERROR) if scalar(@ids) == 1 && !defined($ids[0]);
366 80         190 foreach my $thisid (@ids) {
367 140 100       356 unless (exists($thisside->{$tosource}{$thisid})) {
368 87         180 $thisside->{$tosource}{$thisid} = $id;
369 87         108 $newsidecount++;
370             }
371 140 100       611 return (0,$thisid,undef) if exists($thatside->{$tosource}{$thisid});
372             }
373             }
374              
375 64         221 return $newsidecount;
376             }
377              
378             =head1 SUBROUTINE RULES
379              
380             Passed-in subroutines should take at least one argument, which
381             should be some form of unique identifier, and return a list of
382             unique identifiers that have a relation to the argument.
383              
384             The unique identifiers must be able to be compared with C.
385              
386             The identifiers should be unique in datatype; that is, in an
387             actor/movie relationship, "Kevin Bacon" can be both the name of an
388             actor and a movie.
389              
390             A linked data type must return identifiers that relate across the
391             link; that is, for an actor/movie relationship, an actor subroutine
392             should return movies, and a movie subroutine should return actors.
393              
394             Additional arguments can be provided; these will be stored in the
395             object and passed through as the second and further arguments to
396             the subroutine. This may be useful, for example, if you're using
397             some form of results caching and need to pass a Cd handle
398             around.
399              
400             If you return explicit undef, please set C<$Algorithm::SixDegrees::ERROR>
401             with an error code. Explicit undef means that an error occurred
402             that should terminate the search; it should be returned as a
403             one-element list.
404              
405             =head1 AUTHOR
406              
407             Pete Krawczyk, C<< >>
408              
409             =head1 BUGS
410              
411             Please report any bugs or feature requests to
412             C, or through the web interface at
413             L. I will be notified, and then you'll automatically
414             be notified of progress on your bug as I make changes.
415              
416             =head1 ACKNOWLEDGEMENTS
417              
418             Andy Lester and Ricardo Signes wrote Module::Starter, which helped
419             get the framework up and running fairly quickly.
420              
421             Brad Fitzpatrick of L for giving me access
422             to a LiveJournal interface to determine linking information on that
423             site, which enabled me to write the algorithm that has been reduced
424             into this module.
425              
426             =head1 COPYRIGHT & LICENSE
427              
428             Copyright 2005 Pete Krawczyk, All Rights Reserved.
429              
430             This program is free software; you can redistribute it and/or modify it
431             under the same terms as Perl itself.
432              
433             =cut
434              
435             1; # End of Algorithm::SixDegrees