File Coverage

blib/lib/Algorithm/SixDegrees.pm
Criterion Covered Total %
statement 169 189 89.4
branch 96 124 77.4
condition 35 54 64.8
subroutine 12 12 100.0
pod 6 6 100.0
total 318 385 82.6


line stmt bran cond sub pod time code
1             package Algorithm::SixDegrees;
2              
3             require 5.006;
4 6     6   286650 use warnings;
  6         45  
  6         172  
5 6     6   28 use strict;
  6         9  
  6         115  
6 6     6   23 use Scalar::Util qw(reftype);
  6         9  
  6         9823  
7              
8             =head1 NAME
9              
10             Algorithm::SixDegrees - Find a path through linked elements in a set
11              
12             =head1 VERSION
13              
14             Version 1.00
15              
16             =cut
17              
18             our $VERSION = '1.00';
19             our $ERROR = '';
20              
21             =head1 SYNOPSIS
22              
23             use Algorithm::SixDegrees;
24              
25             my $sd1 = Algorithm::SixDegrees->new();
26             $sd1->data_source( actors => \&starred_in );
27             $sd1->data_source( movies => \&stars_of );
28             @elems = $sd1->make_link('actors', 'Tom Cruise', 'Kevin Bacon');
29              
30             my $sd2 = Algorithm::SixDegrees->new();
31             $sd2->forward_data_source( friends => \&friends, @args );
32             $sd2->reverse_data_source( friends => \&friend_of, @args );
33             @elems = $sd2->make_link('friends', 'Bob', 'Mark');
34              
35             =head1 DESCRIPTION
36              
37             C is a Perl implementation of a breadth-first
38             search through a set of linked elements in order to find the shortest
39             possible chain linking two specific elements together.
40              
41             In simpler terms, this module will take a bunch of related items and
42             attempt to find a relationship between two of them. It looks for the
43             shortest (and generally, simplest) relationship it can find.
44              
45             =head1 CONSTRUCTOR
46              
47             =head2 new()
48              
49             C requires use as an object; it can't (yet) be used
50             as a stand-alone module. C takes no arguments, however.
51              
52             =cut
53              
54             sub new {
55 10     10 1 315 my $class = shift;
56 10         41 my $self = {
57             _source_left => {},
58             _source_right => {},
59             _sources => [],
60             _investigated => {},
61             };
62 10         39 return bless $self,$class;
63             }
64              
65             =head1 FUNCTIONS
66              
67             =head2 forward_data_source( name => \&sub, @args );
68              
69             Tells C that all items in the data set relating to
70             C can be retrieved by calling C. See L.
71              
72             In our friends example above, if Bob considers Mark a friend, but Mark
73             doesn't consider Bob a friend, calling the sub with "Bob" as an argument
74             should return "Mark", but calling the sub with "Mark" as an argument
75             should not return "Bob".
76              
77             =cut
78              
79             sub forward_data_source {
80 23     23 1 2943 my ($self, $name, $sub, @args) = @_;
81 23 100       83 die "Data sources must be named\n" unless defined($name);
82 19 100       45 die "Data sources must have code supplied\n" unless defined($sub);
83 17 100 100     114 die "Data sources must have a coderef argument\n" unless ref($sub) && reftype($sub) eq 'CODE';
84 13         42 $self->{'_source_left'}{$name}{'sub'} = $sub;
85 13         23 $self->{'_source_left'}{$name}{'args'} = \@args;
86 13         19 foreach my $source (@{$self->{'_sources'}}) {
  13         26  
87 6 100       15 return if $source eq $name;
88             }
89 12         18 push(@{$self->{'_sources'}},$name);
  12         29  
90 12         19 return;
91             }
92              
93             =head2 reverse_data_source( name => \&sub, @args );
94              
95             Tells C that all items in the data set related to
96             by C can be retrieved by calling C. See L.
97              
98             In the same friends example, calling the sub with "Bob" as an argument
99             should not return "Mark", but calling the sub with "Mark" as an argument
100             should return "Bob".
101              
102             =cut
103              
104             sub reverse_data_source {
105 18     18 1 2752 my ($self, $name, $sub, @args) = @_;
106 18 100       86 die "Data sources must be named\n" unless defined($name);
107 16 100       48 die "Data sources must have code supplied\n" unless defined($sub);
108 15 100 100     88 die "Data sources must have a coderef argument\n" unless ref($sub) && reftype($sub) eq 'CODE';
109 13         31 $self->{'_source_right'}{$name}{'sub'} = $sub;
110 13         23 $self->{'_source_right'}{$name}{'args'} = \@args;
111 13         20 foreach my $source (@{$self->{'_sources'}}) {
  13         76  
112 17 100       44 return if $source eq $name;
113             }
114 1         2 push(@{$self->{'_sources'}},$name);
  1         3  
115 1         2 return;
116             }
117              
118             =head2 data_source( name => \&sub, @args );
119              
120             Sets up a data source as both forward and reverse. This is useful if
121             the data source is mutually relational; that is, in our actors/movies
122             example, Kevin Bacon is always in Mystic River, and Mystic River always
123             has Kevin Bacon in it.
124              
125             =cut
126              
127             sub data_source {
128 15     15 1 3854 my ($self, $name, $sub, @args) = @_;
129 15         44 $self->forward_data_source($name,$sub,@args);
130 10         26 $self->reverse_data_source($name,$sub,@args);
131 10         19 return;
132             }
133              
134             =head2 make_link
135              
136             Does the work of making the link. Returns a list or arrayref, based
137             on calling context.
138              
139             =cut
140              
141             sub make_link {
142 35     35 1 525 my ($self, $mainsource, $start, $end) = @_;
143 35         47 $ERROR = undef;
144              
145 35 100 66     174 unless (ref($self) && $self->isa(__PACKAGE__)) {
146 2         4 $ERROR = 'Invalid object reference used to call make_link';
147 2         8 return;
148             }
149 33 100       71 unless (defined($mainsource)) {
150 1         6 $ERROR = 'Data set name is not defined';
151 1         3 return;
152             }
153 32 100       53 unless (defined($start)) {
154 1         2 $ERROR = 'Starting identifier is not defined';
155 1         4 return;
156             }
157 31 100       54 unless (defined($end)) {
158 1         2 $ERROR = 'Ending identifier is not defined';
159 1         4 return;
160             }
161              
162             # Assume working from "left to right"; therefore, links leading
163             # from the starting identifier are on the "left side", and links
164             # leading to the ending identifier are on the "right side".
165 30         37 my %leftside = ();
166 30         36 my %rightside = ();
167              
168             # If $altsource gets defined, that means there are two sources used.
169 30         37 my $altsource;
170              
171 30 100 100     138 unless (exists($self->{'_sources'}) && reftype($self->{'_sources'}) eq 'ARRAY') {
172 2         3 $ERROR = 'No data sources defined';
173 2         8 return;
174             }
175 28         45 my @sources = @{$self->{'_sources'}};
  28         59  
176 28         48 my $source_exists = 0;
177 28         40 foreach my $source (@sources) {
178 39 100       80 if ($mainsource eq $source) {
179 26         35 $source_exists = 1;
180 26         55 $leftside{$source} = {$start, undef};
181 26         54 $rightside{$source} = {$end, undef};
182             } else {
183 13         19 $altsource = $source;
184 13         22 $leftside{$source} = {};
185 13         23 $rightside{$source} = {};
186             }
187 39 100 100     194 unless (ref($self->{'_source_left'}) &&
      66        
188             ref($self->{'_source_left'}{$source}) &&
189             reftype($self->{'_source_left'}{$source}{'sub'}) eq 'CODE') {
190 2         5 $ERROR = "Source '$source' does not have a valid forward subroutine";
191 2         11 return;
192             }
193 37 100 100     180 unless (ref($self->{'_source_right'}) &&
      66        
194             ref($self->{'_source_right'}{$source}) &&
195             reftype($self->{'_source_right'}{$source}{'sub'}) eq 'CODE') {
196 2         5 $ERROR = "Source '$source' does not have a valid reverse subroutine";
197 2         10 return;
198             }
199 35         76 $self->{'_investigated'}{$source} = {};
200             }
201 24 100       49 unless ($source_exists) {
202 2         5 $ERROR = "Source '$mainsource' was not defined";
203 2         9 return;
204             }
205 22 100       55 if (scalar(keys(%leftside)) > 2) {
206 1         2 $ERROR = 'Too many defined data sources; maximum is 2';
207 1         6 return;
208             }
209              
210              
211 21 100       38 if ($start eq $end) {
212             # Only one element if the start and end are the same.
213 5 100       35 return wantarray ? ($start) : [$start];
214             }
215              
216 16         20 my $leftcount = 1;
217 16         20 my $rightcount = 1;
218              
219             # If altsource exists, pull the left side main, then pull the right side main,
220             # and check for middle matches. This reduces database hits as opposed to
221             # where it's pulled left main - left alt; left alt >= 1 at that point, whereas
222             # right main on the first loop == 1. Following that, pull the left alt and
223             # then the right alt, which gets the CHAINLOOP back in synch.
224              
225 16 100       31 if (defined($altsource)) {
226 8         22 my ($count,$id,$err) = $self->_match('left',$mainsource,$altsource,\%leftside,\%rightside);
227 8 50       17 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
228 8 50       15 if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
  0         0  
  0         0  
229 8 50 33     31 return if !defined($count) || $count == 0;
230              
231 8         25 ($count,$id,$err) = $self->_match('right',$mainsource,$altsource,\%rightside,\%leftside);
232 8 50       15 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
233 8 100       17 if (defined($id)) {
234 2         6 my @abc = ($leftside{$altsource}{$id},$id,$rightside{$altsource}{$id});
235 2 50       19 return wantarray ? @abc : \@abc;
236             };
237 6 100 66     34 return if !defined($count) || $count == 0;
238              
239 4         10 ($leftcount,$id,$err) = $self->_match('left',$altsource,$mainsource,\%leftside,\%rightside);
240 4 50       9 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
241 4 50       7 if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
  0         0  
  0         0  
242 4 50 33     14 return if !defined($leftcount) || $leftcount == 0;
243              
244 4         9 ($rightcount,$id,$err) = $self->_match('right',$altsource,$mainsource,\%rightside,\%leftside);
245 4 50       9 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
246 4 100       19 if (defined($id)) {
247 2         4 my $la = $leftside{$mainsource}{$id};
248 2         5 my $lm = $leftside{$altsource}{$la};
249 2         4 my $ra = $rightside{$mainsource}{$id};
250 2         14 my $rm = $rightside{$altsource}{$ra};
251 2 50 33     26 unless (defined($la) && defined($lm) && defined($ra) && defined($rm)) {
      33        
      33        
252 0         0 $ERROR = 'Internal error, identifier not defined';
253 0         0 return;
254             }
255 2 50       25 return wantarray ? ($lm,$la,$id,$ra,$rm) : [$lm,$la,$id,$ra,$rm];
256             };
257 2 50 33     10 return if !defined($rightcount) || $rightcount == 0;
258              
259             }
260              
261             # There is bias here, but the tie needs to be broken, so in the
262             # event of a tie, move left to right in the chain.
263              
264             CHAINLOOP: {
265 10         20 my $id;
  45         58  
266             my $err;
267 45 100       77 if ($leftcount <= $rightcount) {
268 42 100       58 if (defined($altsource)) {
269 4         9 ($leftcount,$id,$err) = $self->_match_two('left',$mainsource,$altsource,\%leftside,\%rightside);
270             } else {
271 38         70 ($leftcount,$id,$err) = $self->_match_one('left',$mainsource,\%leftside,\%rightside);
272             }
273             } else {
274 3 100       35 if (defined($altsource)) {
275 2         9 ($rightcount,$id,$err) = $self->_match_two('right',$mainsource,$altsource,\%rightside,\%leftside);
276             } else {
277 1         7 ($rightcount,$id,$err) = $self->_match_one('right',$mainsource,\%rightside,\%leftside);
278             }
279             }
280 45 50       91 if(defined($err)) {
281 0         0 $ERROR = $err;
282 0         0 return;
283             }
284 45 100       68 if(defined($id)) {
285             # If _match returns an id, that means a match was found.
286             # To get it, we simply have to trace out from the "middle"
287             # to get the full link.
288 7         12 my @match = ($id);
289             # middle, building to left.
290 7         17 while($match[0] ne $start) {
291 33         74 unshift(@match,$leftside{$mainsource}{$match[0]});
292 33 100       56 unshift(@match,$leftside{$altsource}{$match[0]}) if defined($altsource);
293 33 50       60 if (!defined($match[0])) {
294 0         0 $ERROR = 'Internal error, left identifier was not defined';
295 0         0 return;
296             }
297             }
298             # middle building to right
299 7         15 while($match[-1] ne $end) {
300 4         8 push(@match,$rightside{$mainsource}{$match[-1]});
301 4 50       9 push(@match,$rightside{$altsource}{$match[-1]}) if defined($altsource);
302 4 50       12 if (!defined($match[-1])) {
303 0         0 $ERROR = 'Internal error, right identifier was not defined';
304 0         0 return;
305             }
306             }
307 7 100       70 return wantarray ? @match : \@match;
308             }
309 38 100 100     105 if ($leftcount == 0 || $rightcount == 0) {
310 3         6 last CHAINLOOP;
311             }
312 35         54 redo CHAINLOOP;
313             }
314              
315 3 50       306 return wantarray ? () : [];
316             }
317              
318             =head2 error
319              
320             Returns the current value of C<$Algorithm::SixDegrees::ERROR>. See
321             L.
322              
323             =cut
324              
325             sub error {
326 20     20 1 97 return $ERROR;
327             }
328              
329             sub _match_two {
330 6     6   12 my ($self,$side,$mainsource,$altsource,$thisside,$thatside) = @_;
331             # Assume $self is OK since this is an internal function
332 6         12 my ($count,$id,$err) = $self->_match($side,$mainsource,$altsource,$thisside,$thatside);
333 6 50       29 return (undef,undef,$err) if defined($err);
334 6 50       13 return ($count,$id,$err) if defined($id);
335 6 50 33     31 return (0,undef,undef) if !defined($count) || $count == 0;
336             # mental note: this should never return an id
337             # after all, you can't have two mains together in a true
338             # alternating chain
339 6         41 return $self->_match($side,$altsource,$mainsource,$thisside,$thatside);
340             }
341              
342             sub _match_one {
343 39     39   61 my ($self,$side,$source,$thisside,$thatside) = @_;
344             # Assume $self is OK since this is an internal function
345 39         83 return $self->_match($side,$source,$source,$thisside,$thatside);
346             }
347              
348             sub _match {
349 75     75   122 my ($self,$side,$fromsource,$tosource,$thisside,$thatside) = @_;
350             # Assume $self is OK since this is an internal function
351 75 50       247 return (undef,undef,'Internal error: missing code') unless reftype($self->{"_source_$side"}{$fromsource}{'sub'}) eq 'CODE';
352 75 50       145 return (undef,undef,'Internal error: missing side (1)') unless reftype($thisside) eq 'HASH';
353 75 50       122 return (undef,undef,'Internal error: missing side (2)') unless exists($thisside->{$fromsource});
354 75 50       133 return (undef,undef,'Internal error: missing side (3)') unless reftype($thatside) eq 'HASH';
355 75 50       120 return (undef,undef,'Internal error: missing side (4)') unless exists($thatside->{$tosource});
356              
357 75         86 my $newsidecount = 0;
358 75         86 foreach my $id (keys %{$thisside->{$fromsource}}) {
  75         178  
359 200 100       328 next if exists($self->{"_investigated"}{$fromsource}{$id});
360 80         129 $self->{"_investigated"}{$fromsource}{$id} = 1;
361              
362 80 50       201 my $use_args = reftype($self->{"_source_$side"}{$fromsource}{'args'}) eq 'ARRAY' ? 1 : 0;
363              
364 80 50       116 my @ids = &{$self->{"_source_$side"}{$fromsource}{'sub'}}($id,($use_args?@{$self->{"_source_$side"}{$fromsource}{'args'}}:()));
  80         157  
  80         145  
365 80 50 66     1257 return (undef,undef,$ERROR) if scalar(@ids) == 1 && !defined($ids[0]);
366 80         107 foreach my $thisid (@ids) {
367 140 100       224 unless (exists($thisside->{$tosource}{$thisid})) {
368 87         155 $thisside->{$tosource}{$thisid} = $id;
369 87         102 $newsidecount++;
370             }
371 140 100       270 return (0,$thisid,undef) if exists($thatside->{$tosource}{$thisid});
372             }
373             }
374              
375 64         131 return $newsidecount;
376             }
377              
378             =head1 SUBROUTINE RULES
379              
380             Passed-in subroutines should take at least one argument, which
381             should be some form of unique identifier, and return a list of
382             unique identifiers that have a relation to the argument.
383              
384             The unique identifiers must be able to be compared with C.
385              
386             The identifiers should be unique in datatype; that is, in an
387             actor/movie relationship, "Kevin Bacon" can be both the name of an
388             actor and a movie.
389              
390             A linked data type must return identifiers that relate across the
391             link; that is, for an actor/movie relationship, an actor subroutine
392             should return movies, and a movie subroutine should return actors.
393              
394             Additional arguments can be provided; these will be stored in the
395             object and passed through as the second and further arguments to
396             the subroutine. This may be useful, for example, if you're using
397             some form of results caching and need to pass a Cd handle
398             around.
399              
400             If you return explicit undef, please set C<$Algorithm::SixDegrees::ERROR>
401             with an error code. Explicit undef means that an error occurred
402             that should terminate the search; it should be returned as a
403             one-element list.
404              
405             =head1 AUTHOR
406              
407             Pete Krawczyk, C<< >>
408              
409             =head1 BUGS
410              
411             Please report any bugs or feature requests to
412             C, or through the web interface at
413             L. I will be notified, and then you'll automatically
414             be notified of progress on your bug as I make changes.
415              
416             =head1 ACKNOWLEDGEMENTS
417              
418             Andy Lester and Ricardo Signes wrote Module::Starter, which helped
419             get the framework up and running fairly quickly.
420              
421             Brad Fitzpatrick of L for giving me access
422             to a LiveJournal interface to determine linking information on that
423             site, which enabled me to write the algorithm that has been reduced
424             into this module.
425              
426             =head1 COPYRIGHT & LICENSE
427              
428             Copyright 2005 Pete Krawczyk, All Rights Reserved.
429              
430             This program is free software; you can redistribute it and/or modify it
431             under the same terms as Perl itself.
432              
433             =cut
434              
435             1; # End of Algorithm::SixDegrees