File Coverage

blib/lib/Algorithm/SixDegrees.pm
Criterion Covered Total %
statement 169 189 89.4
branch 96 124 77.4
condition 35 54 64.8
subroutine 12 12 100.0
pod 6 6 100.0
total 318 385 82.6


line stmt bran cond sub pod time code
1             package Algorithm::SixDegrees;
2              
3             require 5.006;
4 6     6   356972 use warnings;
  6         51  
  6         304  
5 6     6   35 use strict;
  6         16  
  6         149  
6 6     6   30 use Scalar::Util qw(reftype);
  6         9  
  6         12727  
7              
8             =encoding UTF-8
9              
10             =head1 NAME
11              
12             Algorithm::SixDegrees - Find a path through linked elements in a set
13              
14             =head1 VERSION
15              
16             Version 1.01
17              
18             =cut
19              
20             our $VERSION = '1.01';
21             our $ERROR = '';
22              
23             =head1 SYNOPSIS
24              
25             use Algorithm::SixDegrees;
26              
27             my $sd1 = Algorithm::SixDegrees->new();
28             $sd1->data_source( actors => \&starred_in );
29             $sd1->data_source( movies => \&stars_of );
30             @elems = $sd1->make_link('actors', 'Tom Cruise', 'Kevin Bacon');
31              
32             my $sd2 = Algorithm::SixDegrees->new();
33             $sd2->forward_data_source( friends => \&friends, @args );
34             $sd2->reverse_data_source( friends => \&friend_of, @args );
35             @elems = $sd2->make_link('friends', 'Bob', 'Mark');
36              
37             =head1 DESCRIPTION
38              
39             C is a Perl implementation of a breadth-first
40             search through a set of linked elements in order to find the shortest
41             possible chain linking two specific elements together.
42              
43             In simpler terms, this module will take a bunch of related items and
44             attempt to find a relationship between two of them. It looks for the
45             shortest (and generally, simplest) relationship it can find.
46              
47             =head1 CONSTRUCTOR
48              
49             =head2 new()
50              
51             C requires use as an object; it can't (yet) be used
52             as a stand-alone module. C takes no arguments, however.
53              
54             =cut
55              
56             sub new {
57 10     10 1 407 my $class = shift;
58 10         47 my $self = {
59             _source_left => {},
60             _source_right => {},
61             _sources => [],
62             _investigated => {},
63             };
64 10         45 return bless $self,$class;
65             }
66              
67             =head1 FUNCTIONS
68              
69             =head2 forward_data_source( name => \&sub, @args );
70              
71             Tells C that all items in the data set relating to
72             C can be retrieved by calling C. See L.
73              
74             In our friends example above, if Bob considers Mark a friend, but Mark
75             doesn't consider Bob a friend, calling the sub with "Bob" as an argument
76             should return "Mark", but calling the sub with "Mark" as an argument
77             should not return "Bob".
78              
79             =cut
80              
81             sub forward_data_source {
82 23     23 1 3659 my ($self, $name, $sub, @args) = @_;
83 23 100       147 die "Data sources must be named\n" unless defined($name);
84 19 100       61 die "Data sources must have code supplied\n" unless defined($sub);
85 17 100 100     143 die "Data sources must have a coderef argument\n" unless ref($sub) && reftype($sub) eq 'CODE';
86 13         49 $self->{'_source_left'}{$name}{'sub'} = $sub;
87 13         33 $self->{'_source_left'}{$name}{'args'} = \@args;
88 13         21 foreach my $source (@{$self->{'_sources'}}) {
  13         35  
89 6 100       19 return if $source eq $name;
90             }
91 12         19 push(@{$self->{'_sources'}},$name);
  12         47  
92 12         35 return;
93             }
94              
95             =head2 reverse_data_source( name => \&sub, @args );
96              
97             Tells C that all items in the data set related to
98             by C can be retrieved by calling C. See L.
99              
100             In the same friends example, calling the sub with "Bob" as an argument
101             should not return "Mark", but calling the sub with "Mark" as an argument
102             should return "Bob".
103              
104             =cut
105              
106             sub reverse_data_source {
107 18     18 1 3392 my ($self, $name, $sub, @args) = @_;
108 18 100       66 die "Data sources must be named\n" unless defined($name);
109 16 100       45 die "Data sources must have code supplied\n" unless defined($sub);
110 15 100 100     100 die "Data sources must have a coderef argument\n" unless ref($sub) && reftype($sub) eq 'CODE';
111 13         39 $self->{'_source_right'}{$name}{'sub'} = $sub;
112 13         26 $self->{'_source_right'}{$name}{'args'} = \@args;
113 13         23 foreach my $source (@{$self->{'_sources'}}) {
  13         90  
114 17 100       55 return if $source eq $name;
115             }
116 1         3 push(@{$self->{'_sources'}},$name);
  1         3  
117 1         3 return;
118             }
119              
120             =head2 data_source( name => \&sub, @args );
121              
122             Sets up a data source as both forward and reverse. This is useful if
123             the data source is mutually relational; that is, in our actors/movies
124             example, Kevin Bacon is always in Mystic River, and Mystic River always
125             has Kevin Bacon in it.
126              
127             =cut
128              
129             sub data_source {
130 15     15 1 4969 my ($self, $name, $sub, @args) = @_;
131 15         52 $self->forward_data_source($name,$sub,@args);
132 10         128 $self->reverse_data_source($name,$sub,@args);
133 10         24 return;
134             }
135              
136             =head2 make_link
137              
138             Does the work of making the link. Returns a list or arrayref, based
139             on calling context.
140              
141             =cut
142              
143             sub make_link {
144 35     35 1 744 my ($self, $mainsource, $start, $end) = @_;
145 35         64 $ERROR = undef;
146              
147 35 100 66     208 unless (ref($self) && $self->isa(__PACKAGE__)) {
148 2         4 $ERROR = 'Invalid object reference used to call make_link';
149 2         11 return;
150             }
151 33 100       88 unless (defined($mainsource)) {
152 1         6 $ERROR = 'Data set name is not defined';
153 1         5 return;
154             }
155 32 100       65 unless (defined($start)) {
156 1         3 $ERROR = 'Starting identifier is not defined';
157 1         4 return;
158             }
159 31 100       62 unless (defined($end)) {
160 1         2 $ERROR = 'Ending identifier is not defined';
161 1         5 return;
162             }
163              
164             # Assume working from "left to right"; therefore, links leading
165             # from the starting identifier are on the "left side", and links
166             # leading to the ending identifier are on the "right side".
167 30         50 my %leftside = ();
168 30         56 my %rightside = ();
169              
170             # If $altsource gets defined, that means there are two sources used.
171 30         43 my $altsource;
172              
173 30 100 100     178 unless (exists($self->{'_sources'}) && reftype($self->{'_sources'}) eq 'ARRAY') {
174 2         5 $ERROR = 'No data sources defined';
175 2         9 return;
176             }
177 28         45 my @sources = @{$self->{'_sources'}};
  28         76  
178 28         54 my $source_exists = 0;
179 28         55 foreach my $source (@sources) {
180 39 100       92 if ($mainsource eq $source) {
181 26         41 $source_exists = 1;
182 26         69 $leftside{$source} = {$start, undef};
183 26         62 $rightside{$source} = {$end, undef};
184             } else {
185 13         22 $altsource = $source;
186 13         27 $leftside{$source} = {};
187 13         26 $rightside{$source} = {};
188             }
189 39 100 100     235 unless (ref($self->{'_source_left'}) &&
      66        
190             ref($self->{'_source_left'}{$source}) &&
191             reftype($self->{'_source_left'}{$source}{'sub'}) eq 'CODE') {
192 2         6 $ERROR = "Source '$source' does not have a valid forward subroutine";
193 2         12 return;
194             }
195 37 100 100     286 unless (ref($self->{'_source_right'}) &&
      66        
196             ref($self->{'_source_right'}{$source}) &&
197             reftype($self->{'_source_right'}{$source}{'sub'}) eq 'CODE') {
198 2         6 $ERROR = "Source '$source' does not have a valid reverse subroutine";
199 2         13 return;
200             }
201 35         104 $self->{'_investigated'}{$source} = {};
202             }
203 24 100       55 unless ($source_exists) {
204 2         6 $ERROR = "Source '$mainsource' was not defined";
205 2         12 return;
206             }
207 22 100       61 if (scalar(keys(%leftside)) > 2) {
208 1         2 $ERROR = 'Too many defined data sources; maximum is 2';
209 1         7 return;
210             }
211              
212              
213 21 100       47 if ($start eq $end) {
214             # Only one element if the start and end are the same.
215 5 100       47 return wantarray ? ($start) : [$start];
216             }
217              
218 16         25 my $leftcount = 1;
219 16         25 my $rightcount = 1;
220              
221             # If altsource exists, pull the left side main, then pull the right side main,
222             # and check for middle matches. This reduces database hits as opposed to
223             # where it's pulled left main - left alt; left alt >= 1 at that point, whereas
224             # right main on the first loop == 1. Following that, pull the left alt and
225             # then the right alt, which gets the CHAINLOOP back in synch.
226              
227 16 100       36 if (defined($altsource)) {
228 8         33 my ($count,$id,$err) = $self->_match('left',$mainsource,$altsource,\%leftside,\%rightside);
229 8 50       21 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
230 8 50       16 if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
  0         0  
  0         0  
231 8 50 33     30 return if !defined($count) || $count == 0;
232              
233 8         24 ($count,$id,$err) = $self->_match('right',$mainsource,$altsource,\%rightside,\%leftside);
234 8 50       22 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
235 8 100       18 if (defined($id)) {
236 2         8 my @abc = ($leftside{$altsource}{$id},$id,$rightside{$altsource}{$id});
237 2 50       24 return wantarray ? @abc : \@abc;
238             };
239 6 100 66     44 return if !defined($count) || $count == 0;
240              
241 4         13 ($leftcount,$id,$err) = $self->_match('left',$altsource,$mainsource,\%leftside,\%rightside);
242 4 50       12 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
243 4 50       20 if (defined($id)) { $ERROR = 'Internal error, id cannot match here'; return; };
  0         0  
  0         0  
244 4 50 33     20 return if !defined($leftcount) || $leftcount == 0;
245              
246 4         24 ($rightcount,$id,$err) = $self->_match('right',$altsource,$mainsource,\%rightside,\%leftside);
247 4 50       12 if (defined($err)) { $ERROR = $err; return; };
  0         0  
  0         0  
248 4 100       12 if (defined($id)) {
249 2         4 my $la = $leftside{$mainsource}{$id};
250 2         13 my $lm = $leftside{$altsource}{$la};
251 2         6 my $ra = $rightside{$mainsource}{$id};
252 2         5 my $rm = $rightside{$altsource}{$ra};
253 2 50 33     31 unless (defined($la) && defined($lm) && defined($ra) && defined($rm)) {
      33        
      33        
254 0         0 $ERROR = 'Internal error, identifier not defined';
255 0         0 return;
256             }
257 2 50       29 return wantarray ? ($lm,$la,$id,$ra,$rm) : [$lm,$la,$id,$ra,$rm];
258             };
259 2 50 33     24 return if !defined($rightcount) || $rightcount == 0;
260              
261             }
262              
263             # There is bias here, but the tie needs to be broken, so in the
264             # event of a tie, move left to right in the chain.
265              
266             CHAINLOOP: {
267 10         17 my $id;
  45         72  
268             my $err;
269 45 100       88 if ($leftcount <= $rightcount) {
270 42 100       68 if (defined($altsource)) {
271 4         17 ($leftcount,$id,$err) = $self->_match_two('left',$mainsource,$altsource,\%leftside,\%rightside);
272             } else {
273 38         81 ($leftcount,$id,$err) = $self->_match_one('left',$mainsource,\%leftside,\%rightside);
274             }
275             } else {
276 3 100       29 if (defined($altsource)) {
277 2         13 ($rightcount,$id,$err) = $self->_match_two('right',$mainsource,$altsource,\%rightside,\%leftside);
278             } else {
279 1         7 ($rightcount,$id,$err) = $self->_match_one('right',$mainsource,\%rightside,\%leftside);
280             }
281             }
282 45 50       106 if(defined($err)) {
283 0         0 $ERROR = $err;
284 0         0 return;
285             }
286 45 100       86 if(defined($id)) {
287             # If _match returns an id, that means a match was found.
288             # To get it, we simply have to trace out from the "middle"
289             # to get the full link.
290 7         17 my @match = ($id);
291             # middle, building to left.
292 7         20 while($match[0] ne $start) {
293 33         69 unshift(@match,$leftside{$mainsource}{$match[0]});
294 33 100       66 unshift(@match,$leftside{$altsource}{$match[0]}) if defined($altsource);
295 33 50       79 if (!defined($match[0])) {
296 0         0 $ERROR = 'Internal error, left identifier was not defined';
297 0         0 return;
298             }
299             }
300             # middle building to right
301 7         19 while($match[-1] ne $end) {
302 4         10 push(@match,$rightside{$mainsource}{$match[-1]});
303 4 50       12 push(@match,$rightside{$altsource}{$match[-1]}) if defined($altsource);
304 4 50       14 if (!defined($match[-1])) {
305 0         0 $ERROR = 'Internal error, right identifier was not defined';
306 0         0 return;
307             }
308             }
309 7 100       87 return wantarray ? @match : \@match;
310             }
311 38 100 100     140 if ($leftcount == 0 || $rightcount == 0) {
312 3         9 last CHAINLOOP;
313             }
314 35         73 redo CHAINLOOP;
315             }
316              
317 3 50       32 return wantarray ? () : [];
318             }
319              
320             =head2 error
321              
322             Returns the current value of C<$Algorithm::SixDegrees::ERROR>. See
323             L.
324              
325             =cut
326              
327             sub error {
328 20     20 1 138 return $ERROR;
329             }
330              
331             sub _match_two {
332 6     6   15 my ($self,$side,$mainsource,$altsource,$thisside,$thatside) = @_;
333             # Assume $self is OK since this is an internal function
334 6         15 my ($count,$id,$err) = $self->_match($side,$mainsource,$altsource,$thisside,$thatside);
335 6 50       41 return (undef,undef,$err) if defined($err);
336 6 50       15 return ($count,$id,$err) if defined($id);
337 6 50 33     47 return (0,undef,undef) if !defined($count) || $count == 0;
338             # mental note: this should never return an id
339             # after all, you can't have two mains together in a true
340             # alternating chain
341 6         18 return $self->_match($side,$altsource,$mainsource,$thisside,$thatside);
342             }
343              
344             sub _match_one {
345 39     39   82 my ($self,$side,$source,$thisside,$thatside) = @_;
346             # Assume $self is OK since this is an internal function
347 39         70 return $self->_match($side,$source,$source,$thisside,$thatside);
348             }
349              
350             sub _match {
351 75     75   148 my ($self,$side,$fromsource,$tosource,$thisside,$thatside) = @_;
352             # Assume $self is OK since this is an internal function
353 75 50       273 return (undef,undef,'Internal error: missing code') unless reftype($self->{"_source_$side"}{$fromsource}{'sub'}) eq 'CODE';
354 75 50       196 return (undef,undef,'Internal error: missing side (1)') unless reftype($thisside) eq 'HASH';
355 75 50       161 return (undef,undef,'Internal error: missing side (2)') unless exists($thisside->{$fromsource});
356 75 50       167 return (undef,undef,'Internal error: missing side (3)') unless reftype($thatside) eq 'HASH';
357 75 50       139 return (undef,undef,'Internal error: missing side (4)') unless exists($thatside->{$tosource});
358              
359 75         110 my $newsidecount = 0;
360 75         142 foreach my $id (keys %{$thisside->{$fromsource}}) {
  75         217  
361 202 100       513 next if exists($self->{"_investigated"}{$fromsource}{$id});
362 79         146 $self->{"_investigated"}{$fromsource}{$id} = 1;
363              
364 79 50       242 my $use_args = reftype($self->{"_source_$side"}{$fromsource}{'args'}) eq 'ARRAY' ? 1 : 0;
365              
366 79 50       142 my @ids = &{$self->{"_source_$side"}{$fromsource}{'sub'}}($id,($use_args?@{$self->{"_source_$side"}{$fromsource}{'args'}}:()));
  79         205  
  79         258  
367 79 50 66     1531 return (undef,undef,$ERROR) if scalar(@ids) == 1 && !defined($ids[0]);
368 79         142 foreach my $thisid (@ids) {
369 138 100       284 unless (exists($thisside->{$tosource}{$thisid})) {
370 86         176 $thisside->{$tosource}{$thisid} = $id;
371 86         127 $newsidecount++;
372             }
373 138 100       346 return (0,$thisid,undef) if exists($thatside->{$tosource}{$thisid});
374             }
375             }
376              
377 64         163 return $newsidecount;
378             }
379              
380             =head1 SUBROUTINE RULES
381              
382             Passed-in subroutines should take at least one argument, which
383             should be some form of unique identifier, and return a list of
384             unique identifiers that have a relation to the argument.
385              
386             The unique identifiers must be able to be compared with C.
387              
388             The identifiers should be unique in datatype; that is, in an
389             actor/movie relationship, "Kevin Bacon" can be both the name of an
390             actor and a movie.
391              
392             A linked data type must return identifiers that relate across the
393             link; that is, for an actor/movie relationship, an actor subroutine
394             should return movies, and a movie subroutine should return actors.
395              
396             Additional arguments can be provided; these will be stored in the
397             object and passed through as the second and further arguments to
398             the subroutine. This may be useful, for example, if you're using
399             some form of results caching and need to pass a Cd handle
400             around.
401              
402             If you return explicit undef, please set C<$Algorithm::SixDegrees::ERROR>
403             with an error code. Explicit undef means that an error occurred
404             that should terminate the search; it should be returned as a
405             one-element list.
406              
407             =head1 AUTHOR
408              
409             Peter Krawczyk, C<< >>
410              
411             =head1 BUGS
412              
413             Please report any bugs or feature requests to
414             C, or through the web interface at
415             L. I will be notified, and then you'll automatically
416             be notified of progress on your bug as I make changes.
417              
418             =head1 ACKNOWLEDGEMENTS
419              
420             Thank you to Slaven Rezić and Peter John Acklam for bug reports.
421             Apologies it took so long to see and fix them. (So much for
422             notifications...)
423              
424             Andy Lester and Ricardo Signes wrote Module::Starter, which helped
425             get the framework up and running fairly quickly.
426              
427             Brad Fitzpatrick of L for giving me access
428             to a LiveJournal interface to determine linking information on that
429             site, which enabled me to write the algorithm that has been reduced
430             into this module.
431              
432             =head1 COPYRIGHT & LICENSE
433              
434             Copyright 2005-2020 Peter Krawczyk, All Rights Reserved.
435              
436             This program is free software; you can redistribute it and/or modify it
437             under the same terms as Perl itself.
438              
439             =cut
440              
441             1; # End of Algorithm::SixDegrees