File Coverage

blib/lib/Sport/Analytics/SimpleRanking.pm
Criterion Covered Total %
statement 343 396 86.6
branch 70 120 58.3
condition 20 33 60.6
subroutine 26 26 100.0
pod 17 17 100.0
total 476 592 80.4


line stmt bran cond sub pod time code
1             package Sport::Analytics::SimpleRanking;
2              
3 4     4   2999813 use warnings;
  4         12  
  4         142  
4 4     4   25 use strict;
  4         8  
  4         151  
5 4     4   23 use List::Util qw( max );
  4         14  
  4         470  
6 4     4   27 use Carp;
  4         8  
  4         36856  
7              
8             =head1 NAME
9              
10             Sport::Analytics::SimpleRanking - This module provides a method that calculate Doug Drinen's simple ranking system.
11              
12             =head1 VERSION
13              
14             Version 0.21
15              
16             =cut
17              
18             our $VERSION = '0.21';
19              
20             =head1 SYNOPSIS
21              
22             This module provides a method that calculates Doug Drinen's simple ranking system.
23             It also provides access to some other useful team and season stats.
24              
25             use Sport::Analytics::SimpleRanking;
26             my $stats = Sport::Analytics::SimpleRanking->new();
27             my $games = [
28             "Boston,13,Atlanta, 27",
29             "Dallas,17,Chicago,21",
30             "Eugene,30,Fairbanks,41",
31             "Atlanta,15,Chicago,3",
32             "Eugene,21,Boston,24",
33             "Fairbanks,17,Dallas,7",
34             "Dallas,19,Atlanta,7",
35             "Boston,9,Fairbanks,31",
36             "Chicago,10,Eugene,30",
37             ];
38             $stats->load_data( $games );
39             my $srs = $stats->simpleranking( verbose => 1 );
40             my $mov = $stats->mov;
41             my $sos = $stats->sos;
42             for ( keys %$srs ) {
43             print "Team $_ has a srs of ", $srs->{$_};
44             print " and a mov of ",$mov->{$_},"\n";
45             }
46              
47              
48             =head1 DESCRIPTION
49              
50             The simple ranking system is one based on rates of scoring, generally by starting with team margin of victory (i.e. average point spread). It is perhaps the simplest model of the form
51              
52             Team Strength = a x (Mov) + b x (Opponent Strength)
53              
54             In the simple ranking system, a = 1 and b = 1/(number of opponents played). Matrix solutions of this linear equation tend to be very unstable, whereas an iterative solution rapidly converges to a stable answer. This object implements the iterative solution, and since doing that much work means the object can calculate a number of other useful values on the data set, it does so as well.
55              
56             One more note, though commonly described as N equations in N unknowns, an additional constraint is required to solve to a single unique answer, and that is that the sum of all simple rankings must add up to 0.0. This also guarantees that the average club in a season has a ranking of zero.
57              
58             =cut
59              
60             package Sport::Analytics::SimpleRanking;
61              
62             return 1;
63              
64             =head1 METHODS
65              
66             =head2 CREATION
67              
68             =head3 new()
69              
70             my $stats = Sport::Analytics::SimpleRanking->new()
71              
72             Output: a working SimpleRanking object.
73              
74             =cut
75              
76             sub new {
77 3     3 1 670 my ( $class, %proto ) = @_;
78 3 50       19 %proto = () unless (%proto);
79 3 50       16 if ( $proto{debug} ) {
80 0         0 for ( keys %proto ) {
81 0         0 print "$_ => $proto{$_}\n";
82             }
83             }
84 3         10 $proto{loaded} = 0;
85 3         8 $proto{calc} = 0;
86              
87             # parameter validation here.
88 3 50       22 $proto{warnTeam} = 1000 unless ( $proto{warnTeam} );
89 3 50       17 $proto{warnGame} = 100000 unless ( $proto{warnGeam} );
90 3 50       35 croak " warnTeam should always be a number."
91             unless ( $proto{warnTeam} =~ /^\d+$/ );
92 3 50       23 croak " warnGame should always be a number."
93             unless ( $proto{warnGame} =~ /^\d+$/ );
94 3   33     30 bless \%proto, ref($class) || $class;
95 3         20 return \%proto;
96             }
97              
98             =head2 ACCESSORS
99              
100             Unless otherwise specified, success returns the value (or values) requested and failure is carped and returns a reference to an empty hash. Failures in the accessors happen when data have not been successfully loaded.
101              
102             =head3 total_games()
103              
104             my $total_games = $stats->total_games();
105              
106             Input: none required
107              
108             Output: The number of games total in the data set loaded.
109             =cut
110              
111             sub total_games {
112 3     3 1 6306 my ( $self ) = @_;
113              
114 3 50       19 if ( !$self->{loaded} ) {
115 0         0 carp "No data are loaded presently.";
116 0         0 return {};
117             }
118 3         12 return $self->{total_games};
119             }
120              
121             =head3 total_teams()
122              
123             my $total_teams = $stats->total_teams();
124              
125             Input: none required
126              
127             Output: The number of teams total in the data set loaded.
128             =cut
129              
130             sub total_teams {
131 2     2 1 10 my ( $self ) = @_;
132              
133 2 50       10 if ( !$self->{loaded} ) {
134 0         0 carp "No data are loaded presently.";
135 0         0 return {};
136             }
137 2         6 return $self->{total_team};
138             }
139              
140              
141             =head3 total_wins()
142              
143             my $total_wins = $stats->total_wins();
144              
145             Input: none required
146              
147             Output: The number of wins total in the data set loaded.
148             =cut
149              
150             sub total_wins {
151 2     2 1 10 my ( $self ) = @_;
152              
153 2 50       9 if ( !$self->{loaded} ) {
154 0         0 carp "No data are loaded presently.";
155 0         0 return {};
156             }
157 2         8 return $self->{total}->{wins};
158             }
159              
160             =head3 home_wins()
161              
162             my $home_wins = $stats->home_wins();
163              
164             Input: none required
165              
166             Output: The number of wins by home teams in the data set loaded.
167             =cut
168              
169             sub home_wins {
170 2     2 1 19 my ( $self ) = @_;
171              
172 2 50       9 if ( !$self->{loaded} ) {
173 0         0 carp "No data are loaded presently.";
174 0         0 return {};
175             }
176 2         8 return $self->{total}->{home_wins};
177             }
178              
179             =head3 home_win_pct()
180              
181             my $home_win_percent = $stats->home_win_pct();
182              
183             Input: none required
184              
185             Output: Percentage number of wins by home teams in the data set loaded.
186             =cut
187              
188             sub home_win_pct {
189 2     2 1 13 my ( $self ) = @_;
190              
191 2 50       9 if ( !$self->{loaded} ) {
192 0         0 carp "No data are loaded presently.";
193 0         0 return {};
194             }
195 2         12 return $self->{total}{home_wins}/$self->{total_games};
196             }
197              
198             =head3 win_margin()
199              
200             my $win_margin = $stats->win_margin();
201              
202             Input: none required
203              
204             Output: Average margin of victory if a team does win.
205             =cut
206              
207             sub win_margin {
208 2     2 1 11 my ( $self ) = @_;
209              
210 2 50       10 if ( !$self->{loaded} ) {
211 0         0 carp "No data are loaded presently.";
212 0         0 return {};
213             }
214 2         8 return $self->{total}{win_margin}/$self->{total}{wins};
215             }
216              
217             =head3 win_score()
218              
219             my $average_winnning_score = $stats->win_score();
220              
221             Input: none required
222              
223             Output: Average winning score if a team does win.
224             =cut
225              
226             sub win_score {
227 2     2 1 49 my ( $self ) = @_;
228              
229 2 50       10 if ( !$self->{loaded} ) {
230 0         0 carp "No data are loaded presently.";
231 0         0 return {};
232             }
233 2         9 return $self->{total}{win_score}/$self->{total}{wins};
234             }
235              
236             =head3 loss_score()
237              
238             my $average_losing_score = $stats->loss_score();
239              
240             Input: none required
241              
242             Output: Average losing score if a team does lose.
243             =cut
244              
245             sub loss_score {
246 2     2 1 11 my ( $self ) = @_;
247              
248 2 50       9 if ( !$self->{loaded} ) {
249 0         0 carp "No data are loaded presently.";
250 0         0 return {};
251             }
252 2         10 return $self->{total}{losing_score}/$self->{total}{wins};
253             }
254              
255             =head3 avg_score()
256              
257             my $average_score = $stats->avg_score();
258              
259             Input: none required
260              
261             Output: Average score under any circumstance.
262             =cut
263              
264             sub avg_score {
265 2     2 1 9 my ( $self ) = @_;
266              
267 2 50       11 if ( !$self->{loaded} ) {
268 0         0 carp "No data are loaded presently.";
269 0         0 return {};
270             }
271 2         10 return $self->{total}{total_scores}/( 2.0*$self->{total_games} );
272             }
273              
274             =head3 team_stats()
275              
276             my $teams = $stats->team_stats();
277             for (sort keys %$teams) {
278             printf "%s: %3d-%3d-%3d\n", $_, $team{$_}{wins}, $team{$_}{losses}, $team{$_}{ties};
279             }
280              
281             Input: none required
282              
283             Output: A reference to a hash of statistics per team. These include
284             wins
285             losses
286             ties
287             games_played
288             points_for
289             points_against
290             point_spread
291             win_pct
292             mov (also known as average point spread).
293              
294             This function will return an empty hash reference if data have not yet been loaded.
295              
296             =cut
297              
298             sub team_stats {
299 3     3 1 527 my ( $self ) = @_;
300              
301 3 50       17 if ( !$self->{loaded} ) {
302 0         0 carp "No data are loaded presently.";
303 0         0 return {};
304             }
305 3         6 my %team;
306 3         8 for my $t ( sort keys %{ $self->{team} } ) {
  3         34  
307 44 50       110 print "Team_Stats: t = $t\n" if ( $self->{debug} );
308 44         51 $team{$t}{wins} = ${$self->{team}}{$t}{wins};
  44         167  
309 44   50     124 $team{$t}{wins} ||= 0;
310 44         50 $team{$t}{losses} = ${$self->{team}}{$t}{losses};
  44         121  
311 44   100     115 $team{$t}{losses} ||= 0;
312 44         59 $team{$t}{ties} = ${$self->{team}}{$t}{ties};
  44         138  
313 44   50     246 $team{$t}{ties} ||= 0;
314 44         53 $team{$t}{games_played} = ${$self->{team}}{$t}{games_played};
  44         128  
315 44         54 $team{$t}{points_for} = ${$self->{team}}{$t}{points_for};
  44         116  
316 44         51 $team{$t}{points_against} = ${$self->{team}}{$t}{points_against};
  44         112  
317 44         96 $team{$t}{point_spread} = $team{$t}{points_for} - $team{$t}{points_against};
318 44         186 $team{$t}{win_pct} = ($team{$t}{wins} + 0.5*$team{$t}{ties})/ $team{$t}{games_played};
319 44         55 $team{$t}{mov} = ${$self->{team}}{$t}{mov};
  44         178  
320             }
321 3         19 return \%team;
322             }
323              
324             =head3 pythag()
325              
326             The Pythagorean formula is a rule of thumb that estimates winning percentage from points scored and points allowed.
327              
328             Estimated Winning Percentage = (Pts Scored)**N/( (Pts Scored)**N + (Pts Allowed)**N )
329              
330             In the original Bill James formulation, the power of the Pythagorean formula, N, is 2. This implementation can calculate the
331             Pythagorean power from the game data set itself.
332              
333              
334             my $teams = $stats->team_stats();
335             my $predicted = $stats->pythag();
336             for (sort keys %$teams) {
337             printf "%s: %6.2f %6.2f\n", $_, $team{$_}{win_pct}, $predicted{$_};
338             }
339              
340             Input: If none given, will assume N = 2.
341              
342             my $predicted = $stats->pythag();
343              
344             If input is a number, that number will be used to calculate the power of the Pythagorean prediction.
345              
346             my $predicted = $stats->pythag(2.5);
347              
348             If input is a reference to a scalar, and the option 'best => 1' is used, then this program will use a golden mean search to find the best fit value of N, and return the value in the reference provided.
349              
350             my $predicted = $stats->pythag( \$exp, best => 1 );
351              
352             Output:
353              
354             A hash reference with team names as keys and predicted winning percentage as values.
355              
356             This function will return an empty hash reference if data have not yet been loaded.
357              
358             =cut
359              
360             sub pythag {
361 3     3 1 3771 my ( $self, $exp, %opt ) = @_;
362              
363 3 50       12 if ( !$self->{loaded} ) {
364 0         0 carp "No data are loaded presently.";
365 0         0 return {};
366             }
367              
368 3         4 my $power = 2.0;
369 3 100       9 if ( $exp ) {
370 2 100       8 if ( $opt{best} ) {
371 1 50       6 if ( ref($exp) eq 'SCALAR' ) {
372 1 50       5 if ( $opt{verbose} ) {
373 0         0 $$exp = $self->_py_sect( verbose => 1 );
374             }
375             else {
376 1         4 $$exp = $self->_py_sect();
377             }
378 1         3 $power = $$exp;
379             }
380             }
381             else {
382 1         2 $power = $exp;
383             }
384             };
385 3         6 my %pred;
386 3         4 for my $t ( sort keys %{ $self->{team} } ) {
  3         46  
387 96         277 $pred{$t} = $self->_py_calc( $self->{team}{$t}{points_for}, $self->{team}{$t}{points_against}, $power );
388             }
389 3         18 return \%pred;
390             }
391              
392             sub _py_calc {
393 864     864   928 my $self = shift;
394 864         874 my $pf = shift;
395 864         800 my $pa = shift;
396 864         841 my $power = shift;
397 864   50     1586 $power ||= 2.0;
398 864         2361 return ( $pf**$power) / ( $pf**$power + $pa**$power );
399             }
400              
401             sub _py_fit {
402 24     24   34 my ($self, $exp ) = @_;
403 24         27 my $ssq = 0;
404 24         25 for my $t ( keys % { $self->{team} } ) {
  24         152  
405 768         2125 my $calc = $self->_py_calc( $self->{team}{$t}{points_for}, $self->{team}{$t}{points_against}, $exp );
406 768         1973 $ssq += ( $self->{team}{$t}{win_pct} - $calc )**2;
407             }
408 24         134 return $ssq;
409             }
410              
411             sub _py_sect {
412 1     1   3 my ( $self, %opt ) = @_;
413 1         2 my $lo = 0.0;
414 1         3 my $hi = 25.0;
415 1         2 my $tol = 0.001;
416 1         4 my $g = $self->_golden_ratio();
417 1         3 my $one_minus_g = 1.0 - $g;
418 1         3 my @p;
419             my @f;
420             #
421             # if [ $lo, $hi ] is an interval in which a minimum is found, choose points so that
422             # p[1] = ~ 2/3 lo + ~ 1/3 hi and p[2] = ~ 1/3 lo + ~ 2/3 hi.
423             #
424 1         2 $p[0] = $lo;
425 1         2 $p[3] = $hi;
426 1         3 $p[1] = $one_minus_g*$p[0] + $g*$p[3];
427 1         3 $p[2] = $g*$p[0] + $one_minus_g*$p[3];
428 1         4 $f[1] = $self->_py_fit( $p[1] );
429 1         4 $f[2] = $self->_py_fit( $p[2] );
430 1         5 while ( abs( $p[3] - $p[0] ) > $tol ) {
431 22 100       43 if ( $f[2] < $f[1] ) {
432 10 50       31 print "Low = $p[1]\n" if ( $opt{verbose} );
433 10         13 $p[0] = $p[1];
434 10         10 $p[1] = $p[2];
435 10         16 $p[2] = $one_minus_g*$p[1] + $g*$p[3];
436 10         12 $f[1] = $f[2];
437 10         19 $f[2] = $self->_py_fit( $p[2] );
438             }
439             else {
440 12 50       29 print "High = $p[2]\n" if ( $opt{verbose} );
441 12         14 $p[3] = $p[2];
442 12         13 $p[2] = $p[1];
443 12         19 $p[1] = $one_minus_g*$p[2] + $g*$p[0];
444 12         14 $f[2] = $f[1];
445 12         26 $f[1] = $self->_py_fit( $p[1] );
446             }
447             }
448 1 50       8 return $f[2] > $f[1] ? $p[1] : $p[2];
449             }
450              
451             sub _golden_ratio {
452 1     1   2 my $self = shift;
453 1         3 return ( 3.0 - sqrt(5))/2 ;
454             }
455              
456              
457              
458             =head2 ALGORITHM COMPONENTS
459              
460             =head3 mov()
461              
462             my $mov = $stats->mov();
463             for (sort keys %$mov) {
464             printf "team %s: margin of victory: %6.2f\n", $_, $mov{$_};
465             }
466              
467             Input: none required
468              
469             Output: a hash of mov values (margin of victory, or average point spread) per team.
470             This function will return an empty hash reference if data have not yet been loaded.
471              
472             =cut
473              
474             sub mov {
475 4     4 1 28 my ( $self ) = @_;
476              
477 4 50       25 if ( !$self->{loaded} ) {
478 0         0 carp "No data are loaded presently.";
479 0         0 return {};
480             }
481             else {
482 4         7 my %mov;
483 4         8 for my $t ( sort keys %{ $self->{team} } ) {
  4         49  
484 76 50       148 print "mov: t = $t\n" if ( $self->{debug} );
485 76         71 $mov{$t} = ${$self->{team}}{$t}{mov};
  76         240  
486             }
487 4         23 return \%mov;
488             }
489             }
490              
491             =head3 sos()
492              
493             Strength of schedule is the sum of the simple rankings of all teams that
494             played a specific team, divided by the total number of teams that played
495             the team.
496              
497             my $sos = $stats->sos();
498             for (sort keys %$sos) {
499             printf "team %s: strength of schedule: %6.2f\n", $_, $sos{$_};
500             }
501              
502              
503             Input: none required
504              
505             Output: a hash of sos values (strength of schedule) per team.
506             This function will return an empty hash reference if data have not yet been calculated.
507              
508             =cut
509              
510             sub sos {
511 1     1 1 5 my ( $self ) = @_;
512              
513 1 50       5 if ( !$self->{calc} ) {
514 0         0 carp "No data are calculated presently.";
515 0         0 return {};
516              
517             }
518             else {
519 1         8 my %sos;
520 1         3 for my $t ( sort keys %{ $self->{team} } ) {
  1         14  
521 32 50       63 print "sos: t = $t\n" if ( $self->{debug} );
522 32         32 $sos{$t} = ${$self->{team}}{$t}{sos};
  32         96  
523             }
524 1         5 return \%sos;
525             }
526             }
527              
528             =head3 simpleranking()
529              
530             Input: none required, options possible.
531              
532             Example:
533              
534             my $stats = Sport::Analytics::SimpleRanking->new();
535             $stats->load_data( \@games );
536             my $srs = $stats->simpleranking( verbose => 1 );
537             my $mov = $stats->mov();
538             my $sos = $stats->sos();
539             for (sort keys %$srs) {
540             printf "team %s: simple ranking: %6.2f = margin of victory: %6.2f", $_, $srs{$_},$mov{$_};
541             printf " + strength of schedule: %6.2f\n",$sos{$_};
542             }
543              
544             Options:
545              
546             epsilon => value
547              
548             This is a convergence criterion. Usually you won't need to set this.
549              
550             maxiter => value
551              
552             A stopgap to prevent runaways. Usually unnecessary as this algorithm converges rapidly.
553              
554             verbose => value
555              
556             Set this on to visually watch values converge.
557              
558             Output: The simple rankings of the data as a hash of values per team name.
559             This function will return an empty hash reference if data have not yet been calculated.
560              
561             =cut
562              
563             sub simpleranking {
564 1     1 1 8 my ( $self, %options ) = @_;
565              
566 1 50       4 if ( !$self->{loaded} ) {
567 0         0 carp "No data are loaded presently.";
568 0         0 return {};
569             }
570              
571 1   50     7 $options{epsilon} ||= 0.001;
572 1   50     5 $options{maxiter} ||= 1000000;
573 1         3 for ( keys %{ $self->{team} } ) {
  1         7  
574 32         62 $self->{team}{$_}{srs} = $self->{team}{$_}{mov};
575 32         59 $self->{team}{$_}{oldsrs} = $self->{team}{$_}{srs};
576 32         79 $self->{team}{$_}{sos} = 0;
577             }
578 1         4 my $delta = 10.0;
579 1         2 my $iter = 0;
580 1   66     10 while ( $delta > $options{epsilon} and $iter < $options{maxiter} ) {
581 11         13 $delta = 0.0;
582 11         13 for ( keys %{ $self->{team} } ) {
  11         66  
583 352 50       740 print "team => $_\n" if ( $self->{debug} );
584 352         380 my $sos = 0.0;
585 352         343 for my $g ( @{ $self->{played}{$_} } ) {
  352         690  
586 5632         10180 $sos += $self->{team}{$g}{srs};
587             }
588 352         685 $sos /= $self->{team}{$_}{games_played};
589 352         719 $self->{team}{$_}{srs} = $self->{team}{$_}{mov} + $sos;
590 352         594 my $newdelt = abs( $sos - $self->{team}{$_}{sos} );
591 352         517 $self->{team}{$_}{sos} = $sos;
592 352         829 $delta = max( $newdelt, $delta );
593             }
594 11         35 for ( keys %{ $self->{team} } ) {
  11         63  
595 352         741 $self->{team}{$_}{oldsrs} = $self->{team}{$_}{srs};
596             }
597 11         33 $iter++;
598 11 50       70 if ( $options{verbose} ) {
599 0         0 print "iter : $iter\n";
600 0         0 print "delta : $delta\n";
601 0         0 for ( sort keys %{$self->{team}} ) {
  0         0  
602 0         0 printf "%20s srs:%7.2f mov:%7.2f sos:%7.2f \n" ,$_ ,
603             $self->{team}{$_}{srs},$self->{team}{$_}{mov}, $self->{team}{$_}{sos};
604             }
605 0 0       0 print "elements in \$self->{team}: ",scalar keys %{$self->{team}},"\n" if ( $self->{debug} );
  0         0  
606 0         0 print "\n\n";
607             }
608             }
609 1         15 $self->_srs_correction();
610 1 50       4 if ( $options{verbose} ) {
611 0         0 print "Adjusted to 0.0\n";
612 0         0 for ( sort keys %{$self->{team}} ) {
  0         0  
613 0         0 printf "%20s srs:%7.2f mov:%7.2f sos:%7.2f \n" ,$_ ,
614             $self->{team}{$_}{srs},$self->{team}{$_}{mov}, $self->{team}{$_}{sos};
615             }
616 0         0 print "\n\n";
617             }
618 1 50       6 print "iter = $iter\n" if $options{verbose};
619 1 50       5 print "epsilon = $options{epsilon}\n" if $options{verbose};
620 1 50       4 printf "delta = %7.4f\n", $delta if $options{verbose};
621 1 50 33     5 print "elements in \$self->{team}: ",scalar keys %{$self->{team}},"\n" if ( $options{verbose} and $self->{debug} );
  0         0  
622 1         3 $self->{calc} = 1;
623 1         2 my %srsmap;
624 1         2 $srsmap{$_} = $self->{team}{$_}{srs} for ( keys %{ $self->{team} } );
  1         50  
625 1         8 return \%srsmap;
626             }
627              
628             #
629             # Any solution SRS = MOV + SOS has an equally valid solution
630             #
631             # SRS + c = MOV + SOS + c.
632             #
633             # You have to correct for that by setting the sum of all srs values to average to 0.0.
634             #
635             sub _srs_correction {
636 1     1   4 my ( $self, %options ) = @_;
637 1         2 my $sum = 0.0;
638 1         3 for ( keys %{ $self->{team} } ) {
  1         7  
639 32         58 $sum += $self->{team}{$_}{srs};
640             }
641 1         18 $sum /= $self->{total_team};
642 1         2 for ( keys %{ $self->{team} } ) {
  1         7  
643 32         45 $self->{team}{$_}{srs} -= $sum;
644 32         57 $self->{team}{$_}{sos} -= $sum;
645             }
646 1         5 return;
647             }
648              
649             =head2 DATA LOADING
650              
651             There are two methods provided, C and C. The method
652             C can only be used once, then C thereafter.
653              
654             =head3 load_data()
655              
656             Input: a reference to an array of comma separated strings of the form:
657              
658             "visting team,score,home team,score"
659              
660             Example:
661              
662             use Sport::Analytics::SimpleRanking;
663             my $stats = Sport::Analytics::SimpleRanking->new();
664             my $games = [
665             "Boston,13,Atlanta, 27",
666             "Dallas,17,Chicago,21",
667             "Eugene,30,Fairbanks,41",
668             "Atlanta,15,Chicago,3",
669             "Eugene,21,Boston,24",
670             "Fairbanks,17,Dallas,7",
671             "Dallas,19,Atlanta,7",
672             "Boston,9,Fairbanks,31",
673             "Chicago,10,Eugene,30",
674             ];
675             $stats->load_data( $games );
676              
677             This calculation requires at least two teams, and then at least two games per
678             team in order to be successful.
679              
680             Output: returns 1 on success, croaks on failure.
681              
682             =cut
683              
684             sub load_data {
685 9     9 1 29270 my ( $self, $games ) = @_;
686 9 100       379 croak("You can only load data once into this object. Use add_data to add more data.")
687             if ( $self->{loaded} );
688 7         13 $self->{loaded} = 0;
689 7 50       43 croak("Method load_data requires a reference to a games array.")
690             unless ( ref($games) eq 'ARRAY' );
691 7         12 $self->{total_games} = 0;
692 7         15 $self->{total} = ();
693 7         17 $self->{team} = ();
694 7         22 $self->{game} = ();
695 7         23 for (@$games) {
696 282         858 my ( $visitor, $visit_score, $home_team, $home_score ) = split "\,", $_;
697 282 50       615 croak "The home score is undefined in array element $self->{total_games}. Perhaps you have missed a comma?"
698             unless ( defined( $home_score ) );
699 282 100       952 croak
700             "The visitor score field in array element $self->{total_games} needs to be a number."
701             unless ( $visit_score =~ /^\s*\d+\s*$/ );
702 281 50       743 croak
703             "The home score field in array element $self->{total_games} needs to be a number."
704             unless ( $home_score =~ /^\s*\d+\s*$/ );
705 281         410 my $diff = $home_score - $visit_score;
706 281 100       572 if ( $diff > 0 ) {
    50          
707 166         242 $self->{total}{wins}++;
708 166         223 $self->{total}{win_score} += $home_score;
709 166         228 $self->{total}{losing_score} += $visit_score;
710 166         235 $self->{total}{total_scores} += ( $home_score + $visit_score );
711 166         207 $self->{total}{home_wins}++;
712 166         794 $self->{total}{win_margin} += $diff;
713 166         803 $self->{team}{$home_team}{wins}++;
714 166         926 $self->{team}{$visitor}{losses}++;
715             }
716             elsif ( $diff == 0 ) {
717 0         0 $self->{total}{ties}++;
718 0         0 $self->{total}{total_scores} += ( $home_score + $visit_score );
719 0         0 $self->{team}{$home_team}{ties}++;
720 0         0 $self->{team}{$visitor}{ties}++;
721             }
722             else {
723 115         224 $self->{total}{wins}++;
724 115         161 $self->{total}{losing_score} += $home_score;
725 115         145 $self->{total}{win_score} += $visit_score;
726 115         157 $self->{total}{total_scores} += ( $home_score + $visit_score );
727 115         153 $self->{total}{visit_wins}++;
728 115         154 $self->{total}{win_margin} -= $diff;
729 115         210 $self->{team}{$home_team}{losses}++;
730 115         218 $self->{team}{$visitor}{wins}++;
731             }
732 281         296 push @{ $self->{game}{visitor} }, $visitor;
  281         656  
733 281         350 push @{ $self->{game}{visit_score} }, $visit_score;
  281         505  
734 281         294 push @{ $self->{game}{home_team} }, $home_team;
  281         543  
735 281         309 push @{ $self->{game}{home_score} }, $home_score;
  281         510  
736 281         289 push @{ $self->{game}{mov} }, $diff;
  281         513  
737 281         533 $self->{team}{$visitor}{games_played}++;
738 281         406 $self->{team}{$home_team}{games_played}++;
739 281         495 $self->{team}{$visitor}{points} -= $diff;
740 281         466 $self->{team}{$home_team}{points} += $diff;
741 281         404 $self->{team}{$visitor}{points_for} += $visit_score;
742 281         422 $self->{team}{$visitor}{points_against} += $home_score;
743 281         441 $self->{team}{$home_team}{points_for} += $home_score;
744 281         424 $self->{team}{$home_team}{points_against} += $visit_score;
745 281         264 push @{ $self->{played}{$visitor} }, $home_team;
  281         658  
746 281         318 push @{ $self->{played}{$home_team} }, $visitor;
  281         588  
747 281         693 $self->{total_games}++;
748             }
749 6 100       206 croak("Method load_data requires at least two games to analyze data.")
750             unless ( $self->{total_games} > 1 );
751 5         6 $self->{total_team} = scalar keys %{ $self->{team} };
  5         18  
752 5 50       17 croak("Method load_data requires at least two teams.")
753             unless ( $self->{total_team} > 1 );
754 5 100       119 croak("Method load_data requires at least as many games as teams.")
755             unless ( $self->{total_team} <= $self->{total_games} );
756 4         7 for my $t ( keys %{ $self->{team} } ) {
  4         28  
757 49 100       210 croak("Method load_data requires team $t to have played at least two games.")
758             unless ( $self->{team}{$t}{games_played} > 1 );
759             }
760 3 50       24 carp("The number of teams in this data set is exceptionally large.")
761             if ( $self->{total_team} > $self->{warnTeam} );
762 3 50       13 carp("The number of games in this data set is exceptionally large.")
763             if ( $self->{total_games} > $self->{warnGame} );
764              
765 3         5 for my $t ( sort keys %{ $self->{team} } ) {
  3         41  
766 44         118 my $team_diff =
767             $self->{team}{$t}{points} / $self->{team}{$t}{games_played};
768 44         80 $self->{team}{$t}{mov} = $team_diff;
769 44   100     118 $self->{team}{$t}{wins} ||= 0;
770 44   50     223 $self->{team}{$t}{ties} ||= 0;
771 44   100     116 $self->{team}{$t}{losses} ||= 0;
772 44         240 $self->{team}{$t}{win_pct} = ($self->{team}{$t}{wins} + 0.5*$self->{team}{$t}{ties})/ $self->{team}{$t}{games_played};
773             }
774 3         9 $self->{loaded} = 1;
775 3         21 return $self->{loaded};
776             }
777              
778             =head3 add_data()
779              
780             Input: a reference to an array of comma separated strings of the form:
781              
782             "visting team,score,home team,score"
783              
784             Example:
785              
786             use Sport::Analytics::SimpleRanking;
787             my $stats = Sport::Analytics::SimpleRanking->new();
788             # first two weeks games.
789             my $games = [
790             "Boston,13,Atlanta, 27",
791             "Dallas,17,Chicago,21",
792             "Eugene,30,Fairbanks,41",
793             "Atlanta,15,Chicago,3",
794             "Eugene,21,Boston,24",
795             "Fairbanks,17,Dallas,7",
796             ];
797             $stats->load_data( $games );
798             # add another week of games.
799             my $newgames = [
800             "Dallas,19,Atlanta,7",
801             "Boston,9,Fairbanks,31",
802             "Chicago,10,Eugene,30",
803             ];
804             $stats->add_data( $newgames );
805              
806             This calculation requires at least two teams, and then at least two games per
807             team in order to be successful.
808              
809             Output: returns 1 on success, croaks on failure.
810              
811             =cut
812              
813             sub add_data {
814 2     2 1 733 my ( $self, $games ) = @_;
815 2 50       8 croak("Method add_data requires a reference to a games array.")
816             unless ( ref($games) eq 'ARRAY' );
817             # two passes allows add_data to croak without disrupting already existing data in the object.
818 2         6 for (@$games) {
819 4         15 my ( $visitor, $visit_score, $home_team, $home_score ) = split "\,", $_;
820 4 100       205 croak "The home score is undefined in array element $self->{total_games}. Perhaps you have missed a comma?"
821             unless ( defined( $home_score ) );
822 3 50       14 croak
823             "The visitor score field in array element $self->{total_games} needs to be a number."
824             unless ( $visit_score =~ /^\s*\d+\s*$/ );
825 3 50       16 croak
826             "The home score field in array element $self->{total_games} needs to be a number."
827             unless ( $home_score =~ /^\s*\d+\s*$/ );
828             }
829 1         3 for (@$games) {
830 3         10 my ( $visitor, $visit_score, $home_team, $home_score ) = split "\,", $_;
831 3         8 my $diff = $home_score - $visit_score;
832 3 100       11 if ( $diff > 0 ) {
    50          
833 2         4 $self->{total}{wins}++;
834 2         5 $self->{total}{win_score} += $home_score;
835 2         3 $self->{total}{losing_score} += $visit_score;
836 2         5 $self->{total}{total_scores} += ( $home_score + $visit_score );
837 2         3 $self->{total}{home_wins}++;
838 2         4 $self->{total}{win_margin} += $diff;
839 2         5 $self->{team}{$home_team}{wins}++;
840 2         5 $self->{team}{$visitor}{losses}++;
841             }
842             elsif ( $diff == 0 ) {
843 0         0 $self->{total}{ties}++;
844 0         0 $self->{total}{total_scores} += ( $home_score + $visit_score );
845 0         0 $self->{team}{$home_team}{ties}++;
846 0         0 $self->{team}{$visitor}{ties}++;
847             }
848             else {
849 1         3 $self->{total}{wins}++;
850 1         4 $self->{total}{losing_score} += $home_score;
851 1         5 $self->{total}{win_score} += $visit_score;
852 1         3 $self->{total}{total_scores} += ( $home_score + $visit_score );
853 1         2 $self->{total}{visit_wins}++;
854 1         4 $self->{total}{win_margin} += -$diff;
855 1         10 $self->{team}{$home_team}{losses}++;
856 1         4 $self->{team}{$visitor}{wins}++;
857             }
858 3         3 push @{ $self->{game}{visitor} }, $visitor;
  3         10  
859 3         5 push @{ $self->{game}{visit_score} }, $visit_score;
  3         10  
860 3         4 push @{ $self->{game}{home_team} }, $home_team;
  3         7  
861 3         4 push @{ $self->{game}{home_score} }, $home_score;
  3         10  
862 3         5 push @{ $self->{game}{mov} }, $diff;
  3         8  
863 3         6 $self->{team}{$visitor}{games_played}++;
864 3         6 $self->{team}{$home_team}{games_played}++;
865 3         7 $self->{team}{$visitor}{points} -= $diff;
866 3         7 $self->{team}{$home_team}{points} += $diff;
867 3         6 $self->{team}{$visitor}{points_for} += $visit_score;
868 3         4 $self->{team}{$visitor}{points_against} += $home_score;
869 3         7 $self->{team}{$home_team}{points_for} += $home_score;
870 3         5 $self->{team}{$home_team}{points_against} += $visit_score;
871 3         4 push @{ $self->{played}{$visitor} }, $home_team;
  3         9  
872 3         3 push @{ $self->{played}{$home_team} }, $visitor;
  3         7  
873 3         10 $self->{total_games}++;
874             }
875 1         3 $self->{total_team} = scalar keys %{ $self->{team} };
  1         3  
876 1 50       6 carp("The number of teams in this data set is exceptionally large.")
877             if ( $self->{total_team} > $self->{warnTeam} );
878 1 50       4 carp("The number of games in this data set is exceptionally large.")
879             if ( $self->{total_games} > $self->{warnGame} );
880              
881 1         2 for my $t ( sort keys %{ $self->{team} } ) {
  1         7  
882 6         16 my $team_diff =
883             $self->{team}{$t}{points} / $self->{team}{$t}{games_played};
884 6         10 $self->{team}{$t}{mov} = $team_diff;
885 6   50     15 $self->{team}{$t}{wins} ||= 0;
886 6   50     27 $self->{team}{$t}{ties} ||= 0;
887 6   100     27 $self->{team}{$t}{losses} ||= 0;
888 6         30 $self->{team}{$t}{win_pct} = ($self->{team}{$t}{wins} + 0.5*$self->{team}{$t}{ties})/ $self->{team}{$t}{games_played};
889             }
890 1         8 $self->{loaded} = 1;
891 1         2 $self->{calc} = 0;
892 1         6 return $self->{loaded};
893             }
894              
895             =head1 DIAGNOSTICS
896              
897             =head2 accessors and calculations
898            
899             No data are loaded presently.
900              
901             Data need to be loaded before this value can be returned.
902              
903             No data are calculated presently.
904              
905             Data need to be loaded and simpleranking needs to be run first.
906              
907             =head2 load_data()
908              
909             You can only load data once into this object. Use add_data to add more data.
910              
911             Code attempts to use load_data more than once. Use add_data instead.
912              
913             Method load_data requires a reference to a games array.
914              
915             Either no data passed to load_data, or the wrong kind of data has been passed to load_data.
916             Arrays should be dereferenced: C<\@array>.
917              
918             The home score is undefined in array element X. Perhaps you have missed a comma?
919              
920             This happens when there are less than 3 commas in a data string passed to the method.
921              
922             The visitor score field in array element X needs to be a number.
923              
924             The second field in a game string needs to be a number.
925              
926             The home score field in array element X needs to be a number.
927              
928             The fourth field in a game string needs to be a number.
929              
930             Method load_data requires at least two games to analyze data.
931             Method load_data requires at least two teams.
932             Method load_data requires at least as many games as teams.
933             Method load_data requires team T to have played at least two games.
934              
935             There are certain minimum data requirements for this program to function.
936              
937             The number of teams in this data set is exceptionally large.
938              
939             Happens if you pass more than 1000 teams to this method.
940              
941             The number of games in this data set is exceptionally large.
942            
943             Happens if you pass more than 1,000,000 games to this method.
944              
945              
946             =head2 add_data()
947              
948             Method add_data requires a reference to a games array.
949              
950             Either no data passed to add_data, or the wrong kind of data has been passed to add_data.
951             Arrays should be dereferenced: C<\@array>.
952              
953             The home score is undefined in array element X. Perhaps you have missed a comma?
954            
955             This happens when there are less than 3 commas in a data string passed to the method.
956              
957             The visitor score field in array element X needs to be a number.
958              
959             The second field in a game string needs to be a number.
960              
961             The home score field in array element X needs to be a number.
962              
963             The fourth field in a game string needs to be a number.
964              
965             The number of teams in this data set is exceptionally large.
966              
967             Happens if you pass more than 1000 teams to this method.
968              
969             The number of games in this data set is exceptionally large.
970            
971             Happens if you pass more than 1,000,000 games to this method.
972              
973             =head1 CONFIGURATION AND ENVIRONMENT
974              
975             No specific issues to note.
976              
977             =head1 DEPENDENCIES
978              
979             To build, Test::More. The modules List::Util and Carp are needed to build and to run this code.
980              
981             =head1 INCOMPATIBILITIES
982              
983             None known at this time.
984              
985             =head1 AUTHOR
986              
987             David Myers, C<< >>
988              
989             =head1 REFERENCES
990              
991             algorithm: L
992             original Perl implementation: L
993             Pythagorean formula: L
994              
995             =head1 BUGS AND LIMITATIONS
996              
997             No known bugs at this time.
998              
999             Please report any bugs or feature requests to C, or through
1000             the web interface at L. I will be notified, and then you'll
1001             automatically be notified of progress on your bug as I make changes.
1002              
1003             The algorithm requires at least two teams, and at least two games per team to calculate a simple ranking. If you have N teams, a minimum of N games are required in order to do the simple ranking calculation. It could be more, depending on who has played whom.
1004              
1005             =head1 SUPPORT
1006              
1007             You can find documentation for this module with the perldoc command.
1008              
1009             perldoc Sport::Analytics::SimpleRanking
1010              
1011              
1012             You can also look for information at:
1013              
1014             =over 4
1015              
1016             =item * RT: CPAN's request tracker
1017              
1018             L
1019              
1020             =item * AnnoCPAN: Annotated CPAN documentation
1021              
1022             L
1023              
1024             =item * CPAN Ratings
1025              
1026             L
1027              
1028             =item * Search CPAN
1029              
1030             L
1031              
1032             =back
1033              
1034              
1035             =head1 ACKNOWLEDGEMENTS
1036              
1037             To Doug Drinen, who manages the Pro Football Reference site, and who has published and promoted the use of the simple rankings system. To GrandFather at Perl Monks, who suggested many improvements in the design of the first versions of this module.
1038              
1039             =head1 LICENSE AND COPYRIGHT
1040              
1041             Copyright (c) 2011 David Myers, C<< >>. All rights reserved.
1042              
1043             This program is free software; you can redistribute it and/or modify it
1044             under the terms of either: the GNU General Public License as published
1045             by the Free Software Foundation; or the Artistic License.
1046              
1047             See http://dev.perl.org/licenses/ for more information.
1048              
1049             =head2 Disclaimer
1050              
1051             To the maximum extent permitted by applicable law, the author of this module disclaims all warranties, either express or implied, including but not limited to implied warranties of merchantability and fitness for a particular purpose, with regard to the software and the accompanying documentation.
1052              
1053              
1054              
1055             =cut
1056              
1057             1; # End of Sport::Analytics::SimpleRanking