File Coverage

blib/lib/Math/SymbolicX/Statistics/Distributions.pm
Criterion Covered Total %
statement 102 134 76.1
branch 34 68 50.0
condition 2 6 33.3
subroutine 11 13 84.6
pod 0 5 0.0
total 149 226 65.9


line stmt bran cond sub pod time code
1             package Math::SymbolicX::Statistics::Distributions;
2              
3 3     3   1219644 use 5.006;
  3         18  
  3         150  
4 3     3   19 use strict;
  3         7  
  3         100  
5 3     3   16 use warnings;
  3         4  
  3         154  
6              
7             our $VERSION = '1.02';
8              
9 3     3   982 use Math::Symbolic qw/parse_from_string/;
  3         140979  
  3         201  
10 3     3   20 use Carp qw/confess cluck/;
  3         5  
  3         5735  
11              
12             require Exporter;
13             # Exporter stuff is implemented at the end of the module because
14             # we need to access the different distribution functions.
15              
16              
17              
18              
19             =head1 NAME
20              
21             Math::SymbolicX::Statistics::Distributions - Statistical Distributions
22              
23             =head1 SYNOPSIS
24              
25             use Math::SymbolicX::Statistics::Distributions ':all';
26            
27             #####################################################
28             # The following demonstrates the procedural interface
29            
30             # (included in :all)
31             use Math::SymbolicX::Statistics::Distributions ':functions';
32            
33             $dist = normal_distribution('mean', 'rmsd');
34             print $dist->value(mean => 5, rmsd => 2, x => 1);
35            
36             # similar:
37             $dist = gauss_distribution('mean', 'rmsd'); # same as normal_distribution
38             $dist = bivariate_normal_distribution( 'mean1', 'rmsd1',
39             'mean2', 'rmsd2',
40             'correlation );
41            
42             # plug in any expression: (y*2 will be mean, z^3 root mean square deviation)
43             $dist = normal_distribution('y*2', 'z^3');
44             print $dist->value(x => 0.5, y => 3, z => 0.2);
45            
46             # To generate the error function: (mean = 0; rmsd = 1)
47             $dist = normal_distribution(0, 1);
48             print $dist->value(x => 1);
49            
50             #########################################################
51             # The following demonstrates the parser/grammar interface
52             # We'll do the exact same as above with the other interface.
53            
54             # (included in :all)
55             use Math::SymbolicX::Statistics::Distributions ':grammar';
56             use Math::Symbolic qw/parse_from_string/;
57            
58             $dist = parse_from_string('normal()');
59             print $dist->value(mean => 5, rmsd => 2, x => 1);
60            
61             # similar:
62             $dist = parse_from_string('gauss(mean, rmsd)'); # same as normal()
63             $dist = parse_from_string( 'bivariate_normal(mean1, rmsd1,'
64             .'mean2, rmsd2,'
65             .'correlation )' );
66            
67             # plug in any expression: (y*2 will be mean, z^3 root mean square deviation)
68             $dist = parse_from_string('normal(y*2, z^3)');
69             print $dist->value(x => 0.5, y => 3, z => 0.2);
70            
71             # To generate the error function: (mean = 0; rmsd = 1)
72             $dist = parse_from_string('normal(0, 1)');
73             print $dist->value(x => 1);
74            
75             # same works for the keywords 'boltzmann', 'bose', 'fermi'
76              
77             =head1 DESCRIPTION
78              
79             This module offers easy access to formulas for a few often-used
80             distributions. For that, it uses the Math::Symbolic module which gives the
81             user an opportunity to manufacture distributions to his liking.
82              
83             The module can be used in two styles: It has a procedural interface which
84             is demonstrated in the first half of the synopsis. But it also
85             features a wholly different interface: It can modify the Math::Symbolic
86             parser so that you can use the distributions right inside strings
87             that will be parsed as Math::Symbolic trees. This is demonstrated for
88             very simple cases in the second half of the synopsis.
89              
90             All arguments in both interface styles are optional.
91             Whichever expression is used instead of, for examle C<'mean'>, is plugged
92             into the formula for the distribution as a Math::Symbolic tree.
93             Details on argument handling are explained below.
94              
95             Please see the section on I for details
96             on how to choose the interface style you want to use.
97              
98             The arguments for the grammar-interface version of the module
99             follow the same concept as for the function interface which is described
100             in L in detail. The only significant difference is that the
101             arguments must all be strings to be parsed as Math::Symbolic trees.
102             There is one exception: If the string 'undef' is passed as one argument
103             to the function, that string is converted to a real undef, but nevermind and
104             see below.
105              
106             =head2 Export
107              
108             By default, the module does not export any functions and does not modify
109             the Math::Symbolic parser. You have to explicitly request that does so
110             using the usual L semantics.
111              
112             If using the module without parameters
113             (C), you can access the
114             distributions via the fully qualified subroutine names such as
115             C. But that
116             would be annoying, no?
117              
118             You can choose to export any of the distribution functions (see below) by
119             specifying one or more function names:
120              
121             use Math::SymbolicX::Statistics::Distributions qw/gauss_distribution/;
122             # then:
123             $dist = gauss_distribution(...);
124              
125             You can also import all of them by using the ':functions' tag:
126              
127             use Math::SymbolicX::Statistics::Distributions qw/:functions/;
128             ...
129              
130             Alternatively, you can choose to modify the Math::Symbolic parser by using
131             any of the following keywords in the same way we used the function names
132             above.
133              
134             normal_grammar
135             gauss_grammar
136             bivariate_normal_grammar
137             cauchy_grammar
138             boltzmann_grammar
139             bose_grammar
140             fermi_grammar
141              
142             To add all the keywords (C, C, C,
143             C, C, C, and C
144             to the grammar, you can specify C<:grammar> instead.
145              
146             Finally, the module supports the exporter tag C<:all> to both export
147             all functions and add all keywords to the parser.
148              
149             =head2 Distributions
150              
151             The following is a list of distributions that can be generated using
152             this module.
153              
154             =over 2
155              
156             =cut
157              
158             =item Normal (Gauss) Distribution
159              
160             Normal (or Gauss) distributions are availlable through the functions
161             C or C which are equivalent.
162             The functions return the Math::Symbolic representation of a
163             gauss distribution.
164              
165             The gauss distribution has three parameters: The mean C, the
166             root mean square deviation C and the variable C.
167              
168             The functions take two optional arguments: The Math::Symbolic trees (or strings)
169             to be plugged into the formula for 1) C and 2) C.
170              
171             If any argument is undefined or omitted, the corresponding variable will
172             remain unchanged.
173              
174             The variable C always remains in the formula.
175              
176             Please refer to the literature referenced in the SEE ALSO section for
177             details.
178              
179             =cut
180              
181             {
182             my $parsed;
183             sub normal_distribution {
184 6     6 0 176 my ($mu, $sigma) = @_;
185 6 100       47 $mu = 'mu' if not defined $mu;
186 6 100       20 $sigma = 'sigma' if not defined $sigma;
187            
188             # parse arguments
189 6         23 $mu = _parse_arguments($mu , 'mu' );
190 6         14 $sigma = _parse_arguments($sigma, 'sigma');
191            
192             # Generate the template object tree
193 6 100       17 if (not defined $parsed) {
194 2         8 $parsed = parse_from_string(
195             'e^(-1*(x-mu)^2/(2*sigma^2))/(sigma*(2*pi)^0.5)'
196             );
197            
198             # Implement special numbers
199 2         124589 $parsed->implement(
200             e => Math::Symbolic::Constant->euler(),
201             pi => Math::Symbolic::Constant->pi(),
202             );
203             }
204              
205             # Always return a clone of the template object tree.
206 6         3067 my $distribution = $parsed->new();
207            
208              
209             # Implement specified variables in a separate step in case
210             # they contain e's and pi's.
211 6         2686 $distribution->implement(
212             sigma => $sigma,
213             mu => $mu,
214             );
215              
216 6         6229 return $distribution;
217             }
218             }
219              
220             *gauss_distribution = \&normal_distribution;
221              
222              
223              
224              
225              
226              
227             =item Bivariate Normal Distribution
228              
229             Bivariate normal distributions are availlable through the function
230             C.
231             The function returns the Math::Symbolic representation of a
232             bivariate normal distribution.
233              
234             The bivariate normal distribution has seven parameters:
235             The mean C of the first variable,
236             the root mean square deviation C of the first variable,
237             the mean C of the second variable,
238             the root mean square deviation C of the second variable,
239             the first variable C,
240             the second variable C,
241             and the correlation of the first and second variables, C.
242              
243             The function takes five optional arguments: The Math::Symbolic trees
244             (or strings) to be plugged into the formula for
245             1) C,
246             2) C,
247             3) C,
248             4) C, and
249             5) C.
250              
251             If any argument is undefined or omitted, the corresponding variable will
252             remain unchanged.
253              
254             The variables C and C always remain in the formula.
255              
256             Please refer to the literature referenced in the SEE ALSO section for
257             details.
258              
259             =cut
260              
261             {
262             my $parsed;
263             sub bivariate_normal_distribution {
264 3     3 0 71 my ($mu1, $sigma1, $mu2, $sigma2, $corr) = @_;
265 3 50       12 $mu1 = 'mu1' if not defined $mu1;
266 3 50       9 $sigma1 = 'sigma1' if not defined $sigma1;
267 3 50       9 $mu2 = 'mu2' if not defined $mu2;
268 3 50       14 $sigma2 = 'sigma2' if not defined $sigma2;
269 3 50       10 $corr = 'sigma12' if not defined $corr;
270            
271             # parse arguments
272 3         14 $mu1 = _parse_arguments($mu1 , 'mu1' );
273 3         10 $sigma1 = _parse_arguments($sigma1, 'sigma1' );
274 3         8 $mu2 = _parse_arguments($mu2 , 'mu2' );
275 3         12 $sigma2 = _parse_arguments($sigma2, 'sigma2' );
276 3         8 $corr = _parse_arguments($corr , 'sigma12' );
277            
278             # Generate the template object tree
279 3 100       10 if (not defined $parsed) {
280 2         12 $parsed = parse_from_string(<<'HERE');
281             e ^ (
282             (
283             2 * sigma12 * (x1-mu1) * (x2-mu2) / (sigma1*sigma2)^2
284             - ( (x1-mu1)/sigma1 )^2 - ( (x2-mu2)/sigma2 )^2
285             )
286             / (
287             2 - 2*(sigma12/sigma1/sigma2)^2
288             )
289             )
290             / (
291             2 * pi * sigma1 * sigma2
292             * ( 1 - (sigma12/sigma1/sigma2)^2 )^0.5
293             )
294             HERE
295            
296             # Implement special numbers
297 2         967858 $parsed->implement(
298             e => Math::Symbolic::Constant->euler(),
299             pi => Math::Symbolic::Constant->pi(),
300             );
301             }
302              
303             # Always return a clone of the template object tree.
304 3         6661 my $distribution = $parsed->new();
305            
306              
307             # Implement specified variables in a separate step in case
308             # they contain e's and pi's.
309 3         9788 $distribution->implement(
310             sigma1 => $sigma1,
311             mu1 => $mu1,
312             sigma2 => $sigma2,
313             mu2 => $mu2,
314             sigma12 => $corr,
315             );
316            
317 3         10119 return $distribution;
318             }
319             }
320              
321              
322              
323              
324              
325             =item Cauchy Distribution
326              
327             Cauchy distributions are availlable through the function
328             C.
329             The function returns the Math::Symbolic representation of a
330             cauchy distribution.
331              
332             The cauchy distribution has three parameters:
333             The median C,
334             the full width at half maximum C of the curve,
335             and the variable C.
336              
337             The function takes two optional arguments: The Math::Symbolic trees
338             (or strings) to be plugged into the formula for
339             1) C and
340             2) C.
341              
342             If any argument is undefined or omitted, the corresponding variable will
343             remain unchanged.
344              
345             The variable C always remains in the formula.
346              
347             Please refer to the literature referenced in the SEE ALSO section for
348             details.
349              
350             =cut
351              
352             {
353             my $parsed;
354             sub cauchy_distribution {
355 3     3 0 107 my ($median, $fwhm) = @_;
356 3 50       15 $median = 'm' if not defined $median;
357 3 50       9 $fwhm = 'lambda' if not defined $fwhm;
358            
359             # parse arguments
360 3         13 $median = _parse_arguments($median, 'm' );
361 3         7 $fwhm = _parse_arguments($fwhm, 'lambda');
362            
363             # Generate the template object tree
364 3 100       12 if (not defined $parsed) {
365 2         9 $parsed = parse_from_string(
366             'lambda/(2*pi*( (x-m)^2 + lambda^2/4 ))'
367             );
368            
369             # Implement special numbers
370 2         142174 $parsed->implement(
371             pi => Math::Symbolic::Constant->pi(),
372             );
373             }
374              
375             # Always return a clone of the template object tree.
376 3         1762 my $distribution = $parsed->new();
377            
378              
379             # Implement specified variables in a separate step in case
380             # they contain e's and pi's.
381 3         848 $distribution->implement(
382             lambda => $fwhm,
383             m => $median,
384             );
385              
386 3         2136 return $distribution;
387             }
388             }
389              
390              
391              
392             =item Boltzmann Distribution
393              
394             Boltzmann distributions are availlable through the function
395             C.
396             The function returns the Math::Symbolic representation of a
397             Boltzmann distribution.
398              
399             The Boltzmann distribution has four parameters:
400             The energy C,
401             the weighting factor C that describes the number of states at
402             energy C, the temperature C,
403             and the chemical potential C.
404              
405             The function takes fouroptional arguments: The Math::Symbolic trees
406             (or strings) to be plugged into the formula for
407             1) C,
408             2) C,
409             3) C, and
410             4) C
411              
412             If any argument is undefined or omitted, the corresponding variable will
413             remain unchanged.
414              
415             The formula used is: C.
416              
417             Please refer to the literature referenced in the SEE ALSO section for
418             details. Boltzmann's constant C is used as C<1.3807 * 10^-23 J/K>.
419              
420             =cut
421              
422             {
423             my $parsed;
424             sub boltzmann_distribution {
425 0     0 0 0 my ($E, $gs, $T, $mu) = @_;
426 0 0       0 $E = 'E' if not defined $E;
427 0 0       0 $gs = 'gs' if not defined $gs;
428 0 0       0 $T = 'T' if not defined $T;
429 0 0       0 $mu = 'mu' if not defined $mu;
430            
431             # parse arguments
432 0         0 $E = _parse_arguments($E , 'E' );
433 0         0 $gs = _parse_arguments($gs , 'gs' );
434 0         0 $T = _parse_arguments($T , 'T' );
435 0         0 $mu = _parse_arguments($mu , 'mu' );
436            
437             # Generate the template object tree
438 0 0       0 if (not defined $parsed) {
439 0         0 $parsed = parse_from_string(<<'HERE');
440             gs /
441             e ^ (
442             (E - mu) / (k_B * T)
443             )
444             HERE
445            
446             # Implement special numbers
447 0         0 $parsed->implement(
448             e => Math::Symbolic::Constant->euler(),
449             # pi => Math::Symbolic::Constant->pi(),
450             k_B => Math::Symbolic::Constant->new(1.3807e-23),
451             );
452             }
453              
454             # Always return a clone of the template object tree.
455 0         0 my $distribution = $parsed->new();
456            
457              
458             # Implement specified variables in a separate step in case
459             # they contain e's and pi's.
460 0         0 $distribution->implement(
461             E => $E,
462             gs => $gs,
463             T => $T,
464             mu => $mu,
465             );
466            
467 0         0 return $distribution;
468             }
469             }
470              
471              
472              
473             =item Fermi Distribution
474              
475             Fermi distributions are availlable through the function
476             C.
477             The function returns the Math::Symbolic representation of a
478             Fermi distribution.
479              
480             The Fermi distribution has four parameters:
481             The energy C,
482             the weighting factor C that describes the number of states at
483             energy C, the temperature C,
484             and the chemical potential C.
485              
486             The function takes fouroptional arguments: The Math::Symbolic trees
487             (or strings) to be plugged into the formula for
488             1) C,
489             2) C,
490             3) C, and
491             4) C
492              
493             If any argument is undefined or omitted, the corresponding variable will
494             remain unchanged.
495              
496             The formula used is: C.
497              
498             Please refer to the literature referenced in the SEE ALSO section for
499             details. Boltzmann's constant C is used as C<1.3807 * 10^-23 J/K>.
500              
501             =cut
502              
503             {
504             my $parsed;
505             sub fermi_distribution {
506 0     0 0 0 my ($E, $gs, $T, $mu) = @_;
507 0 0       0 $E = 'E' if not defined $E;
508 0 0       0 $gs = 'gs' if not defined $gs;
509 0 0       0 $T = 'T' if not defined $T;
510 0 0       0 $mu = 'mu' if not defined $mu;
511            
512             # parse arguments
513 0         0 $E = _parse_arguments($E , 'E' );
514 0         0 $gs = _parse_arguments($gs , 'gs' );
515 0         0 $T = _parse_arguments($T , 'T' );
516 0         0 $mu = _parse_arguments($mu , 'mu' );
517            
518             # Generate the template object tree
519 0 0       0 if (not defined $parsed) {
520 0         0 $parsed = parse_from_string(<<'HERE');
521             gs /
522             (
523             e ^ (
524             (E - mu) / (k_B * T)
525             )
526             + 1
527             )
528             HERE
529            
530             # Implement special numbers
531 0         0 $parsed->implement(
532             e => Math::Symbolic::Constant->euler(),
533             # pi => Math::Symbolic::Constant->pi(),
534             k_B => Math::Symbolic::Constant->new(1.3807e-23),
535             );
536             }
537              
538             # Always return a clone of the template object tree.
539 0         0 my $distribution = $parsed->new();
540            
541              
542             # Implement specified variables in a separate step in case
543             # they contain e's and pi's.
544 0         0 $distribution->implement(
545             E => $E,
546             gs => $gs,
547             T => $T,
548             mu => $mu,
549             );
550            
551 0         0 return $distribution;
552             }
553             }
554              
555              
556              
557              
558              
559              
560              
561              
562             sub _parse_arguments {
563 33     33   52 my $argument = shift;
564 33         52 my $name = shift;
565              
566 33 100       93 unless (ref($argument) =~ /^Math::Symbolic/) {
567 23         27 my $tmp;
568 23         29 eval {
569 23         82 $tmp = parse_from_string($argument)
570             };
571 23 50       81579 confess "Could not parse arguments: '$name' was\n"
572             ."'$argument'. Error was '$@'" if $@;
573 23         74 $argument = $tmp;
574             }
575              
576 33         67 return $argument;
577             }
578              
579              
580              
581              
582              
583             # Now follows all the exporter stuff!
584              
585             our @ISA = qw(Exporter);
586              
587             # this works as usual, but more follows
588             our %EXPORT_TAGS = (
589             'all' => [ qw(
590             gauss_distribution
591             normal_distribution
592             bivariate_normal_distribution
593             cauchy_distribution
594             boltzmann_distribution
595             bose_distribution
596             fermi_distribution
597             ) ],
598             'functions' => [ qw(
599             gauss_distribution
600             normal_distribution
601             bivariate_normal_distribution
602             cauchy_distribution
603             boltzmann_distribution
604             bose_distribution
605             fermi_distribution
606             ) ],
607             );
608              
609             # associate export names with function names and numbers of arguments.
610             my %GRAMMAR_EXTENSIONS = (
611             gauss_grammar =>
612             {name => 'gauss', args => 2, function => \&gauss_distribution},
613             normal_grammar => {name => 'normal', args => 2, function => \&gauss_distribution},
614             bivariate_normal_grammar => {name => 'bivariate_normal', args => 5, function => \&bivariate_normal_distribution},
615             cauchy_grammar => {name => 'cauchy', args => 2, function => \&cauchy_distribution},
616             boltzmann_grammar => {name => 'boltzmann', args => 4, function => \&boltzmann_distribution},
617             bose_grammar => {name => 'bose', args => 4, function => \&bose_distribution},
618             fermi_grammar => {name => 'fermi', args => 4, function => \&fermi_distribution},
619             );
620              
621             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
622             our @EXPORT = qw();
623              
624             # We do some fancy stuff in import:
625             # If the grammar bits are wanted (either via :all, :grammar or the individual
626             # bits), we amend the parser and then hand control to the default import()
627             # from Exporter.
628              
629             sub import {
630 5     5   1834 my @args = @_;
631 5         8 my $class = shift;
632            
633             # cache bits that are to be imported.
634 5         9 my %import;
635              
636             # find all the grammar related stuff and leave the ordinary
637             # exporter function related stuff.
638 5         19 for (my $i = 0; $i <= $#args; $i++) {
639            
640             # grammar tag
641 10 100       51 if ($args[$i] eq ':grammar') {
    100          
    100          
642 1         8 %import = %GRAMMAR_EXTENSIONS;
643            
644             # remove from args so exporter doesn't hiccup.
645 1         2 splice(@args, $i, 1);
646            
647 1 50       4 last if $i == @args;
648 0         0 redo;
649             }
650             # all tag
651             elsif ($args[$i] eq ':all') {
652 1         5 %import = %GRAMMAR_EXTENSIONS;
653            
654 1 50       5 last if $i == @args;
655 1         3 next;
656             }
657             # individual tags
658             elsif (exists $GRAMMAR_EXTENSIONS{$args[$i]}) {
659 3         6 $import{$args[$i]} = undef;
660            
661             # remove from args so exporter doesn't hiccup.
662 3         8 splice(@args, $i, 1);
663              
664 3 50       11 last if $i == @args;
665 0         0 redo;
666             }
667             }
668              
669             # Now handle all the grammar related stuff
670 5         14 foreach my $import (keys %import) {
671 17         27390 require Math::SymbolicX::ParserExtensionFactory;
672              
673             # create new M::S function
674             Math::SymbolicX::ParserExtensionFactory->import(
675             $GRAMMAR_EXTENSIONS{$import}{name} => sub {
676             # argument checking
677 8     8   20357 my $args = shift;
678              
679 8         32 my $name = $GRAMMAR_EXTENSIONS{$import}{name};
680 8         20 my $noargs = $GRAMMAR_EXTENSIONS{$import}{args};
681 8         18 my $func=$GRAMMAR_EXTENSIONS{$import}{function};
682              
683 8         50 my @args = split /\s*,\s*/, $args;
684 8         16 my $no_args = @args;
685 8 50       27 confess(<<"HERE")
686             Too many arguments ($no_args > $noargs) to '$name()' while
687             parsing Math::Symbolic tree from string.
688             HERE
689             if $no_args > $GRAMMAR_EXTENSIONS{$import}{args};
690              
691             # individual argument checking
692 8         14 foreach (@args) {
693             # map "undef" to undef
694 18 100       57 if (/\s*undef\s*/io) {
695 8         37 $_ = undef;
696 8         12 next;
697             }
698              
699             # make sure the argument parses as M::S
700 10         14 my $tmp;
701 10         13 eval { $tmp = parse_from_string($_) };
  10         31  
702 10 50 33     38179 confess(<<"HERE")
703             Invalid argument ('$_') to '$name()' while
704             parsing Math::Symbolic tree from string. Error message (if any):
705             $@
706             HERE
707             if $@ or not defined $tmp;
708 10         26 $_ = $tmp;
709             }
710            
711             # function application
712 8         36 my $res;
713 8         12 eval { $res = $func->(@args); };
  8         23  
714 8 50 33     61 confess(<<"HERE") if $@ or not defined $res;
715             Unknown error applying '$name()' while
716             parsing Math::Symbolic tree from string. Error message (if any):
717             $@
718             HERE
719 8         281 return $res;
720             }
721 17         1954 );
722             }
723              
724             # I wonder whether this class is inheritable at all, but well, here
725             # goes...
726 5         31406 $class->export_to_level(1, @args);
727             }
728              
729              
730              
731              
732             1;
733             __END__