File Coverage

blib/lib/Text/NSP/Measures/4D.pm
Criterion Covered Total %
statement 201 684 29.3
branch 110 194 56.7
condition 6 9 66.6
subroutine 7 7 100.0
pod 3 3 100.0
total 327 897 36.4


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::4D - Perl module that provides a basic framework for
4             building measures of association for 4-grams.
5              
6             =head1 SYNOPSIS
7              
8             This module can be used as a foundation for building 4-dimensional
9             measures of association that can then be used by statistic.pl. In
10             particular this module provides methods that give convenient access to
11             4-d (i.e., 4-gram) frequency counts as created by count.pl, as well as
12             some degree of error handling that verifies the data.
13              
14             =head3 Basic Usage
15              
16             use Text::NSP::Measures::4D::MI::ll;
17              
18             $ll_value = calculateStatistic(
19             n1111=>8,
20             n1ppp=>306,
21             np1pp=>83,
22             npp1p=>83,
23             nppp1=>57,
24             n11pp=>8,
25             n1p1p=>8,
26             n1pp1=>8,
27             np11p=>83,
28             np1p1=>56,
29             npp11=>56,
30             n111p=>8,
31             n11p1=>8,
32             n1p11=>8,
33             np111=>56,
34             npppp=>15180);
35              
36             if( ($errorCode = getErrorCode()))
37             {
38             print STDERR $errorCode." - ".getErrorMessage()."\n";
39             }
40             else
41             {
42             print getStatisticName()." value for 4-gram is ".$ll_value."\n";
43             }
44              
45             =head1 DESCRIPTION
46              
47             The methods in this module retrieve observed 4-gram frequency counts and
48             marginal totals, and also compute expected values. They also provide
49             support for error checking of the output produced by count.pl. These
50             methods are used in all the 4-gram (4d) measure modules provided in NSP.
51             If you are writing your own 4d measure, you can use these methods as well.
52              
53             With 4-gram or 4d measures we use a 2x2x2x2 contingency table to store the
54             frequency counts associated with each word in the 4-gram, as well as the
55             number of times the 4-gram occurs. The notation we employ is as follows:
56              
57             Marginal Frequencies:
58            
59             n1ppp = the number of ngrams where the first word is word1.
60             np1pp = the number of ngrams where the second word is word2.
61             npp1p = the number of ngrams where the third word is word3.
62             nppp1 = the number of ngrams where the fourth word is word4.
63             n2ppp = the number of ngrams where the first word is not word1.
64             np2pp = the number of ngrams where the second word is not word2.
65             npp2p = the number of ngrams where the third word is not word3.
66             nppp2 = the number of ngrams where the fourth word is not word4.
67              
68             Observed Frequencies:
69              
70             n1111 = number of times word1, word2, word3 and word4 occur together in
71             their respective positions, joint frequency.
72             n1112 = number of times word1, word2 and word3 occur in their respective
73             positions but word4 does not.
74             n1121 = number of times word1, word2 and word4 occur in their respective
75             positions but word3 does not.
76             n1122 = number of times word1 and word2 occur in their respective positions
77             but word3 and word4 do not.
78             n1211 = number of times word1, word3 and word4 occur in their respective
79             positions but word2 does not.
80             n1212 = number of times word1 and word3 occur in their respective positions
81             but word2 and word4 do not.
82             n1221 = number of times word1 and word4 occur in their respective positions
83             but word2 and word3 do not.
84             n1222 = number of times word1 occurs in its respective position but word2,
85             word3 and word4 do not.
86             n2111 = number of times word2, word3 and word4 occur in their respective
87             positions but word1 does not.
88             n2112 = number of times word2 and word3 occur in their respective positions
89             but word1 and word4 do not.
90             n2121 = number of times word2 and word4 occur in their respective positions
91             but word1 and word3 do not.
92             n2122 = number of times word2 occurs in its respective position but word1,
93             word3 and word4 do not.
94             n2211 = number of times word3 and word4 occur in their respective positions
95             but word1 and word2 do not.
96             n2212 = number of times word3 occurs in its respective position but word1,
97             word2 and word4 do not.
98             n2221 = number of times word4 occurs in its respective position but word1,
99             word2, and word3 do not.
100             n2222 = number of times none of word1, word2, word3 and word4 occurs in
101             its respective position.
102              
103             Expected Frequencies:
104              
105             m1111 = expected number of times word1, word2, word3 and word4 occur together in
106             their respective positions, joint frequency.
107             m1112 = expected number of times word1, word2 and word3 occur in their respective
108             positions but word4 does not.
109             m1121 = expected number of times word1, word2 and word4 occur in their respective
110             positions but word3 does not.
111             m1122 = expected number of times word1 and word2 occur in their respective positions
112             but word3 and word4 do not.
113             m1211 = expected number of times word1, word3 and word4 occur in their respective
114             positions but word2 does not.
115             m1212 = expected number of times word1 and word3 occur in their respective positions
116             but word2 and word4 do not.
117             m1221 = expected number of times word1 and word4 occur in their respective positions
118             but word2 and word3 do not.
119             m1222 = expected number of times word1 occurs in its respective position but word2,
120             word3 and word4 do not.
121             m2111 = expected number of times word2, word3 and word4 occur in their respective
122             positions but word1 does not.
123             m2112 = expected number of times word2 and word3 occur in their respective positions
124             but word1 and word4 do not.
125             m2121 = expected number of times word2 and word4 occur in their respective positions
126             but word1 and word3 do not.
127             m2122 = expected number of times word2 occurs in its respective position but word1,
128             word3 and word4 do not.
129             m2211 = expected number of times word3 and word4 occur in their respective positions
130             but word1 and word2 do not.
131             m2212 = expected number of times word3 occurs in its respective position but word1,
132             word2 and word4 do not.
133             m2221 = expected number of times word4 occurs in its respective position but word1,
134             word2, and word3 do not.
135             m2222 = expected number of times none of word1, word2, word3 and word4 occurs in
136             its respective position.
137             =head2 Methods
138              
139             =over
140              
141             =cut
142              
143              
144             package Text::NSP::Measures::4D;
145              
146              
147 2     2   1173 use Text::NSP::Measures;
  2         5  
  2         310  
148 2     2   10 use strict;
  2         4  
  2         41  
149 2     2   10 use Carp;
  2         4  
  2         100  
150 2     2   10 use warnings;
  2         4  
  2         27596  
151             require Exporter;
152              
153              
154             our ($VERSION, @ISA, $marginals, @EXPORT);
155             our ($n1111, $n1112, $n1121, $n1122, $n1211, $n1212, $n1221, $n1222);
156             our ($n2111, $n2112, $n2121, $n2122, $n2211, $n2212, $n2221, $n2222);
157             our ($m1111, $m1112, $m1121, $m1122, $m1211, $m1212, $m1221, $m1222);
158             our ($m2111, $m2112, $m2121, $m2122, $m2211, $m2212, $m2221, $m2222);
159             our ($nppp1, $npp1p, $npp11, $np1pp, $np1p1, $np11p, $np111, $n1ppp);
160             our ($n1pp1, $n1p1p, $n1p11, $n11pp, $n11p1, $n111p, $npppp);
161             our ($nppp2, $npp2p, $npp22, $np2pp, $np2p2, $np22p, $np222, $n2ppp);
162             our ($n2pp2, $n2p2p, $n2p22, $n22pp, $n22p2, $n222p);
163             our ($np112, $np121, $np122, $np211, $np212, $np221);
164             our ($expected_values);
165             our ($n2p1p, $n1p2p, $n2pp1, $n1pp2, $npp21, $npp12, $n21pp, $n12pp);
166             our ($n22p1, $n21p2, $n21p1, $n12p2, $n12p1, $n11p2, $n2p21, $n2p11);
167             our ($n2p12, $n1p22, $n1p21, $n1p12, $np21p, $np12p, $np2p1, $np1p2);
168             our ($n221p, $n212p, $n211p, $n122p, $n121p, $n112p);
169             @ISA = qw(Exporter);
170              
171             @EXPORT = qw(initializeStatistic calculateStatistic
172             getErrorCode getErrorMessage getStatisticName
173             $n1111 $n1112 $n1121 $n1122 $n1211 $n1212 $n1221 $n1222
174             $n2111 $n2112 $n2121 $n2122 $n2211 $n2212 $n2221 $n2222
175             $m1111 $m1112 $m1121 $m1122 $m1211 $m1212 $m1221 $m1222
176             $m2111 $m2112 $m2121 $m2122 $m2211 $m2212 $m2221 $m2222
177             $nppp1 $npp1p $npp11 $np1pp $np1p1 $np11p $np111 $n1ppp
178             $n1pp1 $n1p1p $n1p11 $n11pp $n11p1 $n111p $npppp
179             $nppp2 $npp2p $npp22 $np2pp $np2p2 $np22p $np222 $n2ppp
180             $n2pp2 $n2p2p $n2p22 $n22pp $n22p2 $n222p
181             $np112 $np121 $np122 $np211 $np212 $np221 $expected_values
182             $errorCodeNumber $errorMessage);
183              
184             $VERSION = '0.97';
185              
186              
187             =item computeObservedValues($count_values) - A method to
188             compute observed values, and also to verify that the
189             computed observed values are correct, that is, they are
190             non-negative and do not exceed the marginal totals or the
191             total 4-gram count.
192              
193             INPUT PARAMS : $count_values .. Reference to a hash consisting
194             of the count values passed to
195             the calculateStatistic() method.
196              
197             RETURN VALUES : 1/undef ..returns '1' to indicate success
198             and an undefined(NULL) value to indicate
199             failure.
200              
201             =cut
202              
203             sub computeObservedValues
204             {
205 16     16 1 20 my ($values) = @_;
206              
207 16         21 $n1111=$values->{n1111};
208 16         21 $n1ppp=$values->{n1ppp};
209 16         20 $np1pp=$values->{np1pp};
210 16         18 $npp1p=$values->{npp1p};
211 16         19 $nppp1=$values->{nppp1};
212 16         20 $n11pp=$values->{n11pp};
213 16         21 $n1p1p=$values->{n1p1p};
214 16         21 $n1pp1=$values->{n1pp1};
215 16         18 $np11p=$values->{np11p};
216 16         17 $np1p1=$values->{np1p1};
217 16         18 $npp11=$values->{npp11};
218 16         17 $n111p=$values->{n111p};
219 16         17 $n11p1=$values->{n11p1};
220 16         19 $n1p11=$values->{n1p11};
221 16         17 $np111=$values->{np111};
222 16         21 $npppp=$values->{npppp};
223              
224             # we do not have the model fully implemented yet
225             #$expected_values=$values->{expected_values};
226              
227             # Check that all the values are defined
228 16 100       34 if(!defined $values->{n1111})
229             {
230 1         2 $errorMessage = "Required 4-gram (1,1,1,1) not passed";
231 1         2 $errorCodeNumber = 200;
232 1         3 return;
233             }
234 15 100       28 if(!defined $values->{n1ppp})
235             {
236 1         2 $errorMessage = "Required 4-gram (1,p,p,p) not passed";
237 1         2 $errorCodeNumber = 200;
238 1         3 return;
239             }
240 14 100       28 if(!defined $values->{np1pp})
241             {
242 1         2 $errorMessage = "Required 4-gram (p,1,p,p) not passed";
243 1         2 $errorCodeNumber = 200;
244 1         3 return;
245             }
246 13 50       22 if(!defined $values->{npp1p})
247             {
248 0         0 $errorMessage = "Required 4-gram (p,p,1,p) not passed";
249 0         0 $errorCodeNumber = 200;
250 0         0 return;
251             }
252 13 100       26 if(!defined $values->{nppp1})
253             {
254 1         2 $errorMessage = "Required 4-gram (p,p,p,1) not passed";
255 1         2 $errorCodeNumber = 200;
256 1         3 return;
257             }
258 12 50       21 if(!defined $values->{n11pp})
259             {
260 0         0 $errorMessage = "Required 4-gram (1,1,p,p) not passed";
261 0         0 $errorCodeNumber = 200;
262 0         0 return;
263             }
264 12 50       20 if(!defined $values->{n1p1p})
265             {
266 0         0 $errorMessage = "Required 4-gram (1,p,1,p) not passed";
267 0         0 $errorCodeNumber = 200;
268 0         0 return;
269             }
270 12 50       21 if(!defined $values->{n1pp1})
271             {
272 0         0 $errorMessage = "Required 4-gram (1,p,p,1) not passed";
273 0         0 $errorCodeNumber = 200;
274 0         0 return;
275             }
276 12 50       20 if(!defined $values->{np11p})
277             {
278 0         0 $errorMessage = "Required 4-gram (p,1,1,p) not passed";
279 0         0 $errorCodeNumber = 200;
280 0         0 return;
281             }
282 12 50       28 if(!defined $values->{np1p1})
283             {
284 0         0 $errorMessage = "Required 4-gram (p,1,p,1) not passed";
285 0         0 $errorCodeNumber = 200;
286 0         0 return;
287             }
288 12 100       22 if(!defined $values->{npp11})
289             {
290 1         2 $errorMessage = "Required 4-gram (p,p,1,1) not passed";
291 1         1 $errorCodeNumber = 200;
292 1         4 return;
293             }
294 11 50       18 if(!defined $values->{n111p})
295             {
296 0         0 $errorMessage = "Required 4-gram (1,1,1,p) not passed";
297 0         0 $errorCodeNumber = 200;
298 0         0 return;
299             }
300 11 100       22 if(!defined $values->{n11p1})
301             {
302 1         9 $errorMessage = "Required 4-gram (1,1,p,1) not passed";
303 1         2 $errorCodeNumber = 200;
304 1         3 return;
305             }
306 10 50       18 if(!defined $values->{n1p11})
307             {
308 0         0 $errorMessage = "Required 4-gram (1,p,1,1) not passed";
309 0         0 $errorCodeNumber = 200;
310 0         0 return;
311             }
312 10 100       19 if(!defined $values->{np111})
313             {
314 1         2 $errorMessage = "Required 4-gram (p,1,1,1) not passed";
315 1         2 $errorCodeNumber = 200;
316 1         3 return;
317             }
318 9 100       17 if(!defined $values->{npppp})
319             {
320 1         3 $errorMessage = "Required 4-gram (p,p,p,p) not passed";
321 1         2 $errorCodeNumber = 200;
322 1         3 return;
323             }
324              
325             # n1111 should be greater than equal to zero
326 8 100       17 if ($n1111 <= 0) {
327 1         5 $errorMessage = "Frequency value ($n1111) must not be negative.";
328 1         1 $errorCodeNumber = 201; return;
  1         3  
329             }
330            
331             # n1111 frequency should be less than or equal to total 4grams
332 7 100       22 if ($n1111 > $npppp) {
333 1         3 $errorMessage = "Frequency value ($n1111) must not exceed total number of 4grams.";
334 1         1 $errorCodeNumber = 201; return;
  1         4  
335             }
336            
337             # joint frequency n1111 should be less than or equal to the marginal totals
338 6 100 66     48 if ($n1111 > $n1ppp || $n1111 > $np1pp || $n1111 > $npp1p || $n1111 > $nppp1) {
      66        
      66        
339 2         6 $errorMessage = "Frequency value of ngram ($n1111) must not exceed the marginal totals.";
340 2         3 $errorCodeNumber = 202; return;
  2         6  
341             }
342              
343             # n1ppp should be greater than equal to zero
344 4 50       7 if ($n1ppp <= 0) {
345 0         0 $errorMessage = "Frequency value ($n1ppp) must not be negative.";
346 0         0 $errorCodeNumber = 201; return;
  0         0  
347             }
348            
349             # n1ppp frequency should be less than or equal to total 4grams
350 4 50       9 if ($n1ppp > $npppp) {
351 0         0 $errorMessage = "Frequency value ($n1ppp) must not exceed total number of 4grams.";
352 0         0 $errorCodeNumber = 201; return;
  0         0  
353             }
354              
355             # np1pp should be greater than equal to zero
356 4 50       8 if ($np1pp <= 0) {
357 0         0 $errorMessage = "Frequency value ($np1pp) must not be negative.";
358 0         0 $errorCodeNumber = 201; return;
  0         0  
359             }
360            
361             # np1pp frequency should be less than or equal to total 4grams
362 4 50       9 if ($np1pp > $npppp) {
363 0         0 $errorMessage = "Frequency value ($np1pp) must not exceed total number of 4grams.";
364 0         0 $errorCodeNumber = 201; return;
  0         0  
365             }
366              
367             # npp1p should be greater than equal to zero
368 4 50       7 if ($npp1p <= 0) {
369 0         0 $errorMessage = "Frequency value ($npp1p) must not be negative.";
370 0         0 $errorCodeNumber = 201; return;
  0         0  
371             }
372            
373             # npp1p frequency should be less than or equal to total 4grams
374 4 50       8 if ($npp1p > $npppp) {
375 0         0 $errorMessage = "Frequency value ($npp1p) must not exceed total number of 4grams.";
376 0         0 $errorCodeNumber = 201; return;
  0         0  
377             }
378              
379             # nppp1 should be greater than equal to zero
380 4 50       7 if ($nppp1 <= 0) {
381 0         0 $errorMessage = "Frequency value ($nppp1) must not be negative.";
382 0         0 $errorCodeNumber = 201; return;
  0         0  
383             }
384            
385             # nppp1 frequency should be less than or equal to total 4grams
386 4 100       9 if ($nppp1 > $npppp) {
387 1         4 $errorMessage = "Frequency value ($nppp1) must not exceed total number of 4grams.";
388 1         2 $errorCodeNumber = 201; return;
  1         4  
389             }
390              
391             # n11pp should be greater than equal to zero
392 3 50       6 if ($n11pp <= 0) {
393 0         0 $errorMessage = "Frequency value ($n11pp) must not be negative.";
394 0         0 $errorCodeNumber = 201; return;
  0         0  
395             }
396            
397             # n11pp frequency should be less than or equal to total 4grams
398 3 50       11 if ($n11pp > $npppp) {
399 0         0 $errorMessage = "Frequency value ($n11pp) must not exceed total number of 4grams.";
400 0         0 $errorCodeNumber = 201; return;
  0         0  
401             }
402              
403             # n1p1p should be greater than equal to zero
404 3 50       6 if ($n1p1p <= 0) {
405 0         0 $errorMessage = "Frequency value ($n1p1p) must not be negative.";
406 0         0 $errorCodeNumber = 201; return;
  0         0  
407             }
408            
409             # n1p1p frequency should be less than or equal to total 4grams
410 3 50       8 if ($n1p1p > $npppp) {
411 0         0 $errorMessage = "Frequency value ($n1p1p) must not exceed total number of 4grams.";
412 0         0 $errorCodeNumber = 201; return;
  0         0  
413             }
414              
415             # n1pp1 should be greater than equal to zero
416 3 50       9 if ($n1pp1 <= 0) {
417 0         0 $errorMessage = "Frequency value ($n1pp1) must not be negative.";
418 0         0 $errorCodeNumber = 201; return;
  0         0  
419             }
420            
421             # n1pp1 frequency should be less than or equal to total 4grams
422 3 50       7 if ($n1pp1 > $npppp) {
423 0         0 $errorMessage = "Frequency value ($n1pp1) must not exceed total number of 4grams.";
424 0         0 $errorCodeNumber = 201; return;
  0         0  
425             }
426              
427             # np11p should be greater than equal to zero
428 3 50       5 if ($np11p <= 0) {
429 0         0 $errorMessage = "Frequency value ($np11p) must not be negative.";
430 0         0 $errorCodeNumber = 201; return;
  0         0  
431             }
432            
433             # np11p frequency should be less than or equal to total 4grams
434 3 50       7 if ($np11p > $npppp) {
435 0         0 $errorMessage = "Frequency value ($np11p) must not exceed total number of 4grams.";
436 0         0 $errorCodeNumber = 201; return;
  0         0  
437             }
438              
439             # np1p1 should be greater than equal to zero
440 3 50       5 if ($np1p1 <= 0) {
441 0         0 $errorMessage = "Frequency value ($np1p1) must not be negative.";
442 0         0 $errorCodeNumber = 201; return;
  0         0  
443             }
444            
445             # np1p1 frequency should be less than or equal to total 4grams
446 3 50       7 if ($np1p1 > $npppp) {
447 0         0 $errorMessage = "Frequency value ($np1p1) must not exceed total number of 4grams.";
448 0         0 $errorCodeNumber = 201; return;
  0         0  
449             }
450              
451             # npp11 should be greater than equal to zero
452 3 50       5 if ($npp11 <= 0) {
453 0         0 $errorMessage = "Frequency value ($npp11) must not be negative.";
454 0         0 $errorCodeNumber = 201; return;
  0         0  
455             }
456            
457             # npp11 frequency should be less than or equal to total 4grams
458 3 50       8 if ($npp11 > $npppp) {
459 0         0 $errorMessage = "Frequency value ($npp11) must not exceed total number of 4grams.";
460 0         0 $errorCodeNumber = 201; return;
  0         0  
461             }
462              
463             # n111p should be greater than equal to zero
464 3 50       5 if ($n111p <= 0) {
465 0         0 $errorMessage = "Frequency value ($n111p) must not be negative.";
466 0         0 $errorCodeNumber = 201; return;
  0         0  
467             }
468            
469             # n111p frequency should be less than or equal to total 4grams
470 3 50       7 if ($n111p > $npppp) {
471 0         0 $errorMessage = "Frequency value ($n111p) must not exceed total number of 4grams.";
472 0         0 $errorCodeNumber = 201; return;
  0         0  
473             }
474              
475             # n11p1 should be greater than equal to zero
476 3 50       6 if ($n11p1 <= 0) {
477 0         0 $errorMessage = "Frequency value ($n11p1) must not be negative.";
478 0         0 $errorCodeNumber = 201; return;
  0         0  
479             }
480            
481             # n11p1 frequency should be less than or equal to total 4grams
482 3 50       7 if ($n11p1 > $npppp) {
483 0         0 $errorMessage = "Frequency value ($n11p1) must not exceed total number of 4grams.";
484 0         0 $errorCodeNumber = 201; return;
  0         0  
485             }
486              
487             # n1p11 should be greater than equal to zero
488 3 50       6 if ($n1p11 <= 0) {
489 0         0 $errorMessage = "Frequency value ($n1p11) must not be negative.";
490 0         0 $errorCodeNumber = 201; return;
  0         0  
491             }
492            
493             # n1p11 frequency should be less than or equal to total 4grams
494 3 50       7 if ($n1p11 > $npppp) {
495 0         0 $errorMessage = "Frequency value ($n1p11) must not exceed total number of 4grams.";
496 0         0 $errorCodeNumber = 201; return;
  0         0  
497             }
498              
499             # np111 should be greater than equal to zero
500 3 50       5 if ($np111 <= 0) {
501 0         0 $errorMessage = "Frequency value ($np111) must not be negative.";
502 0         0 $errorCodeNumber = 201; return;
  0         0  
503             }
504            
505             # np111 frequency should be less than or equal to total 4grams
506 3 50       7 if ($np111 > $npppp) {
507 0         0 $errorMessage = "Frequency value ($np111) must not exceed total number of 4grams.";
508 0         0 $errorCodeNumber = 201; return;
  0         0  
509             }
510              
511              
512             # observed
513 3         8 $n1112=$n111p-$n1111;
514 3         9 $n1121=$n11p1-$n1111;
515              
516            
517 3         4 $n1122=$n11pp-$n1111-$n1121-$n1112;
518 3         4 $n2111=$np111-$n1111;
519              
520 3         4 $n1211=$n1p11-$n1111;
521 3         5 $n1212=$n1p1p-$n1111-$n1112-$n1211;
522 3         4 $n1221=$n1pp1-$n1111-$n1211-$n1121;
523 3         5 $n1222=$n1ppp-$n1111-$n1211-$n1121-$n1112;
524            
525 3         5 $n2112=$np11p-$n1111-$n2111-$n1112;
526 3         5 $n2121=$np1p1-$n1111-$n2111-$n1121;
527 3         55 $n2122=$np1pp-$n1111-$n2111-$n1121-$n1112;
528 3         5 $n2211=$npp11-$n1111-$n2111-$n1211;
529              
530 3         7 $n2212=$npp1p-$n1111-$n2111-$n1211-$n1112;
531              
532 3         4 $n2221=$nppp1-$n1111-$n2111-$n1211-$n1121;
533 3         4 $n2222=$npppp-$n1111-$n2111-$n1211-$n1121-$n1112;
534            
535            
536             # n1112 should be greater than equal to zero
537 3 50       10 if ($n1112 < 0) {
538 0         0 $errorMessage = "Frequency value n1112 ($n1112) must not be negative.";
539 0         0 $errorCodeNumber = 202; return;
  0         0  
540             }
541            
542             # n1112 frequency should be less than or equal to total4grams
543 3 50       43 if ($n1112 > $npppp) {
544 0         0 $errorMessage = "Frequency value n1112 ($n1112) must not exceed total number of 4grams.";
545 0         0 $errorCodeNumber = 202; return;
  0         0  
546             }
547            
548             # n1121 should be greater than equal to zero
549 3 50       7 if ($n1121 < 0) {
550 0         0 $errorMessage = "Frequency value n1121 ($n1121) must not be negative.";
551 0         0 $errorCodeNumber = 202; return;
  0         0  
552             }
553            
554             # n1121 frequency should be less than or equal to total 4grams
555 3 50       7 if ($n1121 > $npppp) {
556 0         0 $errorMessage = "Frequency value n1121 ($n1121) must not exceed total number of 4grams.";
557 0         0 $errorCodeNumber = 202; return;
  0         0  
558             }
559              
560             # n1122 should be greater than equal to zero
561 3 50       6 if ($n1122 < 0) {
562 0         0 $errorMessage = "Frequency value n1122 ($n1122) must not be negative.";
563 0         0 $errorCodeNumber = 202; return;
  0         0  
564             }
565            
566             # n1122 frequency should be less than or equal to total 4grams
567 3 50       6 if ($n1122 > $npppp) {
568 0         0 $errorMessage = "Frequency value n1122 ($n1122) must not exceed total number of 4grams.";
569 0         0 $errorCodeNumber = 202; return;
  0         0  
570             }
571              
572             # n1211 should be greater than equal to zero
573 3 50       6 if ($n1211 < 0) {
574 0         0 $errorMessage = "Frequency value n1211 ($n1211) must not be negative.";
575 0         0 $errorCodeNumber = 202; return;
  0         0  
576             }
577            
578             # n1211 frequency should be less than or equal to total 4grams
579 3 50       7 if ($n1211 > $npppp) {
580 0         0 $errorMessage = "Frequency value n1211 ($n1211) must not exceed total number of 4grams.";
581 0         0 $errorCodeNumber = 202; return;
  0         0  
582             }
583              
584             # n1221 should be greater than equal to zero
585 3 50       5 if ($n1221 < 0) {
586 0         0 $errorMessage = "Frequency value n1221 ($n1221) must not be negative.";
587 0         0 $errorCodeNumber = 202; return;
  0         0  
588             }
589            
590             # n1221 frequency should be less than or equal to total 4grams
591 3 50       6 if ($n1221 > $npppp) {
592 0         0 $errorMessage = "Frequency value n1221 ($n1221) must not exceed total number of 4grams.";
593 0         0 $errorCodeNumber = 202; return;
  0         0  
594             }
595              
596             # n1222 should be greater than equal to zero
597 3 50       6 if ($n1222 < 0) {
598 0         0 $errorMessage = "Frequency value n1222 ($n1222) must not be negative.";
599 0         0 $errorCodeNumber = 202; return;
  0         0  
600             }
601            
602             # n1222 frequency should be less than or equal to total 4grams
603 3 50       7 if ($n1222 > $npppp) {
604 0         0 $errorMessage = "Frequency value n1222 ($n1222) must not exceed total number of 4grams.";
605 0         0 $errorCodeNumber = 202; return;
  0         0  
606             }
607              
608             # n2111 should be greater than equal to zero
609 3 50       6 if ($n2111 < 0) {
610 0         0 $errorMessage = "Frequency value n2111 ($n2111) must not be negative.";
611 0         0 $errorCodeNumber = 202; return;
  0         0  
612             }
613            
614             # n2111 frequency should be less than or equal to total 4grams
615 3 50       6 if ($n2111 > $npppp) {
616 0         0 $errorMessage = "Frequency value n2111 ($n2111) must not exceed total number of 4grams.";
617 0         0 $errorCodeNumber = 202; return;
  0         0  
618             }
619              
620             # n2112 should be greater than equal to zero
621 3 100       8 if ($n2112 < 0) {
622 2         6 $errorMessage = "Frequency value n2112 ($n2112) must not be negative.";
623 2         2 $errorCodeNumber = 202; return;
  2         6  
624             }
625            
626             # n2112 frequency should be less than or equal to total 4grams
627 1 50       3 if ($n2112 > $npppp) {
628 0         0 $errorMessage = "Frequency value n2112 ($n2112) must not exceed total number of 4grams.";
629 0         0 $errorCodeNumber = 202; return;
  0         0  
630             }
631              
632             # n2121 should be greater than equal to zero
633 1 50       3 if ($n2121 < 0) {
634 0         0 $errorMessage = "Frequency value n2121 ($n2121) must not be negative.";
635 0         0 $errorCodeNumber = 202; return;
  0         0  
636             }
637            
638             # n2121 frequency should be less than or equal to total 4grams
639 1 50       3 if ($n2121 > $npppp) {
640 0         0 $errorMessage = "Frequency value n2121 ($n2121) must not exceed total number of 4grams.";
641 0         0 $errorCodeNumber = 202; return;
  0         0  
642             }
643              
644             # n2122 should be greater than equal to zero
645 1 50       4 if ($n2122 < 0) {
646 0         0 $errorMessage = "Frequency value n2122 ($n2122) must not be negative.";
647 0         0 $errorCodeNumber = 202; return;
  0         0  
648             }
649            
650             # n2122 frequency should be less than or equal to total 4grams
651 1 50       3 if ($n2122 > $npppp) {
652 0         0 $errorMessage = "Frequency value n2122 ($n2122) must not exceed total number of 4grams.";
653 0         0 $errorCodeNumber = 202; return;
  0         0  
654             }
655              
656              
657             # n2211 should be greater than equal to zero
658 1 50       3 if ($n2211 < 0) {
659 0         0 $errorMessage = "Frequency value n2211 ($n2211) must not be negative.";
660 0         0 $errorCodeNumber = 202; return;
  0         0  
661             }
662            
663             # n2211 frequency should be less than or equal to total 4grams
664 1 50       3 if ($n2211 > $npppp) {
665 0         0 $errorMessage = "Frequency value n2211 ($n2211) must not exceed total number of 4grams.";
666 0         0 $errorCodeNumber = 202; return;
  0         0  
667             }
668              
669              
670             # n2212 should be greater than equal to zero
671 1 50       3 if ($n2212 < 0) {
672 0         0 $errorMessage = "Frequency value n2212 ($n2212) must not be negative.";
673 0         0 $errorCodeNumber = 202; return;
  0         0  
674             }
675            
676             # n2212 frequency should be less than or equal to total 4grams
677 1 50       3 if ($n2212 > $npppp) {
678 0         0 $errorMessage = "Frequency value n2212 ($n2212) must not exceed total number of 4grams.";
679 0         0 $errorCodeNumber = 202; return;
  0         0  
680             }
681              
682             # n2221 should be greater than equal to zero
683 1 50       7 if ($n2221 < 0) {
684 0         0 $errorMessage = "Frequency value n2221 ($n2221) must not be negative.";
685 0         0 $errorCodeNumber = 202; return;
  0         0  
686             }
687            
688             # n2221 frequency should be less than or equal to total 4grams
689 1 50       8 if ($n2221 > $npppp) {
690 0         0 $errorMessage = "Frequency value n2221 ($n2221) must not exceed total number of 4grams.";
691 0         0 $errorCodeNumber = 202; return;
  0         0  
692             }
693              
694              
695             # n2222 should be greater than equal to zero
696 1 50       4 if ($n2222 < 0) {
697 0         0 $errorMessage = "Frequency value n2222 ($n2222) must not be negative.";
698 0         0 $errorCodeNumber = 202; return;
  0         0  
699             }
700            
701             # n2222 frequency should be less than or equal to total 4grams
702 1 50       2 if ($n2222 > $npppp) {
703 0         0 $errorMessage = "Frequency value n2222 ($n2222) must not exceed total number of 4grams.";
704 0         0 $errorCodeNumber = 202; return;
  0         0  
705             }
706              
707 1         5 return 1;
708             }
709              
710              
711              
712              
713              
=item computeExpectedValues($count_values) - Stores the sixteen
expected cell values ($m1111 .. $m2222) for the model named by
$expected_values.

INPUT PARAMS  : $count_values   .. Reference to a hash consisting
                                   of the count output. It is
                                   accepted for interface
                                   compatibility; the routine works
                                   from the module's count and
                                   marginal variables and does not
                                   read the hash itself.

RETURN VALUES : 1               .. returned once the expected values
                                   have been stored. This routine
                                   has no failure (undef) path of
                                   its own.

=cut

sub computeExpectedValues
{
  my ($values) = @_;

  # Fall back to the fully independent model when none was selected.
  $expected_values = "0 1 2 3" unless defined $expected_values;

  # The expected values can be calculated under a number of different
  # models. The code for each model lives here, but statistic.pl offers
  # no option to select them.
  #
  # Every branch first fills in the marginals that the model needs and
  # that are not already available, then hands sixteen numerators (in
  # cell order 1111, 1112, 1121, ..., 2222) plus the shared denominator
  # to _storeExpectedValues().

  # model "0 123"
  if ($expected_values eq "0 123")
  {
    $np111 = $n1111 + $n2111;
    $np112 = $n1112 + $n2112;
    $np121 = $n1121 + $n2121;
    $np122 = $n1122 + $n2122;
    $np211 = $n1211 + $n2211;
    $np212 = $n1212 + $n2212;
    $np221 = $n1221 + $n2221;
    $np222 = $n1222 + $n2222;

    _storeExpectedValues($npppp,
      $n1ppp * $np111, $n1ppp * $np112,
      $n1ppp * $np121, $n1ppp * $np122,
      $n1ppp * $np211, $n1ppp * $np212,
      $n1ppp * $np221, $n1ppp * $np222,
      $n2ppp * $np111, $n2ppp * $np112,
      $n2ppp * $np121, $n2ppp * $np122,
      $n2ppp * $np211, $n2ppp * $np212,
      $n2ppp * $np221, $n2ppp * $np222);
  }

  # model "01 2 3"
  elsif ($expected_values eq "01 2 3")
  {
    $n12pp = $n1211 + $n1212 + $n1221 + $n1222;
    $n21pp = $n2111 + $n2112 + $n2121 + $n2122;
    $n22pp = $n2211 + $n2212 + $n2221 + $n2222;

    _storeExpectedValues($npppp**2,
      $n11pp * $npp1p * $nppp1, $n11pp * $npp1p * $nppp2,
      $n11pp * $npp2p * $nppp1, $n11pp * $npp2p * $nppp2,
      $n12pp * $npp1p * $nppp1, $n12pp * $npp1p * $nppp2,
      $n12pp * $npp2p * $nppp1, $n12pp * $npp2p * $nppp2,
      $n21pp * $npp1p * $nppp1, $n21pp * $npp1p * $nppp2,
      $n21pp * $npp2p * $nppp1, $n21pp * $npp2p * $nppp2,
      $n22pp * $npp1p * $nppp1, $n22pp * $npp1p * $nppp2,
      $n22pp * $npp2p * $nppp1, $n22pp * $npp2p * $nppp2);
  }

  # model "0 1 23"
  elsif ($expected_values eq "0 1 23")
  {
    $npp12 = $n1112 + $n1212 + $n2112 + $n2212;
    $npp21 = $n1121 + $n1221 + $n2121 + $n2221;
    $npp22 = $n1122 + $n1222 + $n2122 + $n2222;

    _storeExpectedValues($npppp**2,
      $n1ppp * $np1pp * $npp11, $n1ppp * $np1pp * $npp12,
      $n1ppp * $np1pp * $npp21, $n1ppp * $np1pp * $npp22,
      $n1ppp * $np2pp * $npp11, $n1ppp * $np2pp * $npp12,
      $n1ppp * $np2pp * $npp21, $n1ppp * $np2pp * $npp22,
      $n2ppp * $np1pp * $npp11, $n2ppp * $np1pp * $npp12,
      $n2ppp * $np1pp * $npp21, $n2ppp * $np1pp * $npp22,
      $n2ppp * $np2pp * $npp11, $n2ppp * $np2pp * $npp12,
      $n2ppp * $np2pp * $npp21, $n2ppp * $np2pp * $npp22);
  }

  # model "0 12 3"
  elsif ($expected_values eq "0 12 3")
  {
    $np12p = $n1121 + $n1122 + $n2121 + $n2122;
    $np21p = $n1211 + $n1212 + $n2211 + $n2212;
    $np22p = $n1221 + $n1222 + $n2221 + $n2222;

    _storeExpectedValues($npppp**2,
      $n1ppp * $np11p * $nppp1, $n1ppp * $np11p * $nppp2,
      $n1ppp * $np12p * $nppp1, $n1ppp * $np12p * $nppp2,
      $n1ppp * $np21p * $nppp1, $n1ppp * $np21p * $nppp2,
      $n1ppp * $np22p * $nppp1, $n1ppp * $np22p * $nppp2,
      $n2ppp * $np11p * $nppp1, $n2ppp * $np11p * $nppp2,
      $n2ppp * $np12p * $nppp1, $n2ppp * $np12p * $nppp2,
      $n2ppp * $np21p * $nppp1, $n2ppp * $np21p * $nppp2,
      $n2ppp * $np22p * $nppp1, $n2ppp * $np22p * $nppp2);
  }

  # model "012 3"
  elsif ($expected_values eq "012 3")
  {
    $n112p = $n1121 + $n1122;
    $n121p = $n1211 + $n1212;
    $n122p = $n1221 + $n1222;
    $n211p = $n2111 + $n2112;
    $n212p = $n2121 + $n2122;
    $n221p = $n2211 + $n2212;
    $n222p = $n2221 + $n2222;

    _storeExpectedValues($npppp,
      $n111p * $nppp1, $n111p * $nppp2,
      $n112p * $nppp1, $n112p * $nppp2,
      $n121p * $nppp1, $n121p * $nppp2,
      $n122p * $nppp1, $n122p * $nppp2,
      $n211p * $nppp1, $n211p * $nppp2,
      $n212p * $nppp1, $n212p * $nppp2,
      $n221p * $nppp1, $n221p * $nppp2,
      $n222p * $nppp1, $n222p * $nppp2);
  }

  # model "0 13 2"
  elsif ($expected_values eq "0 13 2")
  {
    $np1p2 = $n1112 + $n1122 + $n2112 + $n2122;
    $np2p1 = $n1211 + $n1221 + $n2211 + $n2221;
    $np2p2 = $n1212 + $n1222 + $n2212 + $n2222;

    _storeExpectedValues($npppp**2,
      $n1ppp * $np1p1 * $npp1p, $n1ppp * $np1p2 * $npp1p,
      $n1ppp * $np1p1 * $npp2p, $n1ppp * $np1p2 * $npp2p,
      $n1ppp * $np2p1 * $npp1p, $n1ppp * $np2p2 * $npp1p,
      $n1ppp * $np2p1 * $npp2p, $n1ppp * $np2p2 * $npp2p,
      $n2ppp * $np1p1 * $npp1p, $n2ppp * $np1p2 * $npp1p,
      $n2ppp * $np1p1 * $npp2p, $n2ppp * $np1p2 * $npp2p,
      $n2ppp * $np2p1 * $npp1p, $n2ppp * $np2p2 * $npp1p,
      $n2ppp * $np2p1 * $npp2p, $n2ppp * $np2p2 * $npp2p);
  }

  # model "02 13"
  elsif ($expected_values eq "02 13")
  {
    $n1p2p = $n1121 + $n1122 + $n1221 + $n1222;
    $n2p1p = $n2111 + $n2112 + $n2211 + $n2212;
    $n2p2p = $n2121 + $n2122 + $n2221 + $n2222;

    $np1p2 = $n1112 + $n1122 + $n2112 + $n2122;
    $np2p1 = $n1211 + $n1221 + $n2211 + $n2221;
    $np2p2 = $n1212 + $n1222 + $n2212 + $n2222;

    _storeExpectedValues($npppp,
      $n1p1p * $np1p1, $n1p1p * $np1p2,
      $n1p2p * $np1p1, $n1p2p * $np1p2,
      $n1p1p * $np2p1, $n1p1p * $np2p2,
      $n1p2p * $np2p1, $n1p2p * $np2p2,
      $n2p1p * $np1p1, $n2p1p * $np1p2,
      $n2p2p * $np1p1, $n2p2p * $np1p2,
      $n2p1p * $np2p1, $n2p1p * $np2p2,
      $n2p2p * $np2p1, $n2p2p * $np2p2);
  }

  # model "03 12"
  elsif ($expected_values eq "03 12")
  {
    $n1pp2 = $n1112 + $n1122 + $n1212 + $n1222;
    $n2pp1 = $n2111 + $n2121 + $n2211 + $n2221;
    $n2pp2 = $n2112 + $n2122 + $n2212 + $n2222;

    $np12p = $n1121 + $n1122 + $n2121 + $n2122;
    $np21p = $n1211 + $n1212 + $n2211 + $n2212;
    $np22p = $n1221 + $n1222 + $n2221 + $n2222;

    _storeExpectedValues($npppp,
      $n1pp1 * $np11p, $n1pp2 * $np11p,
      $n1pp1 * $np12p, $n1pp2 * $np12p,
      $n1pp1 * $np21p, $n1pp2 * $np21p,
      $n1pp1 * $np22p, $n1pp2 * $np22p,
      $n2pp1 * $np11p, $n2pp2 * $np11p,
      $n2pp1 * $np12p, $n2pp2 * $np12p,
      $n2pp1 * $np21p, $n2pp2 * $np21p,
      $n2pp1 * $np22p, $n2pp2 * $np22p);
  }

  # model "023 1"
  elsif ($expected_values eq "023 1")
  {
    $n1p12 = $n1112 + $n1212;
    $n1p21 = $n1121 + $n1221;
    $n1p22 = $n1122 + $n1222;
    $n2p11 = $n2111 + $n2211;
    $n2p12 = $n2112 + $n2212;
    $n2p21 = $n2121 + $n2221;
    $n2p22 = $n2122 + $n2222;

    _storeExpectedValues($npppp,
      $n1p11 * $np1pp, $n1p12 * $np1pp,
      $n1p21 * $np1pp, $n1p22 * $np1pp,
      $n1p11 * $np2pp, $n1p12 * $np2pp,
      $n1p21 * $np2pp, $n1p22 * $np2pp,
      $n2p11 * $np1pp, $n2p12 * $np1pp,
      $n2p21 * $np1pp, $n2p22 * $np1pp,
      $n2p11 * $np2pp, $n2p12 * $np2pp,
      $n2p21 * $np2pp, $n2p22 * $np2pp);
  }

  # model "013 2"
  elsif ($expected_values eq "013 2")
  {
    $n11p2 = $n1112 + $n1122;
    $n12p1 = $n1211 + $n1221;
    $n12p2 = $n1212 + $n1222;
    $n21p1 = $n2111 + $n2121;
    $n21p2 = $n2112 + $n2122;
    $n22p1 = $n2211 + $n2221;
    $n22p2 = $n2212 + $n2222;

    _storeExpectedValues($npppp,
      $n11p1 * $npp1p, $n11p2 * $npp1p,
      $n11p1 * $npp2p, $n11p2 * $npp2p,
      $n12p1 * $npp1p, $n12p2 * $npp1p,
      $n12p1 * $npp2p, $n12p2 * $npp2p,
      $n21p1 * $npp1p, $n21p2 * $npp1p,
      $n21p1 * $npp2p, $n21p2 * $npp2p,
      $n22p1 * $npp1p, $n22p2 * $npp1p,
      $n22p1 * $npp2p, $n22p2 * $npp2p);
  }

  # model "01 23"
  elsif ($expected_values eq "01 23")
  {
    $n12pp = $n1211 + $n1212 + $n1221 + $n1222;
    $n21pp = $n2111 + $n2112 + $n2121 + $n2122;
    $n22pp = $n2211 + $n2212 + $n2221 + $n2222;

    $npp12 = $n1112 + $n1212 + $n2112 + $n2212;
    $npp21 = $n1121 + $n1221 + $n2121 + $n2221;
    $npp22 = $n1122 + $n1222 + $n2122 + $n2222;

    _storeExpectedValues($npppp,
      $n11pp * $npp11, $n11pp * $npp12,
      $n11pp * $npp21, $n11pp * $npp22,
      $n12pp * $npp11, $n12pp * $npp12,
      $n12pp * $npp21, $n12pp * $npp22,
      $n21pp * $npp11, $n21pp * $npp12,
      $n21pp * $npp21, $n21pp * $npp22,
      $n22pp * $npp11, $n22pp * $npp12,
      $n22pp * $npp21, $n22pp * $npp22);
  }

  # model "03 1 2"
  elsif ($expected_values eq "03 1 2")
  {
    $n1pp2 = $n1112 + $n1122 + $n1212 + $n1222;
    $n2pp1 = $n2111 + $n2121 + $n2211 + $n2221;
    $n2pp2 = $n2112 + $n2122 + $n2212 + $n2222;

    _storeExpectedValues($npppp**2,
      $n1pp1 * $np1pp * $npp1p, $n1pp2 * $np1pp * $npp1p,
      $n1pp1 * $np1pp * $npp2p, $n1pp2 * $np1pp * $npp2p,
      $n1pp1 * $np2pp * $npp1p, $n1pp2 * $np2pp * $npp1p,
      $n1pp1 * $np2pp * $npp2p, $n1pp2 * $np2pp * $npp2p,
      $n2pp1 * $np1pp * $npp1p, $n2pp2 * $np1pp * $npp1p,
      $n2pp1 * $np1pp * $npp2p, $n2pp2 * $np1pp * $npp2p,
      $n2pp1 * $np2pp * $npp1p, $n2pp2 * $np2pp * $npp1p,
      $n2pp1 * $np2pp * $npp2p, $n2pp2 * $np2pp * $npp2p);
  }

  # model "02 1 3"
  elsif ($expected_values eq "02 1 3")
  {
    $n1p2p = $n1121 + $n1122 + $n1221 + $n1222;
    $n2p1p = $n2111 + $n2112 + $n2211 + $n2212;
    $n2p2p = $n2121 + $n2122 + $n2221 + $n2222;

    _storeExpectedValues($npppp**2,
      $n1p1p * $np1pp * $nppp1, $n1p1p * $np1pp * $nppp2,
      $n1p2p * $np1pp * $nppp1, $n1p2p * $np1pp * $nppp2,
      $n1p1p * $np2pp * $nppp1, $n1p1p * $np2pp * $nppp2,
      $n1p2p * $np2pp * $nppp1, $n1p2p * $np2pp * $nppp2,
      $n2p1p * $np1pp * $nppp1, $n2p1p * $np1pp * $nppp2,
      $n2p2p * $np1pp * $nppp1, $n2p2p * $np1pp * $nppp2,
      $n2p1p * $np2pp * $nppp1, $n2p1p * $np2pp * $nppp2,
      $n2p2p * $np2pp * $nppp1, $n2p2p * $np2pp * $nppp2);
  }

  # any other value, including the default "0 1 2 3"
  else
  {
    _storeExpectedValues($npppp**3,
      $n1ppp * $np1pp * $npp1p * $nppp1, $n1ppp * $np1pp * $npp1p * $nppp2,
      $n1ppp * $np1pp * $npp2p * $nppp1, $n1ppp * $np1pp * $npp2p * $nppp2,
      $n1ppp * $np2pp * $npp1p * $nppp1, $n1ppp * $np2pp * $npp1p * $nppp2,
      $n1ppp * $np2pp * $npp2p * $nppp1, $n1ppp * $np2pp * $npp2p * $nppp2,
      $n2ppp * $np1pp * $npp1p * $nppp1, $n2ppp * $np1pp * $npp1p * $nppp2,
      $n2ppp * $np1pp * $npp2p * $nppp1, $n2ppp * $np1pp * $npp2p * $nppp2,
      $n2ppp * $np2pp * $npp1p * $nppp1, $n2ppp * $np2pp * $npp1p * $nppp2,
      $n2ppp * $np2pp * $npp2p * $nppp1, $n2ppp * $np2pp * $npp2p * $nppp2);
  }

  return 1;
}

# Private helper for computeExpectedValues(): divides each of the sixteen
# numerators by the shared denominator and stores the quotients in
# $m1111 .. $m2222, in cell order 1111, 1112, 1121, ..., 2222.
sub _storeExpectedValues
{
  my ($denominator, @numerators) = @_;

  ( $m1111, $m1112, $m1121, $m1122,
    $m1211, $m1212, $m1221, $m1222,
    $m2111, $m2112, $m2121, $m2122,
    $m2211, $m2212, $m2221, $m2222 ) = map { $_ / $denominator } @numerators;

  return;
}
1138              
1139              
=item computeMarginalTotals($count_values) - This method derives the
remaining marginal totals from the values computed by the count.pl
program that were passed to the calculateStatistic() method, and
validates the four single-position marginals.

INPUT PARAMS  : $count_values   .. Reference to a hash consisting
                                   of the frequency combination
                                   output. It is accepted for
                                   interface compatibility; the
                                   routine works from the module's
                                   count and marginal variables and
                                   does not read the hash itself.

RETURN VALUES : 1/undef         .. returns '1' to indicate success
                                   and an undefined (NULL) value to
                                   indicate failure. On failure
                                   $errorMessage and
                                   $errorCodeNumber are set.

=cut

sub computeMarginalTotals
{
  # Three-position marginals, summed over the first position.
  $np112 = $n1112 + $n2112;
  $np121 = $n1121 + $n2121;
  $np122 = $n1122 + $n2122;
  $np211 = $n1211 + $n2211;
  $np212 = $n1212 + $n2212;
  $np221 = $n1221 + $n2221;
  $np222 = $n1222 + $n2222;

  # Complements of the four single-position marginals.
  $n2ppp = $npppp - $n1ppp;
  $np2pp = $npppp - $np1pp;
  $npp2p = $npppp - $npp1p;
  $nppp2 = $npppp - $nppp1;

  # Each single-position marginal must be strictly positive and must not
  # exceed the total number of 4-grams. The checks run in this order and
  # stop at the first failure; every check has its own error code.
  my @checks = (
    # name     value    code if <= 0   code if > npppp
    [ 'n1ppp', $n1ppp,  203,           204 ],
    [ 'np1pp', $np1pp,  205,           206 ],
    [ 'npp1p', $npp1p,  207,           208 ],
    [ 'nppp1', $nppp1,  209,           210 ],
  );

  foreach my $check (@checks)
  {
    my ($name, $value, $code_not_positive, $code_too_large) = @$check;

    # A zero marginal is rejected as well as a negative one, so the
    # message says "greater than zero" rather than "not negative".
    if ($value <= 0)
    {
      $errorMessage = "Marginal total value $name ($value) must be greater than zero.";
      $errorCodeNumber = $code_not_positive;
      return;
    }

    if ($value > $npppp)
    {
      $errorMessage = "Marginal total value $name ($value) must not exceed total number of 4grams ($npppp).";
      $errorCodeNumber = $code_too_large;
      return;
    }
  }

  return 1;
}
1222              
1223             1;
1224             __END__