line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Mining::Apriori;
|
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
87819
|
use 5.010001;
|
|
1
|
|
|
|
|
4
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
57
|
|
6
|
1
|
|
|
1
|
|
641
|
use Algorithm::Combinatorics qw(subsets variations);
|
|
1
|
|
|
|
|
4551
|
|
|
1
|
|
|
|
|
94
|
|
7
|
1
|
|
|
1
|
|
867
|
no if $] >= 5.017011, warnings => 'experimental::smartmatch';
|
|
1
|
|
|
|
|
16
|
|
|
1
|
|
|
|
|
7
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = 0.17;
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my$self;
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$" = ', ';
|
14
|
|
|
|
|
|
|
$| = 1;
|
15
|
|
|
|
|
|
|
$SIG{'INT'} = \&stop;
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub new{
|
18
|
1
|
|
|
1
|
1
|
4475
|
my $type = shift;
|
19
|
1
|
|
33
|
|
|
8
|
my $class = ref($type)||$type;
|
20
|
1
|
|
|
|
|
18
|
$self = {
|
21
|
|
|
|
|
|
|
totalTransactions => 0,
|
22
|
|
|
|
|
|
|
metrics => {
|
23
|
|
|
|
|
|
|
minSupport => 0.01,
|
24
|
|
|
|
|
|
|
minConfidence => 0.10,
|
25
|
|
|
|
|
|
|
minLift => undef,
|
26
|
|
|
|
|
|
|
minLeverage => undef,
|
27
|
|
|
|
|
|
|
minConviction => undef,
|
28
|
|
|
|
|
|
|
minCoverage => undef,
|
29
|
|
|
|
|
|
|
minCorrelation => undef,
|
30
|
|
|
|
|
|
|
minCosine => undef,
|
31
|
|
|
|
|
|
|
minLaplace => undef,
|
32
|
|
|
|
|
|
|
minJaccard => undef
|
33
|
|
|
|
|
|
|
},
|
34
|
|
|
|
|
|
|
precision => 3,
|
35
|
|
|
|
|
|
|
output => undef,
|
36
|
|
|
|
|
|
|
pathOutputFiles => "",
|
37
|
|
|
|
|
|
|
messages => undef,
|
38
|
|
|
|
|
|
|
keyItemsDescription => undef,
|
39
|
|
|
|
|
|
|
keyItemsTransactions => undef,
|
40
|
|
|
|
|
|
|
limitRules => undef,
|
41
|
|
|
|
|
|
|
limitSubsets => undef,
|
42
|
|
|
|
|
|
|
numberSubsets => 0,
|
43
|
|
|
|
|
|
|
frequentItemset => [],
|
44
|
|
|
|
|
|
|
associationRules => undef,
|
45
|
|
|
|
|
|
|
implications => undef,
|
46
|
|
|
|
|
|
|
largeItemsetLength => 2,
|
47
|
|
|
|
|
|
|
rule => 0
|
48
|
|
|
|
|
|
|
};
|
49
|
1
|
|
|
|
|
4
|
bless($self,$class);
|
50
|
1
|
|
|
|
|
3
|
return $self;
|
51
|
|
|
|
|
|
|
}
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub validate_data{
|
54
|
|
|
|
|
|
|
(defined $self->{keyItemsDescription})
|
55
|
0
|
0
|
|
0
|
0
|
|
or die('Error: $apriori->{keyItemsDescription} is not defined!');
|
56
|
|
|
|
|
|
|
(defined $self->{keyItemsTransactions})
|
57
|
0
|
0
|
|
|
|
|
or die('Error: $apriori->{keyItemsTransactions} is not defined!');
|
58
|
|
|
|
|
|
|
(defined $self->{metrics}{minSupport})
|
59
|
0
|
0
|
|
|
|
|
or die('Error: $apriori->{metrics}{minSupport} is not defined!');
|
60
|
|
|
|
|
|
|
(defined $self->{metrics}{minConfidence})
|
61
|
0
|
0
|
|
|
|
|
or die('Error: $apriori->{metrics}{minConfidence} is not defined!');
|
62
|
|
|
|
|
|
|
}
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub insert_key_items_transaction{
|
65
|
0
|
0
|
|
0
|
1
|
|
(scalar(@_)==2)
|
66
|
|
|
|
|
|
|
or die('Error: $apriori->insert_key_items_transaction(\@items) missing parameter key items to array reference!');
|
67
|
0
|
0
|
|
|
|
|
(ref($_[1]) eq "ARRAY")
|
68
|
|
|
|
|
|
|
or die('Error: $apriori->insert_key_items_transaction(\@items) parameter key items is not an array reference!');
|
69
|
0
|
|
|
|
|
|
my@items=sort(@{$_[1]});
|
|
0
|
|
|
|
|
|
|
70
|
0
|
|
|
|
|
|
my @itemsets=subsets(\@items);
|
71
|
0
|
|
|
|
|
|
foreach my$itemset(@itemsets){
|
72
|
0
|
|
|
|
|
|
$self->{keyItemsTransactions}{"@{$itemset}"}++;
|
|
0
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
}
|
74
|
0
|
|
|
|
|
|
$self->{totalTransactions}++;
|
75
|
|
|
|
|
|
|
}
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub input_data_file{
|
78
|
0
|
0
|
|
0
|
1
|
|
(scalar(@_)==3)
|
79
|
|
|
|
|
|
|
or die('Error: $apriori->input_data_file("datafile.txt",",") missing parameters path to data file and/or item separator!');
|
80
|
0
|
|
|
|
|
|
my$file=$_[1];
|
81
|
0
|
|
|
|
|
|
my$separator=$_[2];
|
82
|
0
|
0
|
|
|
|
|
(-e $file)
|
83
|
|
|
|
|
|
|
or die("Error: the file \"$file\" does not exists!");
|
84
|
0
|
0
|
|
|
|
|
(-r $file)
|
85
|
|
|
|
|
|
|
or die("Error: the file \"$file\" is not readable!");
|
86
|
0
|
0
|
|
|
|
|
(-T $file)
|
87
|
|
|
|
|
|
|
or die("Error: the file \"$file\" is not a text file!");
|
88
|
0
|
0
|
|
|
|
|
open(FILE,"<$file")
|
89
|
|
|
|
|
|
|
or die("Error: $!");
|
90
|
0
|
|
|
|
|
|
while(my$line=){
|
91
|
0
|
|
|
|
|
|
$line=~s/\r\n|\n//;
|
92
|
0
|
|
|
|
|
|
my@items=split($separator,$line);
|
93
|
0
|
0
|
|
|
|
|
if(scalar(@items)>=2){
|
94
|
0
|
|
|
|
|
|
$self->insert_key_items_transaction(\@items);
|
95
|
|
|
|
|
|
|
}
|
96
|
|
|
|
|
|
|
}
|
97
|
0
|
|
|
|
|
|
close(FILE);
|
98
|
|
|
|
|
|
|
}
|
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub quantity_possible_rules{
|
101
|
0
|
|
|
0
|
1
|
|
$self->validate_data;
|
102
|
0
|
|
|
|
|
|
return ((3**scalar(keys(%{$self->{keyItemsDescription}})))-(2**(scalar(keys(%{$self->{keyItemsDescription}}))+1))+1);
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
}
|
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub generate_rules{
|
106
|
0
|
|
|
0
|
1
|
|
$self->validate_data;
|
107
|
0
|
0
|
|
|
|
|
if($self->{messages}){
|
108
|
0
|
|
|
|
|
|
print "\n${\scalar(keys(%{$self->{keyItemsDescription}}))} items, ${\$self->quantity_possible_rules} possible rules";
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
}
|
110
|
0
|
|
|
|
|
|
my @largeItemsetLengthOne = grep{(($self->{keyItemsTransactions}{$_}/$self->{totalTransactions})*100)>=$self->{metrics}{minSupport}}keys(%{$self->{keyItemsDescription}});
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
111
|
0
|
|
|
|
|
|
$self->association_rules(\@largeItemsetLengthOne);
|
112
|
|
|
|
|
|
|
}
|
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub association_rules{
|
115
|
0
|
|
|
0
|
1
|
|
my @largeItemset = @{$_[1]};
|
|
0
|
|
|
|
|
|
|
116
|
0
|
|
|
|
|
|
my @variations = variations(\@largeItemset,$self->{largeItemsetLength});
|
117
|
0
|
|
|
|
|
|
my @frequentItemset;
|
118
|
0
|
0
|
|
|
|
|
if($self->{messages}){
|
119
|
0
|
|
|
|
|
|
print "\nLarge itemset of length $self->{largeItemsetLength}, ${\scalar(@largeItemset)} items ";
|
|
0
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
print "\nProcessing ...";
|
121
|
|
|
|
|
|
|
}
|
122
|
|
|
|
|
|
|
VARIATIONS:
|
123
|
0
|
|
|
|
|
|
foreach my$variation(@variations){
|
124
|
0
|
|
|
|
|
|
my@candidateItemset=@{$variation};
|
|
0
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
my@antecedent;
|
126
|
|
|
|
|
|
|
my@consequent;
|
127
|
0
|
|
|
|
|
|
for(my$antecedentLength=0;$antecedentLength<$#candidateItemset;$antecedentLength++){
|
128
|
0
|
|
|
|
|
|
push@antecedent,$candidateItemset[$antecedentLength];
|
129
|
0
|
|
|
|
|
|
@consequent=();
|
130
|
0
|
|
|
|
|
|
for(my$consequentLength=($antecedentLength+1);$consequentLength<=$#candidateItemset;$consequentLength++){
|
131
|
0
|
|
|
|
|
|
push@consequent,$candidateItemset[$consequentLength];
|
132
|
|
|
|
|
|
|
}
|
133
|
0
|
|
|
|
|
|
@antecedent=sort@antecedent;
|
134
|
0
|
|
|
|
|
|
@consequent=sort@consequent;
|
135
|
0
|
0
|
|
|
|
|
next if("@consequent"~~@{$self->{implications}{"@antecedent"}});
|
|
0
|
|
|
|
|
|
|
136
|
0
|
0
|
0
|
|
|
|
last VARIATIONS if(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets});
|
137
|
0
|
|
|
|
|
|
$self->{numberSubsets}++;
|
138
|
0
|
|
|
|
|
|
push @{$self->{implications}{"@antecedent"}},"@consequent";
|
|
0
|
|
|
|
|
|
|
139
|
0
|
|
|
|
|
|
my@implication;
|
140
|
0
|
|
|
|
|
|
push@implication,@antecedent,@consequent;
|
141
|
0
|
|
|
|
|
|
@implication=sort(@implication);
|
142
|
0
|
0
|
|
|
|
|
next unless $self->{keyItemsTransactions}{"@antecedent"};
|
143
|
0
|
|
|
|
|
|
my$supportAntecedent=($self->{keyItemsTransactions}{"@antecedent"}/$self->{totalTransactions});
|
144
|
0
|
0
|
|
|
|
|
next unless $self->{keyItemsTransactions}{"@implication"};
|
145
|
0
|
|
|
|
|
|
my$supportConsequent=($self->{keyItemsTransactions}{"@implication"}/$self->{totalTransactions});
|
146
|
0
|
|
|
|
|
|
my $support = $supportConsequent;
|
147
|
0
|
0
|
|
|
|
|
next if($support < $self->{metrics}{minSupport});
|
148
|
0
|
|
|
|
|
|
my $confidence = ($supportConsequent/$supportAntecedent);
|
149
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minConfidence} && $confidence < $self->{metrics}{minConfidence});
|
150
|
0
|
|
|
|
|
|
my $lift = ($support/($supportAntecedent*$supportConsequent));
|
151
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minLift} && $lift < $self->{metrics}{minLift});
|
152
|
0
|
|
|
|
|
|
my $leverage = ($support-($supportAntecedent*$supportConsequent));
|
153
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minLeverage} && $leverage < $self->{metrics}{minLeverage});
|
154
|
0
|
0
|
|
|
|
|
my $conviction = ((1-$supportConsequent)==0)?"NaN":((1-$confidence)==0)?"NaN":((1-$supportConsequent)/(1-$confidence));
|
|
|
0
|
|
|
|
|
|
155
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minConviction} && $conviction < $self->{metrics}{minConviction});
|
156
|
0
|
|
|
|
|
|
my $coverage = $supportAntecedent;
|
157
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minCoverage} && $coverage < $self->{metrics}{minCoverage});
|
158
|
0
|
|
|
|
|
|
my $correlation = (($support-($supportAntecedent*$supportConsequent))/sqrt($supportAntecedent*(1-$supportAntecedent)*$supportConsequent*(1-$supportConsequent)));
|
159
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minCorrelation} && $correlation < $self->{metrics}{minCorrelation});
|
160
|
0
|
|
|
|
|
|
my $cosine = ($support/sqrt($supportAntecedent*$supportConsequent));
|
161
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minCosine} && $cosine < $self->{metrics}{minCosine});
|
162
|
0
|
|
|
|
|
|
my $laplace = (($support+1)/($supportAntecedent+2));
|
163
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minLaplace} && $laplace < $self->{metrics}{minLaplace});
|
164
|
0
|
|
|
|
|
|
my $jaccard = ($support/($supportAntecedent+$supportConsequent-$support));
|
165
|
0
|
0
|
0
|
|
|
|
next if(defined $self->{metrics}{minJaccard} && $jaccard < $self->{metrics}{minJaccard});
|
166
|
0
|
|
|
|
|
|
$self->{rule}++;
|
167
|
0
|
|
|
|
|
|
$support = sprintf("%.$self->{precision}f", $support);
|
168
|
0
|
|
|
|
|
|
$confidence = sprintf("%.$self->{precision}f", $confidence);
|
169
|
0
|
|
|
|
|
|
$lift = sprintf("%.$self->{precision}f", $lift);
|
170
|
0
|
|
|
|
|
|
$leverage = sprintf("%.$self->{precision}f", $leverage);
|
171
|
0
|
0
|
|
|
|
|
$conviction = sprintf("%.$self->{precision}f", $conviction)if($conviction ne "NaN");
|
172
|
0
|
|
|
|
|
|
$coverage = sprintf("%.$self->{precision}f", $coverage);
|
173
|
0
|
|
|
|
|
|
$correlation = sprintf("%.$self->{precision}f", $correlation);
|
174
|
0
|
|
|
|
|
|
$cosine = sprintf("%.$self->{precision}f", $cosine);
|
175
|
0
|
|
|
|
|
|
$laplace = sprintf("%.$self->{precision}f", $laplace);
|
176
|
0
|
|
|
|
|
|
$jaccard = sprintf("%.$self->{precision}f", $jaccard);
|
177
|
|
|
|
|
|
|
$self->{associationRules}{$self->{rule}} = {
|
178
|
0
|
|
|
|
|
|
implication => "{ @antecedent } => { @consequent }",
|
179
|
|
|
|
|
|
|
support => $support,
|
180
|
|
|
|
|
|
|
confidence => $confidence,
|
181
|
|
|
|
|
|
|
lift => $lift,
|
182
|
|
|
|
|
|
|
leverage => $leverage,
|
183
|
|
|
|
|
|
|
conviction => $conviction,
|
184
|
|
|
|
|
|
|
coverage => $coverage,
|
185
|
|
|
|
|
|
|
correlation => $correlation,
|
186
|
|
|
|
|
|
|
cosine => $cosine,
|
187
|
|
|
|
|
|
|
laplace => $laplace,
|
188
|
|
|
|
|
|
|
jaccard => $jaccard,
|
189
|
|
|
|
|
|
|
items => [@antecedent, @consequent]
|
190
|
|
|
|
|
|
|
};
|
191
|
0
|
|
|
|
|
|
my@items=grep{!($_~~@frequentItemset)}@implication;
|
|
0
|
|
|
|
|
|
|
192
|
0
|
|
|
|
|
|
push@frequentItemset,@items;
|
193
|
0
|
0
|
0
|
|
|
|
last VARIATIONS if(defined $self->{limitRules} && $self->{rule} == $self->{limitRules});
|
194
|
|
|
|
|
|
|
}
|
195
|
|
|
|
|
|
|
}
|
196
|
0
|
0
|
|
|
|
|
if($self->{messages}){
|
197
|
0
|
|
|
|
|
|
print "\nFrequent itemset: { @frequentItemset }, ${\scalar(@frequentItemset)} items ";
|
|
0
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
}
|
199
|
0
|
0
|
|
|
|
|
if(defined $self->{associationRules}){
|
200
|
0
|
|
|
|
|
|
@{$self->{frequentItemset}}=@frequentItemset;
|
|
0
|
|
|
|
|
|
|
201
|
0
|
|
|
|
|
|
$self->output;
|
202
|
|
|
|
|
|
|
}
|
203
|
|
|
|
|
|
|
return if((defined $self->{limitRules} && $self->{rule} == $self->{limitRules})
|
204
|
0
|
0
|
0
|
|
|
|
||(defined $self->{limitSubsets} && $self->{numberSubsets} == $self->{limitSubsets}));
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
205
|
0
|
0
|
|
|
|
|
if(scalar(@frequentItemset)>=($self->{largeItemsetLength}+1)){
|
206
|
0
|
|
|
|
|
|
$self->{largeItemsetLength}++;
|
207
|
0
|
|
|
|
|
|
$self->{associationRules} = undef;
|
208
|
0
|
|
|
|
|
|
$self->association_rules(\@frequentItemset);
|
209
|
|
|
|
|
|
|
}
|
210
|
|
|
|
|
|
|
}
|
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub stop{
|
213
|
0
|
0
|
|
0
|
0
|
|
if($self->{messages}){
|
214
|
0
|
|
|
|
|
|
print "\nStopping ...";
|
215
|
0
|
0
|
|
|
|
|
$self->output if $self->{associationRules};
|
216
|
0
|
|
|
|
|
|
print "\nExit? (Y/N): ";
|
217
|
0
|
|
|
|
|
|
my $answer = ;
|
218
|
0
|
|
|
|
|
|
chomp($answer);
|
219
|
0
|
0
|
|
|
|
|
if($answer =~ /^y$/i){
|
220
|
0
|
|
|
|
|
|
exit;
|
221
|
|
|
|
|
|
|
}
|
222
|
|
|
|
|
|
|
else{
|
223
|
0
|
|
|
|
|
|
print "Processing ...";
|
224
|
|
|
|
|
|
|
}
|
225
|
|
|
|
|
|
|
}
|
226
|
|
|
|
|
|
|
else{
|
227
|
0
|
0
|
|
|
|
|
$self->output if $self->{associationRules};
|
228
|
0
|
|
|
|
|
|
exit;
|
229
|
|
|
|
|
|
|
}
|
230
|
|
|
|
|
|
|
}
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
sub output{
|
233
|
0
|
0
|
|
0
|
1
|
|
if($self->{output}){
|
234
|
0
|
0
|
|
|
|
|
if($self->{output}==1){
|
|
|
0
|
|
|
|
|
|
235
|
0
|
|
|
|
|
|
$self->file;
|
236
|
|
|
|
|
|
|
}
|
237
|
|
|
|
|
|
|
elsif($self->{output}==2){
|
238
|
0
|
|
|
|
|
|
$self->excel;
|
239
|
|
|
|
|
|
|
}
|
240
|
|
|
|
|
|
|
}
|
241
|
|
|
|
|
|
|
}
|
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub file{
|
244
|
0
|
0
|
|
0
|
0
|
|
if($self->{messages}){
|
245
|
0
|
|
|
|
|
|
print "\nExporting to file $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt ...";
|
246
|
|
|
|
|
|
|
}
|
247
|
0
|
0
|
|
|
|
|
open(FILE,">$self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt")
|
248
|
|
|
|
|
|
|
or die("\nError: $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.txt $!");
|
249
|
0
|
|
|
|
|
|
print FILE "Rules\tSupport\tConfidence";
|
250
|
0
|
|
|
|
|
|
my@headings=('Lift', 'Leverage', 'Conviction', 'Coverage', 'Correlation', 'Cosine', 'Laplace', 'Jaccard');
|
251
|
0
|
|
|
|
|
|
my@metrics;
|
252
|
0
|
|
|
|
|
|
foreach my$metric(@headings){
|
253
|
0
|
0
|
|
|
|
|
push@metrics,$metric if defined $self->{metrics}{"min$metric"};
|
254
|
|
|
|
|
|
|
}
|
255
|
0
|
|
|
|
|
|
foreach my$metric(@metrics){
|
256
|
0
|
|
|
|
|
|
print FILE "\t$metric";
|
257
|
|
|
|
|
|
|
}
|
258
|
0
|
|
|
|
|
|
print FILE "\n";
|
259
|
0
|
|
|
|
|
|
foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
260
|
0
|
|
|
|
|
|
$self->{associationRules}{$rule}{support}=~s/\./,/;
|
261
|
0
|
|
|
|
|
|
$self->{associationRules}{$rule}{confidence}=~s/\./,/;
|
262
|
0
|
|
|
|
|
|
foreach my$metric(@metrics){
|
263
|
0
|
|
|
|
|
|
$self->{associationRules}{$rule}{lc$metric}=~s/\./,/;
|
264
|
|
|
|
|
|
|
}
|
265
|
0
|
|
|
|
|
|
print FILE "R$rule\t$self->{associationRules}{$rule}{support}\t$self->{associationRules}{$rule}{confidence}";
|
266
|
0
|
|
|
|
|
|
foreach my$metric(@metrics){
|
267
|
0
|
|
|
|
|
|
print FILE "\t$self->{associationRules}{$rule}{${\lc$metric}}";
|
|
0
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
}
|
269
|
0
|
|
|
|
|
|
print FILE "\n";
|
270
|
|
|
|
|
|
|
}
|
271
|
0
|
|
|
|
|
|
print FILE "\n";
|
272
|
0
|
|
|
|
|
|
foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
273
|
0
|
|
|
|
|
|
print FILE "Rule R$rule: $self->{associationRules}{$rule}{implication}\n";
|
274
|
0
|
|
|
|
|
|
print FILE "Support: $self->{associationRules}{$rule}{support}\n";
|
275
|
0
|
|
|
|
|
|
print FILE "Confidence: $self->{associationRules}{$rule}{confidence}\n";
|
276
|
0
|
|
|
|
|
|
foreach my$metric(@metrics){
|
277
|
0
|
|
|
|
|
|
print FILE "$metric: $self->{associationRules}{$rule}{${\lc$metric}}\n";
|
|
0
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
}
|
279
|
0
|
|
|
|
|
|
print FILE "Items:\n";
|
280
|
0
|
|
|
|
|
|
foreach my$item(@{$self->{associationRules}{$rule}{items}}){
|
|
0
|
|
|
|
|
|
|
281
|
0
|
|
|
|
|
|
print FILE "$item $self->{keyItemsDescription}{$item}\n";
|
282
|
|
|
|
|
|
|
}
|
283
|
0
|
|
|
|
|
|
print FILE "\n";
|
284
|
|
|
|
|
|
|
}
|
285
|
0
|
|
|
|
|
|
print FILE "Frequent itemset: { @{$self->{frequentItemset}} }\n";
|
|
0
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
|
print FILE "Items:\n";
|
287
|
0
|
|
|
|
|
|
foreach my$item(@{$self->{frequentItemset}}){
|
|
0
|
|
|
|
|
|
|
288
|
0
|
|
|
|
|
|
print FILE "$item $self->{keyItemsDescription}{$item}\n";
|
289
|
|
|
|
|
|
|
}
|
290
|
0
|
|
|
|
|
|
close(FILE);
|
291
|
|
|
|
|
|
|
}
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub excel{
|
294
|
0
|
|
|
0
|
0
|
|
require Excel::Writer::XLSX;
|
295
|
0
|
0
|
|
|
|
|
if($self->{messages}){
|
296
|
0
|
|
|
|
|
|
print "\nExporting to excel $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx ...";
|
297
|
|
|
|
|
|
|
}
|
298
|
0
|
0
|
|
|
|
|
my $workbook = Excel::Writer::XLSX->new("$self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx")
|
299
|
|
|
|
|
|
|
or die("\nError: $self->{pathOutputFiles}output_large_itemset_length_$self->{largeItemsetLength}.xlsx $!");
|
300
|
0
|
|
|
|
|
|
my $worksheet = $workbook->add_worksheet();
|
301
|
0
|
|
|
|
|
|
my $bold = $workbook->add_format(bold => 1);
|
302
|
0
|
|
|
|
|
|
my $headings = ['Rules', 'Support', 'Confidence'];
|
303
|
0
|
|
|
|
|
|
my@metrics=('Lift', 'Leverage', 'Conviction', 'Coverage', 'Correlation', 'Cosine', 'Laplace', 'Jaccard');
|
304
|
0
|
|
|
|
|
|
foreach my$metric(@metrics){
|
305
|
0
|
0
|
|
|
|
|
push@{$headings},$metric if defined $self->{metrics}{"min$metric"};
|
|
0
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
}
|
307
|
0
|
|
|
|
|
|
@metrics=@{$headings}[3..$#{$headings}];
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
308
|
0
|
|
|
|
|
|
my(@rules,@support,@confidence,@lift,@leverage,@conviction,@coverage,@correlation,@cosine,@laplace,@jaccard);
|
309
|
0
|
|
|
|
|
|
foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
310
|
0
|
|
|
|
|
|
push @rules,"R$rule";
|
311
|
0
|
|
|
|
|
|
push @support,$self->{associationRules}{$rule}{support};
|
312
|
0
|
|
|
|
|
|
push @confidence,$self->{associationRules}{$rule}{confidence};
|
313
|
0
|
0
|
|
|
|
|
push @lift,$self->{associationRules}{$rule}{lift} if defined $self->{metrics}{minLift};
|
314
|
0
|
0
|
|
|
|
|
push @leverage,$self->{associationRules}{$rule}{leverage} if defined $self->{metrics}{minLeverage};
|
315
|
0
|
0
|
|
|
|
|
push @conviction,$self->{associationRules}{$rule}{conviction} if defined $self->{metrics}{minConviction};
|
316
|
0
|
0
|
|
|
|
|
push @coverage,$self->{associationRules}{$rule}{coverage} if defined $self->{metrics}{minCoverage};
|
317
|
0
|
0
|
|
|
|
|
push @correlation,$self->{associationRules}{$rule}{correlation} if defined $self->{metrics}{minCorrelation};
|
318
|
0
|
0
|
|
|
|
|
push @cosine,$self->{associationRules}{$rule}{cosine} if defined $self->{metrics}{minCosine};
|
319
|
0
|
0
|
|
|
|
|
push @laplace,$self->{associationRules}{$rule}{laplace} if defined $self->{metrics}{minLaplace};
|
320
|
0
|
0
|
|
|
|
|
push @jaccard,$self->{associationRules}{$rule}{jaccard} if defined $self->{metrics}{minJaccard};
|
321
|
|
|
|
|
|
|
}
|
322
|
0
|
|
|
|
|
|
my$line=(scalar(@rules)+1);
|
323
|
0
|
|
|
|
|
|
my@data=(\@rules,\@support,\@confidence);
|
324
|
0
|
0
|
|
|
|
|
push @data,\@lift if defined $self->{metrics}{minLift};
|
325
|
0
|
0
|
|
|
|
|
push @data,\@leverage if defined $self->{metrics}{minLeverage};
|
326
|
0
|
0
|
|
|
|
|
push @data,\@conviction if defined $self->{metrics}{minConviction};
|
327
|
0
|
0
|
|
|
|
|
push @data,\@coverage if defined $self->{metrics}{minCoverage};
|
328
|
0
|
0
|
|
|
|
|
push @data,\@correlation if defined $self->{metrics}{minCorrelation};
|
329
|
0
|
0
|
|
|
|
|
push @data,\@cosine if defined $self->{metrics}{minCosine};
|
330
|
0
|
0
|
|
|
|
|
push @data,\@laplace if defined $self->{metrics}{minLaplace};
|
331
|
0
|
0
|
|
|
|
|
push @data,\@jaccard if defined $self->{metrics}{minJaccard};
|
332
|
0
|
|
|
|
|
|
$worksheet->write('A1', $headings, $bold);
|
333
|
0
|
|
|
|
|
|
$worksheet->write('A2', \@data);
|
334
|
0
|
|
|
|
|
|
my$chart=$workbook->add_chart(type =>'column', embedded=>1);
|
335
|
0
|
|
|
|
|
|
my@columns=('B'..'M');
|
336
|
0
|
|
|
|
|
|
my$i=0;
|
337
|
0
|
|
|
|
|
|
$chart->add_series(
|
338
|
|
|
|
|
|
|
name => 'Support',
|
339
|
|
|
|
|
|
|
categories => '=Sheet1!$A$2:$A$'.$line,
|
340
|
|
|
|
|
|
|
values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
|
341
|
|
|
|
|
|
|
);
|
342
|
0
|
|
|
|
|
|
$i++;
|
343
|
0
|
|
|
|
|
|
$chart->add_series(
|
344
|
|
|
|
|
|
|
name => 'Confidence',
|
345
|
|
|
|
|
|
|
categories => '=Sheet1!$A$2:$A$'.$line,
|
346
|
|
|
|
|
|
|
values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
|
347
|
|
|
|
|
|
|
);
|
348
|
0
|
|
|
|
|
|
foreach my$metric(@metrics){
|
349
|
0
|
|
|
|
|
|
$i++;
|
350
|
0
|
|
|
|
|
|
$chart->add_series(
|
351
|
|
|
|
|
|
|
name => $metric,
|
352
|
|
|
|
|
|
|
categories => '=Sheet1!$A$2:$A$'.$line,
|
353
|
|
|
|
|
|
|
values => '=Sheet1!$'.$columns[$i].'$2:$'.$columns[$i].'$'.$line,
|
354
|
|
|
|
|
|
|
);
|
355
|
|
|
|
|
|
|
}
|
356
|
0
|
|
|
|
|
|
$worksheet->insert_chart($columns[($i+2)].'2', $chart);
|
357
|
0
|
|
|
|
|
|
$line+=2;
|
358
|
0
|
|
|
|
|
|
my $urlFormat = $workbook->add_format(
|
359
|
|
|
|
|
|
|
color => 'blue',
|
360
|
|
|
|
|
|
|
);
|
361
|
0
|
|
|
|
|
|
my$url=2;
|
362
|
0
|
|
|
|
|
|
foreach my$rule(sort{$a<=>$b}keys(%{$self->{associationRules}})){
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
363
|
0
|
|
|
|
|
|
$worksheet->write_url("A$line","internal:Sheet1!A$url",$urlFormat,"<");
|
364
|
0
|
|
|
|
|
|
$line++;
|
365
|
0
|
|
|
|
|
|
$worksheet->write_url("A$url","internal:Sheet1!A$line",$urlFormat,"R$rule");
|
366
|
0
|
|
|
|
|
|
$worksheet->write("A$line","Rule R$rule: $self->{associationRules}{$rule}{implication}");
|
367
|
0
|
|
|
|
|
|
$line++;
|
368
|
0
|
|
|
|
|
|
$worksheet->write("A$line","Support: $self->{associationRules}{$rule}{support}");
|
369
|
0
|
|
|
|
|
|
$line++;
|
370
|
0
|
|
|
|
|
|
$worksheet->write("A$line","Confidence: $self->{associationRules}{$rule}{confidence}");
|
371
|
0
|
|
|
|
|
|
$line++;
|
372
|
0
|
|
|
|
|
|
foreach my$metric(@metrics){
|
373
|
0
|
|
|
|
|
|
$worksheet->write("A$line","$metric: $self->{associationRules}{$rule}{${\lc$metric}}");
|
|
0
|
|
|
|
|
|
|
374
|
0
|
|
|
|
|
|
$line++;
|
375
|
|
|
|
|
|
|
}
|
376
|
0
|
|
|
|
|
|
$worksheet->write("A$line","Items:");
|
377
|
0
|
|
|
|
|
|
$line++;
|
378
|
0
|
|
|
|
|
|
foreach my$item(@{$self->{associationRules}{$rule}{items}}){
|
|
0
|
|
|
|
|
|
|
379
|
0
|
|
|
|
|
|
$worksheet->write("A$line","$item $self->{keyItemsDescription}{$item}");
|
380
|
0
|
|
|
|
|
|
$line++;
|
381
|
|
|
|
|
|
|
}
|
382
|
0
|
|
|
|
|
|
$line++;
|
383
|
0
|
|
|
|
|
|
$url++;
|
384
|
|
|
|
|
|
|
}
|
385
|
0
|
|
|
|
|
|
$worksheet->write("A$line","Frequent itemset: { @{$self->{frequentItemset}} }");
|
|
0
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
$line++;
|
387
|
0
|
|
|
|
|
|
$worksheet->write("A$line","Items:");
|
388
|
0
|
|
|
|
|
|
$line++;
|
389
|
0
|
|
|
|
|
|
foreach my$item(@{$self->{frequentItemset}}){
|
|
0
|
|
|
|
|
|
|
390
|
0
|
|
|
|
|
|
$worksheet->write("A$line","$item $self->{keyItemsDescription}{$item}");
|
391
|
0
|
|
|
|
|
|
$line++;
|
392
|
|
|
|
|
|
|
}
|
393
|
0
|
|
|
|
|
|
$workbook->close;
|
394
|
|
|
|
|
|
|
}
|
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
return 1;
|
397
|
|
|
|
|
|
|
__END__
|