File Coverage

blib/lib/NanoB2B/NER/Wekaman.pm

Criterion	Covered	Total	%
statement	12	65	18.4
branch	0	16	0.0
condition			n/a
subroutine	4	7	57.1
pod	0	2	0.0
total	16	90	17.7

line	stmt	bran	sub	pod	time	code
1						#!/usr/bin/perl
2						# NanoB2B-NER::NER::Wekaman
3						#
4						# Runs the ARFF files created by Arffman through weka and saves the resulting accuracy files
5						# Version 1.5
6						#
7						# Program by Milk
8
9						package NanoB2B::NER::Wekaman;
10
11	1		1		9	use NanoB2B::UniversalRoutines;
	1				3
	1				37
12	1		1		6	use File::Path qw(make_path); #makes sub directories
	1				2
	1				94
13	1		1		9	use strict;
	1				3
	1				31
14	1		1		8	use warnings;
	1				2
	1				722
15
16						#### GLOBAL VARIABLES ####
17
18						#option variables
19						my $program_dir;
20						my $classifier = "weka.classifiers.bayes.NaiveBayes";
21						my $weka_size = "Xmx4G";
22						my @features;
23						my $buckets = 10;
24						my $debug = 0;
25
26
27						#universal subroutines object
28						my %uniParams = ();
29						my $uniSub;
30
31
32						#### A SIDEKICK IS RECRUITED ####
33
34						# construction method to create a new Wekaman object
35						# input : $directory <-- the name of the directory for the files
36						# $features <-- the set of features to run on [e.g. omtpcs]
37						# \$type <-- the weka algorithm to run the set on [e.g. weka.classifiers.functions.SMO]
38						# \$weka_size <-- the size for the memory allocation in the weka parameter [e.g. -Xmx6G]
39						# \$buckets <-- the number of buckets used for the k-fold cross validation
40						# \$debug <-- the debug setting handed to NanoB2B::UniversalRoutines [default 0]
41						# output : $self <-- an instance of the Wekaman object
42						sub new {
43						#grab class and parameters
44	0		0	0		my $self = {};
45	0					my $class = shift;
46	0	0				return undef if(ref $class);
47	0					my $params = shift;
48
49						#bless this object
50	0					bless $self, $class;
51	0					$self->_init($params);
52
53						#retrieve parameters for universal-routines
54	0					$uniParams{'debug'} = $debug;
55	0					$uniSub = NanoB2B::UniversalRoutines->new(\%uniParams);
56
57						#return the object
58	0					return $self;
59						}
60						# method to initialize the NanoB2B::NER::Wekaman object.
61						# input : $parameters <- reference to a hash
62						# output:
63						sub _init {
64	0		0			my $self = shift;
65	0					my $params = shift;
66
67	0	0				$params = {} if(!defined $params);
68
69						# get some of the parameters
70	0					my $diroption = $params->{'directory'};
71	0					my $ftsoption = $params->{'features'};
72	0					my $bucketsNumoption = $params->{'buckets'};
73	0					my $typeoption = $params->{'type'};
74	0					my $sizeoption = $params->{'weka_size'};
75	0					my $debugoption = $params->{'debug'};
76
77						#set the global variables
78	0	0				if(defined $debugoption){$debug = $debugoption;}
	0
79	0	0				if(defined $diroption){$program_dir = $diroption;}
	0
80	0	0				if(defined $bucketsNumoption){$buckets = $bucketsNumoption;}
	0
81	0	0				if(defined $ftsoption){@features = split(' ', $ftsoption); }
	0
82	0	0				if(defined $typeoption){$classifier = $typeoption};
	0
83	0	0				if(defined $sizeoption){$weka_size = $sizeoption};
	0
84						}
85
86
87						############### NOW BACK TO THE WEKAMAN ################
88
89						# runs the arff files through weka
90						# input : $name <-- the name of the file to run through weka
91						# output: (weka files)
92						sub weka_file{
93	0		0	0		my $self = shift;
94	0					my $name = shift;
95
96	0					$name = lc($name);
97
98						#split them up by sets
99	0					my @sets = ();
100	0					my $item = "_";
101	0					foreach my $fs (@features){
102	0					my $abbrev = substr($fs, 0, 1); #add to abbreviations for the name
103	0					$item .= $abbrev;
104	0					push(@sets, $item);
105						}
106
107						#get the ending part of the classifier for the weka dir name
108	0					my @b = split(/\./, $classifier);
109	0					my $weka_dir = $b[$#b];
110
111						#run each set through weka and save the accuracy file
112	0					foreach my $set(@sets){
113						#set up the new folder
114	0					my $direct = "$program_dir/_WEKAS/$weka_dir/$name" . "_WEKA_DATA/$set";
115	0					make_path($direct);
116
117						#prep the output accuracy file and the test and train files
118	0					my $acc = "WEKAMAN-$name/$set/";
119	0					for(my $a = 1; $a <= $buckets; $a++){
120	0					$| = 1;
121	0					$uniSub->printColorDebug("cyan", ("\r" . "$name - $set -- $a"));
122	0					my $TRAIN = "$program_dir/_ARFF/$name" . "_ARFF/$set/_train/$name" . "_train-$a.arff";
123	0					my $TEST = "$program_dir/_ARFF/$name" . "_ARFF/$set/_test/$name" . "_test-$a.arff";
124	0					my $WEK = $direct . "/$name" . "_accuracy_$a";
125
126						#run weka and output
127	0					system "java $weka_size $classifier -T $TEST -t $TRAIN > $WEK";
128						}
129	0					$uniSub->printDebug("\n");
130						}
131						}
132
133						1;