File Coverage

blib/lib/NanoB2B/NER/Modelman.pm

Criterion	Covered	Total	%
statement	12	63	19.0
branch	0	16	0.0
condition			n/a
subroutine	4	7	57.1
pod	0	2	0.0
total	16	88	18.1

line	stmt	bran	sub	pod	time	code
1						#!/usr/bin/perl
2						# NanoB2B-NER::NER::Modelman
3						#
4						# Turns the ARFF Train files into models and loads models with ARFF Test files
5						# Version 1.0
6						#
7						# Program by Milk
8
9						package NanoB2B::NER::Modelman;
10
11	1		1		7	use NanoB2B::UniversalRoutines;
	1				2
	1				27
12	1		1		5	use File::Path qw(make_path); #makes sub directories
	1				2
	1				41
13	1		1		13	use strict;
	1				7
	1				21
14	1		1		5	use warnings;
	1				3
	1				642
15
16						#### GLOBAL VARIABLES ####
17
18						#option variables
19						my $program_dir;
20						my $classifier = "weka.classifiers.bayes.NaiveBayes";
21						my $weka_size = "Xmx4G";
22						my @features;
23						my $buckets = 10;
24						my $debug = 0;
25
26
27						#hardcoded for now; can be made programmable later
28						my $C_val = 0.25;
29						my $M_val = 2;
30
31
32						#universal subroutines object
33						my %uniParams = ();
34						my $uniSub;
35
36
37						#### A CIVILLIAN IS SAVED ####
38
39						# construction method to create a new Modelman object
40						# input : $directory <-- the name of the directory for the files
41						# $features <-- the set of features to run on [e.g. omtpcs]
42						# $type <-- the weka algorithm to run the set on [e.g. weka.classifiers.functions.SMO]
43						# $weka_size <-- the size for the memory allocation in the weka parameter [e.g. -Xmx6G]
44						# $buckets <-- the number of buckets used for the k-fold cross validation
45						# $debug <-- the debug flag forwarded to NanoB2B::UniversalRoutines
46						# output : $self <-- an instance of the Modelman object
47						sub new {
48						#grab class and parameters
49	0		0	0		my $self = {};
50	0					my $class = shift;
51	0	0				return undef if(ref $class);
52	0					my $params = shift;
53
54						#bless this object
55	0					bless $self, $class;
56	0					$self->_init($params);
57
58						#retrieve parameters for universal-routines
59	0					$uniParams{'debug'} = $debug;
60	0					$uniSub = NanoB2B::UniversalRoutines->new(\%uniParams);
61
62						#return the object
63	0					return $self;
64						}
65						# method to initialize the NanoB2B::NER::Modelman object.
66						# input : $parameters <- reference to a hash
67						# output:
68						sub _init {
69	0		0			my $self = shift;
70	0					my $params = shift;
71
72	0	0				$params = {} if(!defined $params);
73
74						# get some of the parameters
75	0					my $diroption = $params->{'directory'};
76	0					my $ftsoption = $params->{'features'};
77	0					my $bucketsNumoption = $params->{'buckets'};
78	0					my $typeoption = $params->{'type'};
79	0					my $sizeoption = $params->{'weka_size'};
80	0					my $debugoption = $params->{'debug'};
81
82						#set the global variables
83	0	0				if(defined $debugoption){$debug = $debugoption;}
	0
84	0	0				if(defined $diroption){$program_dir = $diroption;}
	0
85	0	0				if(defined $bucketsNumoption){$buckets = $bucketsNumoption;}
	0
86	0	0				if(defined $ftsoption){@features = split(' ', $ftsoption); }
	0
87	0	0				if(defined $typeoption){$classifier = $typeoption};
	0
88	0	0				if(defined $sizeoption){$weka_size = $sizeoption};
	0
89						}
90
91
92						############### I'M AN EVERYDAY AVERAGE MODELMAN ################
93
94						# runs the arff files through weka to export models
95						# input : $name <-- the name of the file to run through weka - model maker
96						# output: (model files)
97						sub make_model_file{
98	0		0	0		my $self = shift;
99	0					my $name = shift;
100
101	0					$name = lc($name);
102
103						#split them up by sets
104	0					my @sets = ();
105	0					my $item = "_";
106	0					foreach my $fs (@features){
107	0					my $abbrev = substr($fs, 0, 1); #add to abbreviations for the name
108	0					$item .= $abbrev;
109	0					push(@sets, $item);
110						}
111
112						#get the ending part of the classifier for the weka dir name
113	0					my @b = split(/\./, $classifier);
114	0					my $weka_dir = $b[$#b];
115
116						#run each set through weka and save the accuracy file
117	0					foreach my $set(@sets){
118						#set up the new folder
119	0					my $direct = "$program_dir/_MODELS/$weka_dir/$name" . "_MODEL_DATA/$set";
120	0					make_path($direct);
121
122						#prep the output accuracy file and the test and train files
123	0					for(my $a = 1; $a <= $buckets; $a++){
124	0					$\| = 1;
125	0					$uniSub->printColorDebug("cyan", ("\r" . "$name - $set -- $a"));
126	0					my $TRAIN = "$program_dir/_ARFF/$name" . "_ARFF/$set/_train/$name" . "_train-$a.arff";
127	0					my $WEK = $direct . "/$name" . "_model_$a";
128
129						#run weka-modelling and output
130	0					system "java $weka_size $classifier -C $C_val -t $TRAIN -d $direct";
131						}
132	0					$uniSub->printDebug("\n");
133						}
134						}
135
136
137
138						1;