File Coverage

blib/lib/NanoB2B/NER/Wekaman.pm
Criterion Covered Total %
statement 12 65 18.4
branch 0 16 0.0
condition n/a
subroutine 4 7 57.1
pod 0 2 0.0
total 16 90 17.7


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             # NanoB2B-NER::NER::Wekaman
3             #
4             # Runs the ARFF files created by Arffman into weka accuracy files
5             # Version 1.5
6             #
7             # Program by Milk
8              
9             package NanoB2B::NER::Wekaman;
10              
11 1     1   6 use NanoB2B::UniversalRoutines;
  1         2  
  1         26  
12 1     1   4 use File::Path qw(make_path); #makes sub directories
  1         2  
  1         40  
13 1     1   5 use strict;
  1         2  
  1         25  
14 1     1   4 use warnings;
  1         2  
  1         462  
15              
16             #### GLOBAL VARIABLES ####
17              
#option variables
# NOTE: these are file-level lexicals, so every Wekaman object created in the
# same process shares (and overwrites) the same settings.
my $program_dir;                                        # root directory that holds the _ARFF and _WEKAS folders
my $classifier = "weka.classifiers.bayes.NaiveBayes";   # weka class passed to java
my $weka_size = "-Xmx4G";                               # JVM option placed right after 'java'; the leading
                                                        # dash is required or java reads it as a class name
my @features;                                           # feature names; filled from the 'features' parameter
my $buckets = 10;                                       # number of train/test folds run for each feature set
my $debug = 0;                                          # handed to NanoB2B::UniversalRoutines as 'debug'


#universal subroutines object
my %uniParams = ();
my $uniSub;
30              
31              
32             #### A SIDEKICK IS RECRUITED ####
33              
# construction method to create a new Wekaman object
# input : $params <-- reference to a hash of options (all optional):
#            directory <-- the name of the directory for the files
#            features  <-- the features to run on, as one whitespace-separated string
#            type      <-- the weka algorithm to run the set on [e.g. weka.classifiers.functions.SMO]
#            weka_size <-- the memory option handed to java [e.g. -Xmx6G]
#            buckets   <-- the number of buckets used for the k-fold cross validation
#            debug     <-- the debug setting passed on to NanoB2B::UniversalRoutines
# output : $self <-- an instance of the Wekaman object
#          (undef when called on an existing object instead of the class name)
# note   : the options are stored in file-level variables, so they are
#          shared by every Wekaman object in the process
sub new {
    my ($class, $params) = @_;

    #only a class name may be used to construct the object
    return undef if ref $class;

    #bless the object and load the options
    my $self = bless {}, $class;
    $self->_init($params);

    #build the universal-routines helper with the current debug setting
    $uniParams{'debug'} = $debug;
    $uniSub = NanoB2B::UniversalRoutines->new(\%uniParams);

    return $self;
}
# method to initialize the NanoB2B::NER::Wekaman object.
# Copies every option that is defined in the parameter hash into the
# matching file-level variable; options that are missing keep their
# current value.
# input : $parameters <- reference to a hash (may be undef)
# output: (none that callers rely on)
sub _init {
    my ($self, $params) = @_;

    #treat a missing parameter hash as an empty one
    $params = {} unless defined $params;

    #pull all recognised options out in one go
    my ($dir_opt, $fts_opt, $buckets_opt, $type_opt, $size_opt, $debug_opt)
        = @{$params}{qw(directory features buckets type weka_size debug)};

    #overwrite the global settings with whatever was supplied
    $debug       = $debug_opt                 if defined $debug_opt;
    $program_dir = $dir_opt                   if defined $dir_opt;
    $buckets     = $buckets_opt               if defined $buckets_opt;
    @features    = split(' ', $fts_opt)       if defined $fts_opt;
    $classifier  = $type_opt                  if defined $type_opt;
    $weka_size   = $size_opt                  if defined $size_opt;
}
85              
86              
87             ############### NOW BACK TO THE WEKAMAN ################
88              
# runs the arff files through weka
# For every cumulative feature set (first letters of the features: _a, _ab,
# _abc, ...) and every bucket 1..$buckets, weka is run on the matching
# train/test ARFF pair and its standard output is saved as an accuracy file:
#   train : $program_dir/_ARFF/<name>_ARFF/<set>/_train/<name>_train-<n>.arff
#   test  : $program_dir/_ARFF/<name>_ARFF/<set>/_test/<name>_test-<n>.arff
#   output: $program_dir/_WEKAS/<classifier>/<name>_WEKA_DATA/<set>/<name>_accuracy_<n>
# input : $name <-- the name of the file to run through weka (lower-cased here)
# output: (weka files) -- nothing is returned; a failed run only issues a warning
sub weka_file {
    my $self = shift;
    my $name = shift;

    #without these the paths below would be built from the filesystem root
    if (!defined $name || !defined $program_dir) {
        warn "Wekaman::weka_file: a file name and a program directory are required\n";
        return;
    }

    $name = lc($name);

    #split them up by sets: each set adds the first letter of one more feature
    my @sets  = ();
    my $label = "_";
    foreach my $feature (@features) {
        $label .= substr($feature, 0, 1);
        push(@sets, $label);
    }

    #get the ending part of the classifier for the weka dir name
    my $weka_dir = (split(/\./, $classifier))[-1];

    #the java option string and the classifier string are split on whitespace,
    #as the shell did before, so several words in either one still work
    my @java = ('java', split(' ', $weka_size), split(' ', $classifier));

    #unbuffered output so the "\r" progress line is redrawn immediately;
    #restored automatically when this subroutine returns
    local $| = 1;

    #run each set through weka and save the accuracy files
    foreach my $set (@sets) {
        #set up the new folder
        my $direct = "$program_dir/_WEKAS/$weka_dir/${name}_WEKA_DATA/$set";
        make_path($direct);

        my $arff_dir = "$program_dir/_ARFF/${name}_ARFF/$set";
        foreach my $fold (1 .. $buckets) {
            $uniSub->printColorDebug("cyan", ("\r" . "$name - $set -- $fold"));

            my $train = "$arff_dir/_train/${name}_train-$fold.arff";
            my $test  = "$arff_dir/_test/${name}_test-$fold.arff";
            my $wek   = "$direct/${name}_accuracy_$fold";

            #run weka and output
            _run_weka($wek, @java, '-T', $test, '-t', $train);
        }
        $uniSub->printDebug("\n");
    }

    return;
}

# runs one command without going through a shell and saves its standard
# output in a file, so paths with spaces or shell characters are safe
# input : $out_file <-- the file that receives the command's standard output
#         @cmd      <-- the program followed by its arguments
# output: 1 if the command ran and exited with status 0, otherwise 0
#         (a warning describing the problem is issued)
sub _run_weka {
    my ($out_file, @cmd) = @_;

    #the output file is created first, so it exists even if the command fails
    my $out;
    if (!open($out, '>', $out_file)) {
        warn "Wekaman: cannot write '$out_file': $!\n";
        return 0;
    }

    #list-form pipe open: the arguments reach the program exactly as given
    my $weka;
    if (!open($weka, '-|', @cmd)) {
        warn "Wekaman: cannot run '$cmd[0]': $!\n";
        close($out);
        return 0;
    }

    while (my $line = <$weka>) {
        print {$out} $line;
    }

    #closing the pipe waits for the command and puts its status in $?
    my $ok = 1;
    if (!close($weka)) {
        warn "Wekaman: '@cmd' failed (exit status " . ($? >> 8) . ")\n";
        $ok = 0;
    }
    if (!close($out)) {
        warn "Wekaman: error while writing '$out_file': $!\n";
        $ok = 0;
    }

    return $ok;
}
132              
133             1;