line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
###################################################################################### |
4
|
|
|
|
|
|
|
# # |
5
|
|
|
|
|
|
|
# Author: Clint Cuffy # |
6
|
|
|
|
|
|
|
# Date: 06/16/2016 # |
7
|
|
|
|
|
|
|
# Revised: 02/06/2017 # |
8
|
|
|
|
|
|
|
# UMLS Similarity Word2Phrase Executable Interface Module # |
9
|
|
|
|
|
|
|
# # |
10
|
|
|
|
|
|
|
###################################################################################### |
11
|
|
|
|
|
|
|
# # |
12
|
|
|
|
|
|
|
# Description: # |
13
|
|
|
|
|
|
|
# ============ # |
14
|
|
|
|
|
|
|
# Perl "word2phrase" executable interface for UMLS Similarity # |
15
|
|
|
|
|
|
|
# Features: # |
16
|
|
|
|
|
|
|
# ========= # |
17
|
|
|
|
|
|
|
# Supports Word2Phrase Training Using Standard Options # |
18
|
|
|
|
|
|
|
# # |
19
|
|
|
|
|
|
|
###################################################################################### |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package Word2vec::Word2phrase; |
23
|
|
|
|
|
|
|
|
24
|
4
|
|
|
4
|
|
43739
|
use strict; |
|
4
|
|
|
|
|
15
|
|
|
4
|
|
|
|
|
104
|
|
25
|
4
|
|
|
4
|
|
42
|
use warnings; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
99
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Standard Package(s) |
28
|
4
|
|
|
4
|
|
16
|
use Cwd; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
229
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
4
|
|
|
4
|
|
27
|
use vars qw($VERSION); |
|
4
|
|
|
|
|
24
|
|
|
4
|
|
|
|
|
192
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$VERSION = '0.02'; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
###################################################################################### |
37
|
|
|
|
|
|
|
# Constructor |
38
|
|
|
|
|
|
|
###################################################################################### |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
BEGIN |
41
|
|
|
|
4
|
|
|
{ |
42
|
|
|
|
|
|
|
# BEGIN BLOCK (RUNS AT COMPILE TIME) : PLACEHOLDER, NO SETUP REQUIRED |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
###################################################################################### |
47
|
|
|
|
|
|
|
# Deconstructor |
48
|
|
|
|
|
|
|
###################################################################################### |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
END |
51
|
|
|
|
4
|
|
|
{ |
52
|
|
|
|
|
|
|
# END BLOCK (RUNS AT INTERPRETER EXIT) : PLACEHOLDER, NO CLEANUP REQUIRED |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
###################################################################################### |
57
|
|
|
|
|
|
|
# new Class Operator |
58
|
|
|
|
|
|
|
###################################################################################### |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Constructs a new Word2vec::Word2phrase object.
#
# Input (positional, all optional; undefined values fall back to the defaults shown):
#   $debugLog          -> Print log statements to the console (default 0)
#   $writeLog          -> Print log statements to 'Word2phraseLog.txt' (default 0)
#   $trainFilePath     -> Training corpus path (default "")
#   $outputFilePath    -> Output file path (default "")
#   $minCount          -> Value passed to word2phrase as '-min-count' (default 5)
#   $threshold         -> Value passed to word2phrase as '-threshold' (default 100)
#   $setW2PDebug       -> Value passed to word2phrase as '-debug' (default 2)
#   $workingDir        -> Working directory (default: current directory)
#   $word2PhraseExeDir -> Directory holding the word2phrase executable
#                         (default: first match found by searching @INC)
#   $overwriteOldFile  -> Allow overwriting an existing output file (default 0)
#
# Output:
#   Blessed object reference.
sub new
{
    my $class = shift;
    my ( $debugLog,  $writeLog,    $trainFilePath, $outputFilePath,    $minCount,
         $threshold, $setW2PDebug, $workingDir,    $word2PhraseExeDir, $overwriteOldFile ) = @_;

    my $self = {
        # Private Member Variables
        _debugLog          => $debugLog,             # Boolean (Binary): 0 = False, 1 = True
        _writeLog          => $writeLog,             # Boolean (Binary): 0 = False, 1 = True
        _trainFilePath     => $trainFilePath,        # String
        _outputFilePath    => $outputFilePath,       # String
        _minCount          => $minCount,             # Int
        _threshold         => $threshold,            # Int
        _setW2PDebug       => $setW2PDebug,          # Int
        _workingDir        => $workingDir,           # String
        _word2PhraseExeDir => $word2PhraseExeDir,    # String
        _overwriteOldFile  => $overwriteOldFile,     # Int
    };

    # Apply defaults for every option the caller left undefined
    $self->{ _debugLog }         = 0             if !defined ( $self->{ _debugLog } );
    $self->{ _writeLog }         = 0             if !defined ( $self->{ _writeLog } );
    $self->{ _trainFilePath }    = ""            if !defined ( $self->{ _trainFilePath } );
    $self->{ _outputFilePath }   = ""            if !defined ( $self->{ _outputFilePath } );
    $self->{ _minCount }         = 5             if !defined ( $self->{ _minCount } );
    $self->{ _threshold }        = 100           if !defined ( $self->{ _threshold } );
    $self->{ _setW2PDebug }      = 2             if !defined ( $self->{ _setW2PDebug } );
    $self->{ _workingDir }       = Cwd::getcwd() if !defined ( $self->{ _workingDir } );
    $self->{ _overwriteOldFile } = 0             if !defined ( $self->{ _overwriteOldFile } );

    # Try To Locate Word2Vec Executable Files Path.
    # Only search when the caller did not supply a directory; otherwise an
    # explicitly requested directory would be silently replaced by the search.
    if( !defined( $self->{ _word2PhraseExeDir } ) )
    {
        # Candidate locations relative to each @INC entry (test, dev, dev, release).
        # The order matters: a later match replaces an earlier one.
        my @candidateSuffixes = ( "External/Word2vec",
                                  "../External/Word2vec",
                                  "../../External/Word2vec",
                                  "Word2vec/External/Word2vec" );

        for my $dir ( @INC )
        {
            for my $suffix ( @candidateSuffixes )
            {
                $self->{ _word2PhraseExeDir } = "$dir/$suffix" if ( -e "$dir/$suffix" );
            }
        }
    }

    # Open the log file only when file logging was requested
    if( $self->{ _writeLog } )
    {
        require IO::Handle;    # Provides autoflush() on lexical handles for older perls

        if( open( my $logHandle, '>:encoding(UTF-8)', 'Word2phraseLog.txt' ) )
        {
            $logHandle->autoflush( 1 );    # Auto-flushes writes to log file
            $self->{ _fileHandle } = $logHandle;
        }
        else
        {
            # Leave _fileHandle undefined: WriteLog() skips file output when no handle exists
            warn( "Word2vec::Word2phrase::new - Warning: Unable To Open \"Word2phraseLog.txt\" For Writing : $!\n" );
        }
    }

    bless $self, $class;

    $self->WriteLog( "New - Debug On" );
    $self->WriteLog( "New - Word2Phrase Executable Directory Found" ) if defined( $self->{ _word2PhraseExeDir } );
    $self->WriteLog( "New - Setting Word2Phrase Executable Directory To: \"" . $self->{ _word2PhraseExeDir } . "\"" ) if defined( $self->{ _word2PhraseExeDir } );

    return $self;
}
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
###################################################################################### |
116
|
|
|
|
|
|
|
# DESTROY |
117
|
|
|
|
|
|
|
###################################################################################### |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Object destructor: closes the log file handle when one is stored on the object.
sub DESTROY
{
    my $self      = shift;
    my $logHandle = $self->{ _fileHandle };

    # Nothing to release unless a log file handle was stored
    close( $logHandle ) if( $logHandle );
}
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
###################################################################################### |
129
|
|
|
|
|
|
|
# Module Functions |
130
|
|
|
|
|
|
|
###################################################################################### |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Runs the external "word2phrase" executable.
#
# Every parameter is optional; an undefined parameter falls back to the
# corresponding member variable (via its accessor).
#
# Input:
#   $trainFilePath  -> Training text corpus file path
#   $outputFilePath -> Output file path
#   $minCount       -> Value passed as '-min-count'
#   $threshold      -> Value passed as '-threshold'
#   $debug          -> Value passed as '-debug'
#   $overwrite      -> 0 = refuse to run when the output file already exists
#
# Output:
#   0 on success. -1 when a pre-training check fails, the executable cannot be
#   started, or the output file is missing/empty afterwards. Otherwise the
#   non-zero status returned by system().
sub ExecuteTraining
{
    my ( $self, $trainFilePath, $outputFilePath, $minCount, $threshold, $debug, $overwrite ) = @_;

    # Pre-Training Check(s)
    my $executableFileDir = $self->GetWord2PhraseExeDir() . "/word2phrase";
    $executableFileDir .= ".exe" if $self->GetOSType() eq "MSWin32";

    # Override Train File Path Member Variable With Specified Train File Parameter
    $self->WriteLog( "ExecuteTraining - \"TrainFilePath\" Parameter Specified / Overriding Member Variable" ) if defined( $trainFilePath );
    $trainFilePath = $self->GetTrainFilePath() if !defined( $trainFilePath );

    # Override Output File Path Member Variable With Specified Output File Parameter
    $self->WriteLog( "ExecuteTraining - \"OutputFilePath\" Parameter Specified / Overriding Member Variable" ) if defined( $outputFilePath );
    $outputFilePath = $self->GetOutputFilePath() if !defined( $outputFilePath );

    # Override Overwrite Member Variable With Specified Overwrite Parameter
    $self->WriteLog( "ExecuteTraining - \"Overwrite\" Parameter Specified / Overriding Member Variable" ) if defined( $overwrite );
    $overwrite = $self->GetOverwriteOldFile() if !defined( $overwrite );

    # Check For 'word2phrase' Executable
    if( !( -e $executableFileDir ) )
    {
        $self->WriteLog( "ExecuteTraining - Error: \"word2phrase\" Executable File Cannot Be Found" );
        return -1;
    }

    # Check For A Non-Empty Training File
    if( !( -e $trainFilePath ) )
    {
        $self->WriteLog( "ExecuteTraining - Error: Training File Not Found" );
        return -1;
    }

    if( -z $trainFilePath )
    {
        $self->WriteLog( "ExecuteTraining - Error: Training File Size = 0 bytes - No Data In Training File" );
        return -1;
    }

    # Without an output path the executable has nowhere to write, so fail before running it
    if( $outputFilePath eq "" )
    {
        $self->WriteLog( "ExecuteTraining - Error: Output File Path Not Specified" );
        return -1;
    }

    # Checks For Existing Output File And Returns -1 If Overwrite Option Is Not Enabled
    if( -e $outputFilePath && $overwrite == 0 )
    {
        $self->WriteLog( "ExecuteTraining - Warning: \"$outputFilePath\" Already Exists - Canceling Training" );
        $self->WriteLog( "ExecuteTraining - Try Enabling \"Overwrite\" Option or Delete \"$outputFilePath\" In Working Directory" );
        return -1;
    }

    # Fetch Other Training Parameters
    $self->WriteLog( "ExecuteTraining - \"MinCount\" Parameter Defined / Overriding Member Variable" ) if defined( $minCount );
    $minCount = $self->GetMinCount() if !defined( $minCount );

    $self->WriteLog( "ExecuteTraining - \"Threshold\" Parameter Defined / Overriding Member Variable" ) if defined( $threshold );
    $threshold = $self->GetThreshold() if !defined( $threshold );

    $self->WriteLog( "ExecuteTraining - \"Debug\" Parameter Defined / Overriding Member Variable" ) if defined( $debug );
    $debug = $self->GetW2PDebug() if !defined( $debug );

    # Argument list handed directly to the executable. The list form of system()
    # is used so that paths containing quotes, spaces or shell metacharacters
    # reach the program unchanged and cannot be interpreted by a shell.
    my @arguments = ( "-train",     $trainFilePath,
                      "-output",    $outputFilePath,
                      "-min-count", $minCount,
                      "-threshold", $threshold,
                      "-debug",     $debug );

    # Human-readable rendering of the command, used for logging only
    my $command = "\"$executableFileDir\" ";
    $command .= ( "-train \"" . $trainFilePath . "\" " );
    $command .= ( "-output \"" . $outputFilePath . "\" " );
    $command .= ( "-min-count " . $minCount . " " );
    $command .= ( "-threshold " . $threshold . " " );
    $command .= ( "-debug " . $debug . " " );

    $self->WriteLog( "Executing Command: $command" );

    # Execute External Program To Train "word2phrase" Without Capturing Program Output
    my $result     = system( $executableFileDir, @arguments );
    my $spawnError = "$!";    # Captured immediately: later I/O may overwrite $!

    print "\n";

    # Post-Training Check(s)
    # system() returns -1 when the program could not be started at all
    $self->WriteLog( "ExecuteTraining - Error: Unable To Spawn Executable File : $spawnError" ) if ( $result == -1 );
    $self->WriteLog( "ExecuteTraining - Error: Unable To Spawn Executable File - Try Running '--clean' Command And Re-compile Executables" ) if ( $result == 65280 );

    $self->WriteLog( "ExecuteTraining - Error: Word2Phrase Output File Does Not Exist" ) if !( -e $outputFilePath );
    $self->WriteLog( "ExecuteTraining - Error: Word2Phrase Output File Size = Zero" ) if ( -z $outputFilePath );
    $result = -1 if ( !( -e $outputFilePath ) || ( -z $outputFilePath ) );

    $self->WriteLog( "ExecuteTraining - Training Successful" ) if $result == 0 && ( -e $outputFilePath );
    $self->WriteLog( "ExecuteTraining - Training Unsuccessful" ) if $result != 0;

    return $result;
}
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# Trains word2phrase on an in-memory string.
#
# The string is written to the temporary file "w2ptemp.txt" in the working
# directory, ExecuteTraining() is run on that file, and the temporary file is
# removed afterwards.
#
# Input:
#   $trainingStr    -> Text to train on (must be defined and non-empty)
#   $outputFilePath, $minCount, $threshold, $debug, $overwrite
#                   -> Passed through unchanged to ExecuteTraining()
#
# Output:
#   Return value of ExecuteTraining(), or -1 when the training string is
#   missing/empty or the temporary file cannot be created or written.
sub ExecuteStringTraining
{
    my ( $self, $trainingStr, $outputFilePath, $minCount, $threshold, $debug, $overwrite ) = @_;

    # Check(s)
    if( !defined( $trainingStr ) )
    {
        $self->WriteLog( "ExecuteStringTraining - Error: Training String Is Not Defined" );
        return -1;
    }

    if( $trainingStr eq "" )
    {
        $self->WriteLog( "ExecuteStringTraining - Error: Training String Is Empty" );
        return -1;
    }

    # Save Training String To Temporary File
    $self->WriteLog( "ExecuteStringTraining - Saving Training String To Temporary File At Working Directory: \"" . $self->GetWorkingDir() . "\"" );

    my $tempFilePath = $self->GetWorkingDir() . "/w2ptemp.txt";
    my $fileHandle;

    if( !open( $fileHandle, ">:encoding(utf8)", $tempFilePath ) )
    {
        $self->WriteLog( "ExecuteStringTraining - Error Creating File Handle : $!" );
        return -1;
    }

    # Print Training String Data To File.
    # Both print() and close() are checked: buffered write errors (e.g. a full
    # disk) may only surface at close(), and training on a truncated file
    # would otherwise go unnoticed.
    my $writeError;

    if( !print { $fileHandle } $trainingStr )
    {
        $writeError = "$!";
    }

    if( !close( $fileHandle ) )
    {
        $writeError = "$!" if !defined( $writeError );
    }

    undef( $fileHandle );

    if( defined( $writeError ) )
    {
        $self->WriteLog( "ExecuteStringTraining - Error Writing Temporary Training String File : $writeError" );
        unlink( $tempFilePath );
        return -1;
    }

    $self->WriteLog( "ExecuteStringTraining - Temporary Training String File Saved" );

    my $result = $self->ExecuteTraining( $tempFilePath, $outputFilePath, $minCount, $threshold, $debug, $overwrite );

    $self->WriteLog( "ExecuteStringTraining - Removing Temporary Training String Data File" );
    unlink( $tempFilePath );

    # Any non-zero status is a failure (ExecuteTraining may return a raw system() status)
    $self->WriteLog( "ExecuteStringTraining - Finished" ) if ( $result == 0 );
    $self->WriteLog( "ExecuteStringTraining - Finished With Errors" ) if ( $result != 0 && $self->GetWriteLog() == 0 );
    $self->WriteLog( "ExecuteStringTraining - Finished With Errors / See Log File For Details" ) if ( $result != 0 && $self->GetWriteLog() == 1 );

    return $result;
}
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Returns the operating system name Perl reports in $^O (e.g. "MSWin32").
sub GetOSType
{
    my $self = shift;    # Unused; kept so the sub can be called as a method

    my $osName = $^O;
    return $osName;
}
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
###################################################################################### |
253
|
|
|
|
|
|
|
# Accessors |
254
|
|
|
|
|
|
|
###################################################################################### |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Returns the console-logging flag, initialising it to 0 when it is undefined.
sub GetDebugLog
{
    my $self = shift;

    unless( defined( $self->{ _debugLog } ) )
    {
        $self->{ _debugLog } = 0;
    }

    return $self->{ _debugLog };
}
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Returns the file-logging flag, initialising it to 0 when it is undefined.
sub GetWriteLog
{
    my $self = shift;

    unless( defined( $self->{ _writeLog } ) )
    {
        $self->{ _writeLog } = 0;
    }

    return $self->{ _writeLog };
}
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# Returns the stored log file handle, or undef when none is stored.
# When no handle is defined, the _fileHandle key is (re)set to undef.
sub GetFileHandle
{
    my $self = shift;

    unless( defined( $self->{ _fileHandle } ) )
    {
        $self->{ _fileHandle } = undef;
    }

    return $self->{ _fileHandle };
}
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Returns the training file path, initialising it to "" when it is undefined.
sub GetTrainFilePath
{
    my $self = shift;

    unless( defined( $self->{ _trainFilePath } ) )
    {
        $self->{ _trainFilePath } = "";
    }

    return $self->{ _trainFilePath };
}
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# Returns the output file path, initialising it to "" when it is undefined.
sub GetOutputFilePath
{
    my $self = shift;

    unless( defined( $self->{ _outputFilePath } ) )
    {
        $self->{ _outputFilePath } = "";
    }

    return $self->{ _outputFilePath };
}
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# Returns the min-count setting, initialising it to 5 when it is undefined.
sub GetMinCount
{
    my $self = shift;

    unless( defined( $self->{ _minCount } ) )
    {
        $self->{ _minCount } = 5;
    }

    return $self->{ _minCount };
}
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# Returns the threshold setting, initialising it to 100 when it is undefined.
sub GetThreshold
{
    my $self = shift;

    unless( defined( $self->{ _threshold } ) )
    {
        $self->{ _threshold } = 100;
    }

    return $self->{ _threshold };
}
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# Returns the word2phrase debug level, initialising it to 2 when it is undefined.
sub GetW2PDebug
{
    my $self = shift;

    unless( defined( $self->{ _setW2PDebug } ) )
    {
        $self->{ _setW2PDebug } = 2;
    }

    return $self->{ _setW2PDebug };
}
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# Returns the working directory, initialising it to the current directory
# (Cwd::getcwd) when it is undefined.
sub GetWorkingDir
{
    my $self = shift;

    unless( defined( $self->{ _workingDir } ) )
    {
        $self->{ _workingDir } = Cwd::getcwd();
    }

    return $self->{ _workingDir };
}
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Returns the word2phrase executable directory, initialising it to the current
# directory (Cwd::getcwd) when it is undefined.
sub GetWord2PhraseExeDir
{
    my $self = shift;

    unless( defined( $self->{ _word2PhraseExeDir } ) )
    {
        $self->{ _word2PhraseExeDir } = Cwd::getcwd();
    }

    return $self->{ _word2PhraseExeDir };
}
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Returns the overwrite flag, initialising it to 0 when it is undefined.
sub GetOverwriteOldFile
{
    my $self = shift;

    unless( defined( $self->{ _overwriteOldFile } ) )
    {
        $self->{ _overwriteOldFile } = 0;
    }

    return $self->{ _overwriteOldFile };
}
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
###################################################################################### |
335
|
|
|
|
|
|
|
# Mutators |
336
|
|
|
|
|
|
|
###################################################################################### |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# Stores a new training file path and returns it.
# An undefined argument leaves the current value untouched.
sub SetTrainFilePath
{
    my $self    = shift;
    my $newPath = shift;

    return $self->{ _trainFilePath } = $newPath if defined( $newPath );
}
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
# Stores a new output file path and returns it.
# An undefined argument leaves the current value untouched.
sub SetOutputFilePath
{
    my $self    = shift;
    my $newPath = shift;

    return $self->{ _outputFilePath } = $newPath if defined( $newPath );
}
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# Stores a new min-count value and returns it.
# An undefined argument leaves the current value untouched.
sub SetMinCount
{
    my $self     = shift;
    my $newCount = shift;

    return $self->{ _minCount } = $newCount if defined( $newCount );
}
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# Stores a new threshold value and returns it.
# An undefined argument leaves the current value untouched.
sub SetThreshold
{
    my $self         = shift;
    my $newThreshold = shift;

    return $self->{ _threshold } = $newThreshold if defined( $newThreshold );
}
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# Stores a new word2phrase debug level and returns it.
# An undefined argument leaves the current value untouched.
sub SetW2PDebug
{
    my $self     = shift;
    my $newLevel = shift;

    return $self->{ _setW2PDebug } = $newLevel if defined( $newLevel );
}
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Stores a new working directory and returns it.
# An undefined argument leaves the current value untouched.
sub SetWorkingDir
{
    my $self   = shift;
    my $newDir = shift;

    return $self->{ _workingDir } = $newDir if defined( $newDir );
}
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# Stores a new word2phrase executable directory and returns it.
# An undefined argument leaves the current value untouched.
sub SetWord2PhraseExeDir
{
    my $self   = shift;
    my $newDir = shift;

    return $self->{ _word2PhraseExeDir } = $newDir if defined( $newDir );
}
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# Stores a new overwrite flag and returns it.
# An undefined argument leaves the current value untouched.
sub SetOverwriteOldFile
{
    my $self      = shift;
    my $overwrite = shift;

    return $self->{ _overwriteOldFile } = $overwrite if defined( $overwrite );
}
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
###################################################################################### |
388
|
|
|
|
|
|
|
# Debug Functions |
389
|
|
|
|
|
|
|
###################################################################################### |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Returns the current local time as "HH:MM:SS", each field zero-padded to two digits.
sub GetTime
{
    my $self = shift;    # Unused; WriteLog() also calls this sub without an object

    my ( $seconds, $minutes, $hours ) = localtime();

    return sprintf( "%02d:%02d:%02d", $hours, $minutes, $seconds );
}
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# Returns the current local date as "M/D/YYYY" (month and day are not zero-padded).
sub GetDate
{
    my $self = shift;    # Unused; WriteLog() also calls this sub without an object

    # localtime() reports the month as 0-11 and the year as an offset from 1900
    my ( $day, $month, $year ) = ( localtime() )[ 3, 4, 5 ];

    return join( "/", $month + 1, $day, $year + 1900 );
}
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# Writes a timestamped log line to the console and/or the log file.
#
# Input:
#   $string       -> Message text; nothing is logged when undefined
#   $printNewLine -> 0 suppresses the trailing newline (default 1)
#
# Console output happens when GetDebugLog() is true; file output happens when
# GetWriteLog() is true and a log file handle is available. Objects that are
# not Word2vec::Word2phrase instances (or subclasses) are rejected with a
# console message whenever either kind of logging is enabled.
sub WriteLog
{
    my ( $self ) = shift;
    my $string = shift;
    my $printNewLine = shift;

    return if !defined ( $string );
    $printNewLine = 1 if !defined ( $printNewLine );

    my $debugLogEnabled = $self->GetDebugLog();
    my $writeLogEnabled = $self->GetWriteLog();

    # Nothing to do when both logging targets are disabled
    return if( !$debugLogEnabled && !$writeLogEnabled );

    # Accept this class and any subclass; an exact ref() string comparison
    # would reject objects of derived classes.
    require Scalar::Util;

    if( !( Scalar::Util::blessed( $self ) && $self->isa( __PACKAGE__ ) ) )
    {
        print( GetDate() . " " . GetTime() . " - word2phrase: Cannot Call WriteLog() From Outside Module!\n" );
        return;
    }

    # Build the line once so console and file output carry the same timestamp
    my $logLine = GetDate() . " " . GetTime() . " - word2phrase::$string";
    $logLine .= "\n" if( $printNewLine != 0 );

    print $logLine if( $debugLogEnabled );

    if( $writeLogEnabled )
    {
        my $fileHandle = $self->GetFileHandle();

        # The handle is undefined when no log file was opened
        print( $fileHandle $logLine ) if defined( $fileHandle );
    }
}
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
#################### All Modules Are To Output "1"(True) at EOF ###################### |
467
|
|
|
|
|
|
|
1; |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=head1 NAME |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Word2vec::Word2phrase - word2vec's word2phrase wrapper module. |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head1 SYNOPSIS |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
479
|
|
|
|
|
|
|
$w2p->SetMinCount( 12 ); |
480
|
|
|
|
|
|
|
$w2p->SetThreshold( 20 ); |
481
|
|
|
|
|
|
|
$w2p->SetTrainFilePath( "textCorpus.txt" ); |
482
|
|
|
|
|
|
|
$w2p->SetOutputFilePath( "phraseTextCorpus.txt" ); |
483
|
|
|
|
|
|
|
$w2p->ExecuteTraining(); |
484
|
|
|
|
|
|
|
undef( $w2p ); |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
# or |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
489
|
|
|
|
|
|
|
$w2p->ExecuteTraining( $trainFilePath, $outputFilePath, $minCount, $threshold, $debug, $overwrite ); |
490
|
|
|
|
|
|
|
undef( $w2p ); |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head1 DESCRIPTION |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Word2vec::Word2phrase is a word2vec package tool that "compoundifies" bi-grams in a text corpus based on a minimum and maximum frequency. |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=head2 Main Functions |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head3 new |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Description: |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Returns a new 'Word2vec::Word2phrase' module object. |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Note: Specifying no parameters implies default options. |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
Default Parameters: |
507
|
|
|
|
|
|
|
debugLog = 0 |
508
|
|
|
|
|
|
|
writeLog = 0 |
509
|
|
|
|
|
|
|
trainFilePath = "" |
510
|
|
|
|
|
|
|
outputFilePath = "" |
511
|
|
|
|
|
|
|
minCount = 5 |
512
|
|
|
|
|
|
|
threshold = 100 |
513
|
|
|
|
|
|
|
setW2PDebug = 2 |
514
|
|
|
|
|
|
|
workingDir = Current Directory |
515
|
|
|
|
|
|
|
word2PhraseExeDir = Word2Phrase Executable Directory |
516
|
|
|
|
|
|
|
overwriteOldFile = 0 |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Input: |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
$debugLog -> Instructs module to print debug statements to the console. (1 = True / 0 = False) |
521
|
|
|
|
|
|
|
$writeLog -> Instructs module to print debug statements to a log file. (1 = True / 0 = False) |
522
|
|
|
|
|
|
|
$trainFilePath -> Specifies the training text corpus for word2phrase training. (String) |
523
|
|
|
|
|
|
|
$outputFilePath -> Specifies the output path for post word2phrase training. (String) |
524
|
|
|
|
|
|
|
$minCount -> Specifies the minimum range value for bi-gram 'compoundification'. (Positive Integer) |
525
|
|
|
|
|
|
|
$threshold -> Specifies the maximum range value for bi-gram 'compoundification'. (Positive Integer) |
526
|
|
|
|
|
|
|
$setW2PDebug -> Specifies the word2phrase debug information parameter value to show during training. (Integer) |
527
|
|
|
|
|
|
|
$workingDir -> Specifies the current working directory. (String) |
528
|
|
|
|
|
|
|
$word2PhraseExeDir -> Specifies word2phrase executable directory. (String) |
529
|
|
|
|
|
|
|
$overwriteOldFile -> Instructs the module whether to overwrite an existing file with the same output file name and path. ( '1' = Overwrite / '0' = Do Not Overwrite ) |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
Note: It is not recommended to specify all new() parameters, as it has not been thoroughly tested. |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
Output: |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Word2vec::Word2phrase object. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Example: |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
undef( $w2p ); |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=head3 DESTROY |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
Description: |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
Closes the log file handle, if one was opened. |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Input: |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
None |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Output: |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
None |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
Example: |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
$w2p->DESTROY(); |
566
|
|
|
|
|
|
|
undef( $w2p ); |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=head3 ExecuteTraining |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
Description: |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
Executes word2phrase training based on parameters. Parameter variables have higher precedence than member variables. |
573
|
|
|
|
|
|
|
Any parameter specified will override its respective member variable. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Note: If no parameters are specified, this module executes word2phrase training based on preset member |
576
|
|
|
|
|
|
|
variables. Returns an integer status code (see Output below). |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
Input: |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
$trainFilePath -> Training text corpus file path |
581
|
|
|
|
|
|
|
$outputFilePath -> Vector binary file path |
582
|
|
|
|
|
|
|
$minCount -> Minimum bi-gram frequency (Positive Integer) |
583
|
|
|
|
|
|
|
$threshold -> Maximum bi-gram frequency (Positive Integer) |
584
|
|
|
|
|
|
|
$debug -> Displays word2phrase debug information during training. (0 = None, 1 = Show Debug Information, 2 = Show Even More Debug Information) |
585
|
|
|
|
|
|
|
$overwrite -> Overwrites an existing output file when executing training. (0 = False / 1 = True) |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
Output: |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
Example: |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
596
|
|
|
|
|
|
|
$w2p->SetMinCount( 12 ); |
597
|
|
|
|
|
|
|
$w2p->SetThreshold( 20 ); |
598
|
|
|
|
|
|
|
$w2p->SetTrainFilePath( "textCorpus.txt" ); |
599
|
|
|
|
|
|
|
$w2p->SetOutputFilePath( "phraseTextCorpus.txt" ); |
600
|
|
|
|
|
|
|
$w2p->ExecuteTraining(); |
601
|
|
|
|
|
|
|
undef( $w2p ); |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# Or |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
608
|
|
|
|
|
|
|
$w2p->ExecuteTraining( "textCorpus.txt", "phraseTextCorpus.txt", 12, 20, 2, 1 ); |
609
|
|
|
|
|
|
|
undef( $w2p ); |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=head3 ExecuteStringTraining |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
Description: |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
Executes word2phrase training based on parameters. Parameter variables have higher precedence than member variables. |
616
|
|
|
|
|
|
|
Any parameter specified will override its respective member variable. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
Note: If no parameters are specified, this module executes word2phrase training based on preset member |
619
|
|
|
|
|
|
|
variables. Returns an integer status code (see Output below). |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
Input: |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
$trainingString -> String to train |
624
|
|
|
|
|
|
|
$outputFilePath -> Vector binary file path |
625
|
|
|
|
|
|
|
$minCount -> Minimum bi-gram frequency (Positive Integer) |
626
|
|
|
|
|
|
|
$threshold -> Maximum bi-gram frequency (Positive Integer) |
627
|
|
|
|
|
|
|
$debug -> Displays word2phrase debug information during training. (0 = None, 1 = Show Debug Information, 2 = Show Even More Debug Information) |
628
|
|
|
|
|
|
|
$overwrite -> Overwrites an existing output file when executing training. (0 = False / 1 = True) |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
Output: |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
Example: |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
639
|
|
|
|
|
|
|
$w2p->SetMinCount( 12 ); |
640
|
|
|
|
|
|
|
$w2p->SetThreshold( 20 ); |
641
|
|
|
|
|
|
|
$w2p->SetTrainFilePath( "large string to train here" ); |
642
|
|
|
|
|
|
|
$w2p->SetOutputFilePath( "phraseTextCorpus.txt" ); |
643
|
|
|
|
|
|
|
$w2p->ExecuteTraining(); |
644
|
|
|
|
|
|
|
undef( $w2p ); |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
# Or |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
651
|
|
|
|
|
|
|
$w2p->ExecuteStringTraining( "large string to train here", "phraseTextCorpus.txt", 12, 20, 2, 1 ); |
652
|
|
|
|
|
|
|
undef( $w2p ); |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head3 GetOSType |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
Description: |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
Returns the operating system type string. |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Input: |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
None |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Output: |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
$string -> Operating system string. |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
Example: |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
673
|
|
|
|
|
|
|
my $operatingSystem = $w2p->GetOSType(); |
674
|
|
|
|
|
|
|
print( "Operating System: $operatingSystem\n" ) if defined( $operatingSystem ); |
675
|
|
|
|
|
|
|
undef( $w2p ); |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=head2 Accessor Functions |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head3 GetDebugLog |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Description: |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
Returns the _debugLog member variable, which is set when the Word2vec::Word2phrase object is initialized by the new function. |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
Input: |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
None |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
Output: |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Example: |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
698
|
|
|
|
|
|
|
my $debugLog = $w2p->GetDebugLog(); |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
print( "Debug Logging Enabled\n" ) if $debugLog == 1; |
701
|
|
|
|
|
|
|
print( "Debug Logging Disabled\n" ) if $debugLog == 0; |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
undef( $w2p ); |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=head3 GetWriteLog |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
Description: |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
Returns the _writeLog member variable, which is set when the Word2vec::Word2phrase object is initialized by the new function. |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
Input: |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
None |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
Output: |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
Example: |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
724
|
|
|
|
|
|
|
my $writeLog = $w2p->GetWriteLog(); |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
print( "Write Logging Enabled\n" ) if $writeLog == 1; |
727
|
|
|
|
|
|
|
print( "Write Logging Disabled\n" ) if $writeLog == 0; |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
undef( $w2p ); |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=head3 GetFileHandle |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
Description: |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
Returns file handle used by WriteLog() method. |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Input: |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
None |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Output: |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
$fileHandle -> Returns file handle blob used by 'WriteLog()' function or undefined. |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
Example: |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=head3 GetTrainFilePath |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
Description: |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
Returns (string) training file path. |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
Input: |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
None |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
Output: |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
$string -> word2phrase training file path |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
Example: |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
768
|
|
|
|
|
|
|
my $filePath = $w2p->GetTrainFilePath(); |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
print( "Training File Path: $filePath\n" ) if defined( $filePath ); |
771
|
|
|
|
|
|
|
undef( $w2p ); |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=head3 GetOutputFilePath |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
Description: |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
Returns (string) output file path. |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
Input: |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
None |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
Output: |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
$string -> word2phrase output file path |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
Example: |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
792
|
|
|
|
|
|
|
my $filePath = $w2p->GetOutputFilePath(); |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
print( "Output File Path: $filePath\n" ) if defined( $filePath ); |
795
|
|
|
|
|
|
|
undef( $w2p ); |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
=head3 GetMinCount |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
Description: |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
Returns (integer) minimum bi-gram range. |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Input: |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
None |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
Output: |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
$value -> Minimum bi-gram frequency (Positive Integer) |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
Example: |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
816
|
|
|
|
|
|
|
my $mincount = $w2p->GetMinCount(); |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
print( "MinCount: $mincount\n" ) if defined( $mincount ); |
819
|
|
|
|
|
|
|
undef( $w2p ); |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
=head3 GetThreshold |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
Description: |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
Returns (integer) maximum bi-gram range. |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Input: |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
None |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
Output: |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
$value -> Maximum bi-gram frequency (Positive Integer) |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
Example: |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
840
|
|
|
|
|
|
|
my $threshold = $w2p->GetThreshold(); |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
print( "Threshold: $threshold\n" ) if defined( $threshold ); |
843
|
|
|
|
|
|
|
undef( $w2p ); |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
=head3 GetW2PDebug |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
Description: |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
Returns word2phrase debug parameter value. |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
Input: |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
None |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
Output: |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
$value -> 0 = No debugging, 1 = Show debugging, 2 = Show even more debugging |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
Example: |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
864
|
|
|
|
|
|
|
my $w2pdebug = $w2p->GetW2PDebug(); |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
print( "Word2Phrase Debug Level: $w2pdebug\n" ) if defined( $w2pdebug ); |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
undef( $w2p ); |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
=head3 GetWorkingDir |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
Description: |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
Returns (string) working directory path. |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
Input: |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
None |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
Output: |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
$string -> Current working directory path |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
Example: |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
889
|
|
|
|
|
|
|
my $workingDir = $w2p->GetWorkingDir(); |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
print( "Working Directory: $workingDir\n" ) if defined( $workingDir ); |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
undef( $w2p ); |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
=head3 GetWord2PhraseExeDir |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
Description: |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
Returns (string) word2phrase executable directory path. |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
Input: |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
None |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
Output: |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
$string -> Word2Phrase executable directory path |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
Example: |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
914
|
|
|
|
|
|
|
my $workingDir = $w2p->GetWord2PhraseExeDir(); |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
print( "Word2Phrase Executable Directory: $workingDir\n" ) if defined( $workingDir ); |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
undef( $w2p ); |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head3 GetOverwriteOldFile |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
Description: |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
Returns the current value of the overwrite training file variable. |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
Input: |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
None |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
Output: |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
$value -> 1 = True/Overwrite or 0 = False/Append to current file |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
Example: |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
939
|
|
|
|
|
|
|
my $overwrite = $w2p->GetOverwriteOldFile(); |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
if( defined( $overwrite ) ) |
942
|
|
|
|
|
|
|
{ |
943
|
|
|
|
|
|
|
print( "Overwrite Old File: " ); |
944
|
|
|
|
|
|
|
print( "Yes\n" ) if $overwrite == 1; |
945
|
|
|
|
|
|
|
print( "No\n" ) if $overwrite == 0; |
946
|
|
|
|
|
|
|
} |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
undef( $w2p ); |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
=head2 Mutator Functions |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
=head3 SetTrainFilePath |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
Description: |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
Sets training file path. |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
Input: |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
$string -> Training file path |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
Output: |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
None |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
Example: |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
971
|
|
|
|
|
|
|
$w2p->SetTrainFilePath( "filePath" ); |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
undef( $w2p ); |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=head3 SetOutputFilePath |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
Description: |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
Sets word2phrase output file path. |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
Input: |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
$string -> word2phrase output file path |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
Output: |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
None |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
Example: |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
994
|
|
|
|
|
|
|
$w2p->SetOutputFilePath( "filePath" ); |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
undef( $w2p ); |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
=head3 SetMinCount |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
Description: |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
Sets minimum range value. |
1003
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
Input: |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
$value -> Minimum frequency value (Positive integer) |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
Output: |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
None |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
Example: |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1017
|
|
|
|
|
|
|
$w2p->SetMinCount( 1 ); |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
undef( $w2p ); |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
=head3 SetThreshold |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
Description: |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
Sets maximum range value. |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
Input: |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
$value -> Maximum frequency value (Positive integer) |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
Output: |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
None |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
Example: |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1040
|
|
|
|
|
|
|
$w2p->SetThreshold( 100 ); |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
undef( $w2p ); |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
=head3 SetW2PDebug |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
Description: |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
Sets word2phrase debug parameter. |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
Input: |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
$value -> word2phrase debug parameter (0 = No debug info, 1 = Show debug info, 2 = Show more debug info.) |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
Output: |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
None |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
Example: |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1063
|
|
|
|
|
|
|
$w2p->SetW2PDebug( 2 ); |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
undef( $w2p ); |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
=head3 SetWorkingDir |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
Description: |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
Sets working directory path. |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
Input: |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
$string -> Current working directory path. |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
Output: |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
None |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
Example: |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1086
|
|
|
|
|
|
|
$w2p->SetWorkingDir( "filePath" ); |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
undef( $w2p ); |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
=head3 SetWord2PhraseExeDir |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
Description: |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
Sets word2phrase executable file directory path. |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
Input: |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
$string -> Word2Phrase executable directory path. |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
Output: |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
None |
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
Example: |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1109
|
|
|
|
|
|
|
$w2p->SetWord2PhraseExeDir( "filePath" ); |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
undef( $w2p ); |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
=head3 SetOverwriteOldFile |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
Description: |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
Enables overwriting word2phrase output file if one already exists with the same output file name. |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
Input: |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
$value -> Integer: 1 = Overwrite old file, 0 = Do not overwrite old file. |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
Output: |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
None |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
Example: |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1132
|
|
|
|
|
|
|
$w2p->SetOverwriteOldFile( 1 ); |
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
undef( $w2p ); |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
=head2 Debug Functions |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
=head3 GetTime |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
Description: |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
Returns current time string in "Hour:Minute:Second" format. |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
Input: |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
None |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
Output: |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
$string -> XX:XX:XX ("Hour:Minute:Second") |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
Example: |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1157
|
|
|
|
|
|
|
my $time = $w2p->GetTime(); |
1158
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
print( "Current Time: $time\n" ) if defined( $time ); |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
undef( $w2p ); |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
=head3 GetDate |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
Description: |
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
Returns current month, day and year string in "Month/Day/Year" format. |
1168
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
Input: |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
None |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
Output: |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
$string -> XX/XX/XXXX ("Month/Day/Year") |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
Example: |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1182
|
|
|
|
|
|
|
my $date = $w2p->GetDate(); |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
print( "Current Date: $date\n" ) if defined( $date ); |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
undef( $w2p ); |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head3 WriteLog |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
Description: |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
Prints passed string parameter to the console, log file or both depending on user options. |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
Note: printNewLine parameter prints a new line character following the string if the parameter |
1195
|
|
|
|
|
|
|
is undefined and does not if parameter is 0. |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
Input: |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
$string -> String to print to the console/log file. |
1200
|
|
|
|
|
|
|
$value -> 0 = Do not print newline character after string, all else prints new line character including 'undef'. |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
Output: |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
None |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
Example: |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
use Word2vec::Word2phrase; |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
my $w2p = Word2vec::Word2phrase->new(); |
1211
|
|
|
|
|
|
|
$w2p->WriteLog( "Hello World" ); |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
undef( $w2p ); |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
=head1 Author |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
Clint Cuffy, Virginia Commonwealth University |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
=head1 COPYRIGHT |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
Copyright (c) 2016 |
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
Bridget T McInnes, Virginia Commonwealth University |
1224
|
|
|
|
|
|
|
btmcinnes at vcu dot edu |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
Clint Cuffy, Virginia Commonwealth University |
1227
|
|
|
|
|
|
|
cuffyca at vcu dot edu |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
1230
|
|
|
|
|
|
|
under the terms of the GNU General Public License as published by the Free |
1231
|
|
|
|
|
|
|
Software Foundation; either version 2 of the License, or (at your option) |
1232
|
|
|
|
|
|
|
any later version. |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT |
1235
|
|
|
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
1236
|
|
|
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with |
1239
|
|
|
|
|
|
|
this program; if not, write to: |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
The Free Software Foundation, Inc., |
1242
|
|
|
|
|
|
|
59 Temple Place - Suite 330, |
1243
|
|
|
|
|
|
|
Boston, MA 02111-1307, USA. |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
=cut |