line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
######################################################################################
|
4
|
|
|
|
|
|
|
# #
|
5
|
|
|
|
|
|
|
# Author: Clint Cuffy #
|
6
|
|
|
|
|
|
|
# Date: 06/16/2016 #
|
7
|
|
|
|
|
|
|
# Revised: 03/21/2017 #
|
8
|
|
|
|
|
|
|
# UMLS Similarity Word2Vec Package Interface Module #
|
9
|
|
|
|
|
|
|
# #
|
10
|
|
|
|
|
|
|
######################################################################################
|
11
|
|
|
|
|
|
|
# #
|
12
|
|
|
|
|
|
|
# Description: #
|
13
|
|
|
|
|
|
|
# ============ #
|
14
|
|
|
|
|
|
|
# Perl "word2vec" package interface for UMLS Similarity #
|
15
|
|
|
|
|
|
|
# #
|
16
|
|
|
|
|
|
|
######################################################################################
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
package Word2vec::Interface;
|
20
|
|
|
|
|
|
|
|
21
|
3
|
|
|
3
|
|
33626
|
use strict;
|
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
74
|
|
22
|
3
|
|
|
3
|
|
10
|
use warnings;
|
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
89
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Standard Package(s)
|
25
|
3
|
|
|
3
|
|
12
|
use Cwd;
|
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
172
|
|
26
|
3
|
|
|
3
|
|
1357
|
use File::Type;
|
|
3
|
|
|
|
|
40571
|
|
|
3
|
|
|
|
|
128
|
|
27
|
3
|
|
|
3
|
|
1885
|
use Sys::CpuAffinity;
|
|
3
|
|
|
|
|
134487
|
|
|
3
|
|
|
|
|
111
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Word2Vec Utility Package(s)
|
30
|
3
|
|
|
3
|
|
2254
|
use Word2vec::Word2vec;
|
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
132
|
|
31
|
3
|
|
|
3
|
|
1885
|
use Word2vec::Word2phrase;
|
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
75
|
|
32
|
3
|
|
|
3
|
|
1765
|
use Word2vec::Xmltow2v;
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use Word2vec::Wsddata;
|
34
|
|
|
|
|
|
|
use Word2vec::Util;
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
use vars qw($VERSION);
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$VERSION = '0.03';
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
######################################################################################
|
43
|
|
|
|
|
|
|
# Constructor
|
44
|
|
|
|
|
|
|
######################################################################################
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
BEGIN
|
47
|
|
|
|
|
|
|
{
|
48
|
|
|
|
|
|
|
# Compile-time hook: intentionally left empty (placeholder only; no
# module-level initialization is performed here).
|
49
|
|
|
|
|
|
|
}
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
######################################################################################
|
53
|
|
|
|
|
|
|
# Deconstructor
|
54
|
|
|
|
|
|
|
######################################################################################
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
END
|
57
|
|
|
|
|
|
|
{
|
58
|
|
|
|
|
|
|
# Interpreter-exit hook: intentionally left empty (placeholder only; no
# module-level cleanup is performed here).
|
59
|
|
|
|
|
|
|
}
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
######################################################################################
|
63
|
|
|
|
|
|
|
# new Class Operator
|
64
|
|
|
|
|
|
|
######################################################################################
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Constructs a Word2vec::Interface object.
#
# Positional arguments (all optional; pass undef to select the default):
#   word2vecDir         - String, word2vec package program directory (searched for in @INC when undef)
#   debugLog            - Boolean (0/1), default 0
#   writeLog            - Boolean (0/1), default 0 (when true, "InterfaceLog.txt" is opened for writing)
#   ignoreCompileErrors - Boolean (0/1), default 1
#   ignoreFileChecks    - Boolean (0/1), default 0
#   exitFlag            - Boolean (0/1), default 0 (overwritten below with the file check result)
#   workingDir          - String, default is the current working directory
#   word2vec, word2phrase, xmltow2v, util - pre-built helper objects (created when undef)
#   instanceAry, senseAry                 - array references (default: new empty arrays)
#   instanceCount, senseCount             - Integers, default 0
#
# Returns the blessed object.
sub new
{
    my ( $class, $word2vecDir, $debugLogArg, $writeLogArg, $ignoreCompileErrors, $ignoreFileChecks,
         $exitFlag, $workingDir, $word2vec, $word2phrase, $xmltow2v, $util,
         $instanceAry, $senseAry, $instanceCount, $senseCount ) = @_;

    my $self = {
        # Private Member Variables
        _word2vecDir         => $word2vecDir,           # String (word2vec package program directory)
        _debugLog            => $debugLogArg,           # Boolean (Binary): 0 = False, 1 = True
        _writeLog            => $writeLogArg,           # Boolean (Binary): 0 = False, 1 = True
        _ignoreCompileErrors => $ignoreCompileErrors,   # Boolean (Binary): 0 = False, 1 = True
        _ignoreFileChecks    => $ignoreFileChecks,      # Boolean (Binary): 0 = False, 1 = True
        _exitFlag            => $exitFlag,              # Boolean (Binary): 0 = False, 1 = True
        _workingDir          => $workingDir,            # String (current working directory)
        _word2vec            => $word2vec,              # "Word2vec::Word2vec" module object
        _word2phrase         => $word2phrase,           # "Word2vec::Word2phrase" module object
        _xmltow2v            => $xmltow2v,              # "Word2vec::Xmltow2v" module object
        _util                => $util,                  # "Word2vec::Util" module object

        # Word Sense Disambiguation Variables
        _instanceAry         => $instanceAry,           # Array Of 'Word2vec::Wsddata' Elements
        _senseAry            => $senseAry,              # Array Of 'Word2vec::Wsddata' Elements
        _instanceCount       => $instanceCount,         # Integer
        _senseCount          => $senseCount,            # Integer
    };

    # Set Variable Default If Not Defined
    $self->{ _debugLog }            = 0 if !defined ( $self->{ _debugLog } );
    $self->{ _writeLog }            = 0 if !defined ( $self->{ _writeLog } );
    $self->{ _ignoreCompileErrors } = 1 if !defined ( $self->{ _ignoreCompileErrors } );
    $self->{ _ignoreFileChecks }    = 0 if !defined ( $self->{ _ignoreFileChecks } );
    $self->{ _exitFlag }            = 0 if !defined ( $self->{ _exitFlag } );

    # A caller-supplied array reference is kept as-is; only a missing one is
    # replaced by a fresh empty array. (Previously the sense array was always
    # overwritten with the contents of the instance array.)
    $self->{ _instanceAry }         = [] if !defined ( $self->{ _instanceAry } );
    $self->{ _senseAry }            = [] if !defined ( $self->{ _senseAry } );

    $self->{ _instanceCount }       = 0 if !defined ( $self->{ _instanceCount } );
    $self->{ _senseCount }          = 0 if !defined ( $self->{ _senseCount } );

    # Open File Handler if checked variable is true
    $self->{ _fileHandle } = undef;

    if( $self->{ _writeLog } )
    {
        require IO::Handle;     # Provides autoflush() on lexical file handles

        if( open( my $logHandle, '>:utf8', 'InterfaceLog.txt' ) )
        {
            $logHandle->autoflush( 1 );             # Auto-flushes writes to log file
            $self->{ _fileHandle } = $logHandle;
        }
        else
        {
            # Fall back to the same state as "write log disabled" instead of
            # continuing with an unopened file handle.
            warn( "New - Warning: Unable To Open \"InterfaceLog.txt\" For Writing ($!) / Log File Disabled\n" );
            $self->{ _writeLog } = 0;
        }
    }

    bless $self, $class;

    $self->WriteLog( "New - Debug On" );
    $self->WriteLog( "New - No Working Directory Specified - Using Current Directory" ) if !defined( $self->{ _workingDir } );
    $self->{ _workingDir } = Cwd::getcwd() if !defined( $self->{ _workingDir } );
    $self->WriteLog( "New - Setting Working Directory To: \"" . $self->{ _workingDir } . "\"" ) if defined( $self->{ _workingDir } );

    if( !defined( $self->{ _word2vecDir } ) )
    {
        $self->WriteLog( "New - No Word2Vec Directory Specified / Searching For Word2Vec Directory" );

        # Every @INC entry is probed; the last existing candidate wins.
        for my $dir ( @INC )
        {
            $self->{ _word2vecDir } = "$dir/External/Word2vec"          if ( -e "$dir/External/Word2vec" );             # Test Directory
            $self->{ _word2vecDir } = "$dir/../External/Word2vec"       if ( -e "$dir/../External/Word2vec" );          # Dev Directory
            $self->{ _word2vecDir } = "$dir/../../External/Word2vec"    if ( -e "$dir/../../External/Word2vec" );       # Dev Directory
            $self->{ _word2vecDir } = "$dir/Word2vec/External/Word2vec" if ( -e "$dir/Word2vec/External/Word2vec" );    # Release Directory
        }

        $self->WriteLog( "New - Word2Vec Executable Directory Found" ) if defined( $self->{ _word2vecDir } );
        $self->WriteLog( "New - Setting Word2Vec Executable Directory To: \"" . $self->{ _word2vecDir } . "\"" ) if defined( $self->{ _word2vecDir } );
    }

    # Initialize "Word2vec::Word2vec", "Word2vec::Word2phrase", "Word2vec::Xmltow2v" and "Word2vec::Util" modules
    my $debugLog = $self->{ _debugLog };
    my $writeLog = $self->{ _writeLog };
    $self->{ _word2vec }    = Word2vec::Word2vec->new( $debugLog, $writeLog )                   if !defined ( $self->{ _word2vec } );
    $self->{ _word2phrase } = Word2vec::Word2phrase->new( $debugLog, $writeLog )                if !defined ( $self->{ _word2phrase } );
    $self->{ _xmltow2v }    = Word2vec::Xmltow2v->new( $debugLog, $writeLog, 1, 1, 1, 1, 2 )    if !defined ( $self->{ _xmltow2v } );
    $self->{ _util }        = Word2vec::Util->new( $debugLog, $writeLog )                       if !defined ( $self->{ _util } );

    # Set word2vec Directory In Respective Objects
    $self->{ _word2vec }->SetWord2VecExeDir( $self->{ _word2vecDir } );
    $self->{ _word2phrase }->SetWord2PhraseExeDir( $self->{ _word2vecDir } );

    # Run Word2Vec Package Executable/Source File Checks
    my $result = $self->RunFileChecks( $self->{ _word2vecDir } );

    # Set Exit Flag
    # NOTE(review): the flag receives the file check result itself (1 = passed,
    # 0 = failed) although the log message says "Exit Flag Set" on 0 - confirm
    # how callers interpret the value before changing it.
    $self->WriteLog( "New - Warning: An Error Has Occurred / Exit Flag Set" ) if $result == 0;
    $self->{ _exitFlag } = $result;

    return $self;
}
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
######################################################################################
|
164
|
|
|
|
|
|
|
# DESTROY
|
165
|
|
|
|
|
|
|
######################################################################################
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Object destructor: closes the log file handle when one is held.
sub DESTROY
{
    my $self      = shift;
    my $logHandle = $self->{ _fileHandle };

    # Nothing to release when no log file was opened
    return if !$logHandle;

    close( $logHandle );
}
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
######################################################################################
|
177
|
|
|
|
|
|
|
# Module Functions
|
178
|
|
|
|
|
|
|
######################################################################################
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Verifies that every word2vec package executable exists in $dir, compiling
# any missing one from its ".c" source file.
#
# Parameters:
#   $dir - String, word2vec package directory
#
# Returns 1 when all checks pass (or file checks are disabled), 0 otherwise.
sub RunFileChecks
{
    my ( $self, $dir ) = @_;

    if ( $self->GetIgnoreFileChecks() == 1 )
    {
        $self->WriteLog( "RunFileChecks - Warning: Ignore File Checks = TRUE / Skipping File Checks" );
        return 1;
    }

    # Check(s)
    if ( !defined( $dir ) )
    {
        $self->WriteLog( "RunFileChecks - Error: Directory Not Defined" );
        return 0;
    }

    if ( !( -e $dir ) )
    {
        $self->WriteLog( "RunFileChecks - Error: Directory Does Not Exist" );
        return 0;
    }

    # OS Check - Ignore Compile Errors If Operating System Is Windows
    my $isWindows = ( $self->GetOSType() eq "MSWin32" ) ? 1 : 0;
    $self->SetIgnoreCompileErrors( 1 ) if $isWindows;

    $self->WriteLog( "RunFileChecks - Running Module Check(s)" );
    $self->WriteLog( "RunFileChecks - Word2Vec Dir: $dir" );
    $self->WriteLog( "RunFileChecks - Word2Vec Directory Exists? Y" );

    # List of executable files to check for
    my @fileNameVtr = qw( compute-accuracy distance word2phrase word2vec word-analogy );

    for my $fileName ( @fileNameVtr )
    {
        my $result = 1;

        # Run file checks
        if( $self->CheckIfExecutableFileExists( $dir, $fileName ) == 0 )
        {
            my $sourceModified = 0;

            $result = $self->CheckIfSourceFileExists( $dir, $fileName );

            if ( $result == 1 && $isWindows )
            {
                $result         = $self->_ModifyWord2VecSourceForWindows();
                $sourceModified = 1 if $result == 1;
            }

            $result = $self->CompileSourceFile( $dir, $fileName ) if( $result == 1 );
            $result = $self->CheckIfExecutableFileExists( $dir, $fileName ) if( $result == 1 || $self->GetIgnoreCompileErrors() == 1 );

            # Always revert the temporary source patch, even when the build
            # failed, so "word2vec.c" is never left modified on disk.
            $self->_RemoveWord2VecSourceModification() if $sourceModified;
        }

        if ( $result == 0 )
        {
            $self->WriteLog( "RunFileChecks - Failed Word2Vec File Checks" );
            return 0;
        }
    }

    $self->WriteLog( "RunFileChecks - Passed Word2Vec File Checks" );
    return 1;
}
|
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Reports whether the executable $fileName (".exe" is appended on Windows)
# is present inside $dir.
#
# Returns 1 when the file exists, 0 when the file or the directory is missing.
sub CheckIfExecutableFileExists
{
    my ( $self, $dir, $fileName ) = @_;

    # OS Check
    $fileName .= ".exe" if ( $self->GetOSType() eq "MSWin32" );

    my $exePath = "$dir/$fileName";

    $self->WriteLog( "CheckIfExecutableFileExists - Checking For \"$fileName\" Executable File" );

    # Bail out early when the directory itself is missing
    unless ( -e "$dir" )
    {
        $self->WriteLog( "CheckIfExecutableFileExists - Specified Directory Does Not Exist" );
        return 0;
    }

    # Check for executable file
    my $found = ( -e "$exePath" ) ? 1 : 0;

    # Check file type (only queried for an existing file)
    my $mimeType = $found ? $self->GetFileType( $exePath ) : "";

    # The type comparison can only confirm a positive result; it never
    # rejects a file that exists.
    $found = 1 if $mimeType eq "application/x-executable-file";

    if ( $found == 1 )
    {
        $self->WriteLog( "CheckIfExecutableFileExists - Executable File Found" );
    }
    else
    {
        $self->WriteLog( "CheckIfExecutableFileExists - Warning: Executable File Not Found" );
    }

    return $found;
}
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Reports whether the C source file "$fileName.c" is present inside $dir.
#
# Returns 1 when the file exists, 0 otherwise.
sub CheckIfSourceFileExists
{
    my ( $self, $dir, $fileName ) = @_;

    my $sourcePath = "$dir/$fileName.c";

    $self->WriteLog( "CheckIfSourceFileExists - Checking For \"$fileName.c\" Source File" );

    # Check if the file/directory exists
    if ( !( -e "$sourcePath" ) )
    {
        $self->WriteLog( "CheckIfSourceFileExists - Warning: File Does Not Exist" );
        return 0;
    }

    my $exists = 1;

    # Check file type (the comparison can only confirm the positive result)
    my $mimeType = $self->GetFileType( $sourcePath );
    $exists = 1 if $mimeType eq "text/cpp";

    $self->WriteLog( "CheckIfSourceFileExists - File Exists" );

    return $exists;
}
|
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# Compiles "$dir/$fileName.c" with gcc into "$dir/$fileName" (".exe" is
# appended on Windows).
#
# Parameters:
#   $dir      - String, directory holding the source file
#   $fileName - String, program name without extension
#
# Returns 1 when the executable exists afterwards and has the file type
# expected for the platform, 0 otherwise.
sub CompileSourceFile
{
    my ( $self, $dir, $fileName ) = @_;

    my $isWindows      = ( $self->GetOSType() eq "MSWin32" ) ? 1 : 0;
    my $sourcePath     = $dir . "/" . $fileName . ".c";
    my $executablePath = $dir . "/" . $fileName;

    # Check if OS is Windows and adjust accordingly
    $executablePath .= ".exe" if $isWindows;

    $self->WriteLog( "CompileSourceFile - Compiling Source File \"$fileName.c\"" );

    # Compiler options depend on whether compile errors/warnings are ignored
    my $ignoreCompileErrors = $self->GetIgnoreCompileErrors();
    my @compilerOptions     = ();
    @compilerOptions = qw( -lm -pthread -O3 -march=native -funroll-loops -Wno-unused-result -Wno-int-to-pointer-cast ) if $ignoreCompileErrors == 1;
    @compilerOptions = qw( -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result )                  if $ignoreCompileErrors == 0;

    # Execute External System Command To Compile The Source File
    # The list form of system() bypasses the shell, so paths containing
    # quotes or other shell metacharacters are passed to gcc verbatim.
    if ( @compilerOptions )
    {
        my $status = system( "gcc", $sourcePath, "-o", $executablePath, @compilerOptions );
        $self->WriteLog( "CompileSourceFile - Warning: \"gcc\" Returned Non-Zero Status: $status" ) if $status != 0;
    }

    # The build counts as successful only when the output file exists and has
    # the executable file type expected for the platform.
    my $expectedType = $isWindows ? "application/x-ms-dos-executable" : "application/x-executable-file";
    my $result       = 0;

    if ( -e "$executablePath" )
    {
        my $fileType = $self->GetFileType( $executablePath );
        $result = 1 if defined( $fileType ) && $fileType eq $expectedType;
    }

    $self->WriteLog( "CompileSourceFile - Compile Failed" ) if $result == 0;
    $self->WriteLog( "CompileSourceFile - Compiled Successfully") if $result == 1;

    return $result;
}
|
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# Returns the file type string that File::Type's checktype_filename()
# reports for $filePath.
sub GetFileType
{
    my $self     = shift;
    my $filePath = shift;

    # scalar() keeps the lookup in scalar context regardless of the caller
    return scalar( File::Type->new()->checktype_filename( "$filePath" ) );
}
|
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# Returns the operating system name perl was built for (the value of $^O,
# e.g. "MSWin32" on Windows). The invocant is accepted but not consulted.
sub GetOSType
{
    return $^O;
}
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# Patches "word2vec.c" (inside the word2vec directory) so it builds on
# Windows: a "posix_memalign" replacement macro is inserted after the line
# containing "#define MAX_CODE_LENGTH ".
#
# The patch is idempotent: when the macro line is already present the file
# is left untouched.
#
# Returns 1 on success, 0 when the patched file could not be written
# completely. Dies when the file cannot be opened for writing.
# NOTE(review): terminates the whole program via exit() when the source file
# cannot be opened for reading - kept for backward compatibility.
sub _ModifyWord2VecSourceForWindows
{
    my ( $self ) = @_;

    my $modifiedCode = "#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno)\n";
    my $sourcePath   = $self->GetWord2VecDir() . "/word2vec.c";

    # Open "word2vec.c" and read its lines
    my $readHandle;

    if ( !open( $readHandle, "<", $sourcePath ) )
    {
        $self->WriteLog( "_ModifyWord2VecSourceForWindows - Error Opening \"word2vec.c\"" );
        exit;
    }

    my @sourceLines = <$readHandle>;
    close( $readHandle );

    # Already patched (e.g. a previous run did not revert it): nothing to do
    return 1 if grep { $_ eq $modifiedCode } @sourceLines;

    # Add $modifiedCode to list of #define statements
    my $tempStr = "";

    for my $line ( @sourceLines )
    {
        $tempStr .= $line;
        $tempStr .= $modifiedCode if ( index( $line, "#define MAX_CODE_LENGTH " ) != -1 );
    }

    # Overwrite old file with modified file
    my $writeHandle;

    if ( !open( $writeHandle, ">", $sourcePath ) )
    {
        my $message = "_ModifyWord2VecSourceForWindows - Error creating or writing file: \"word2vec.c\"";
        $self->WriteLog( $message );
        die( "$message\n" );
    }

    # Buffered write errors may only surface at close(), so both are checked
    my $written = print { $writeHandle } $tempStr;
    my $closed  = close( $writeHandle );

    if ( !$written || !$closed )
    {
        $self->WriteLog( "_ModifyWord2VecSourceForWindows - Error creating or writing file: \"word2vec.c\"" );
        return 0;
    }

    return 1;
}
|
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# Reverts the Windows patch in "word2vec.c" (inside the word2vec directory)
# by dropping every line equal to the "posix_memalign" replacement macro and
# rewriting the file.
#
# Returns 1. Exits the program when the file cannot be opened for reading and
# dies when it cannot be opened for writing.
sub _RemoveWord2VecSourceModification
{
    my $self = shift;

    my $patchLine  = "#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno)\n";
    my $sourceFile = $self->GetWord2VecDir() . "/word2vec.c";

    # Read "word2vec.c", keeping everything except the patch line
    my $input;

    unless ( open( $input, "<:", $sourceFile ) )
    {
        $self->WriteLog( "_RemoveWord2VecSourceModification - Error Opening \"word2vec.c\"" );
        exit;
    }

    my $restored = join( "", grep { $_ ne $patchLine } <$input> );
    close( $input );

    # Overwrite the modified file with the restored contents
    open( my $output, ">:", $sourceFile ) or die $self->WriteLog( "_RemoveWord2VecSourceModification - Error creating or writing file: \"word2vec.c\"" );
    print $output $restored;
    close( $output );

    return 1;
}
|
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
######################################################################################
|
408
|
|
|
|
|
|
|
# Interface Driver Module Functions
|
409
|
|
|
|
|
|
|
######################################################################################
|
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
# Loads the trained vector data in $vectorBinaryFile and computes the cosine
# similarity between the vectors of $wordA and $wordB.
#
# Parameters:
#   $vectorBinaryFile - String, path to the trained vector data file
#   $wordA, $wordB    - Strings, the two words to compare
#
# Returns the value computed by the word2vec object (undef when it could not
# be computed), or -1 when an argument is missing, the file does not exist or
# the vector data could not be loaded.
sub CLComputeCosineSimilarity
{
    my ( $self, $vectorBinaryFile, $wordA, $wordB ) = @_;

    # Check(s)
    # The existence test is only run on a defined path so an omitted argument
    # is reported once, without "uninitialized value" warnings.
    my $fileSpecified = defined( $vectorBinaryFile ) ? 1 : 0;
    my $fileExists    = ( $fileSpecified && ( -e $vectorBinaryFile ) ) ? 1 : 0;
    my $wordsDefined  = ( defined( $wordA ) && defined( $wordB ) ) ? 1 : 0;

    $self->WriteLog( "CLComputeCosineSimilarity - Vector Data File Not Specified" ) if !$fileSpecified;

    if ( $fileSpecified && !$fileExists )
    {
        print( "Error: Vector Data File Does Not Exist\n" ) if $self->GetDebugLog() == 0;
        $self->WriteLog( "CLComputeCosineSimilarity - Vector Data File: \"$vectorBinaryFile\" Does Not Exist" );
    }

    $self->WriteLog( "CLComputeCosineSimilarity - Two Words Required To Compute Cosine Similarity" ) if !$wordsDefined;
    return -1 if !$fileExists || !$wordsDefined;

    $self->WriteLog( "CLComputeCosineSimilarity - Preparing To Compute Cosine Similarity Of Word Vectors: \"$wordA\" and \"$wordB\"" );

    # Word2Vec Module Object
    my $word2vec = $self->GetWord2VecHandler();

    # Load vector data file (Binary/Text Data)
    my $dataLoaded = $word2vec->ReadTrainedVectorDataFromFile( $vectorBinaryFile );

    $self->WriteLog( "CLComputeCosineSimilarity - Unable To Load Vector Data From File: \"$vectorBinaryFile\"" ) if $dataLoaded == -1;
    return -1 if $dataLoaded == -1;

    my $value = $word2vec->ComputeCosineSimilarity( $wordA, $wordB );

    $self->WriteLog( "CLComputeCosineSimilarity - Computed Cosine Similarity: $value" ) if defined( $value );
    $self->WriteLog( "CLComputeCosineSimilarity - Error Computing Cosine Similarity" ) if !defined( $value );

    # Clear Vector Data From Memory
    $word2vec->ClearVocabularyHash();

    return $value;
}
|
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# Loads the trained vector data in $vectorBinaryFile and computes the
# multi-word cosine similarity between $wordA and $wordB. Each argument may
# hold several words separated by ':' (converted to spaces before use).
#
# Parameters:
#   $vectorBinaryFile - String, path to the trained vector data file
#   $wordA, $wordB    - Strings, ':'-separated word lists
#
# Returns the value computed by the word2vec object (undef when it could not
# be computed), or -1 when an argument is missing, the file does not exist or
# the vector data could not be loaded.
sub CLComputeMultiWordCosineSimilarity
{
    my ( $self, $vectorBinaryFile, $wordA, $wordB ) = @_;

    # Check(s)
    # The existence test is only run on a defined path so an omitted argument
    # is reported once, without "uninitialized value" warnings.
    my $fileSpecified = defined( $vectorBinaryFile ) ? 1 : 0;
    my $fileExists    = ( $fileSpecified && ( -e $vectorBinaryFile ) ) ? 1 : 0;
    my $wordsDefined  = ( defined( $wordA ) && defined( $wordB ) ) ? 1 : 0;

    $self->WriteLog( "CLComputeMultiWordCosineSimilarity - Vector Data File Not Specified" ) if !$fileSpecified;

    if ( $fileSpecified && !$fileExists )
    {
        print( "Error: Vector Data File Does Not Exist\n" ) if $self->GetDebugLog() == 0;
        $self->WriteLog( "CLComputeMultiWordCosineSimilarity - Vector Data File: \"$vectorBinaryFile\" Does Not Exist" );
    }

    $self->WriteLog( "CLComputeMultiWordCosineSimilarity - Two Words Required To Compute Cosine Similarity" ) if !$wordsDefined;
    return -1 if !$fileExists || !$wordsDefined;

    # Replace ':' With Space
    $wordA =~ s/:/ /g;
    $wordB =~ s/:/ /g;

    $self->WriteLog( "CLComputeMultiWordCosineSimilarity - Preparing To Compute Cosine Similarity Of Word Vectors: \"$wordA\" and \"$wordB\"" );

    # Word2Vec Module Object
    my $word2vec = $self->GetWord2VecHandler();

    # Load vector data file (Binary/Text Data)
    my $dataLoaded = $word2vec->ReadTrainedVectorDataFromFile( $vectorBinaryFile );

    $self->WriteLog( "CLComputeMultiWordCosineSimilarity - Unable To Load Vector Data From File: \"$vectorBinaryFile\"" ) if $dataLoaded == -1;
    return -1 if $dataLoaded == -1;

    my $value = $word2vec->ComputeMultiWordCosineSimilarity( $wordA, $wordB );

    $self->WriteLog( "CLComputeMultiWordCosineSimilarity - Computed Multi-Word Cosine Similarity: $value" ) if defined( $value );
    $self->WriteLog( "CLComputeMultiWordCosineSimilarity - Error Computing Cosine Similarity" ) if !defined( $value );

    # Clear Vector Data From Memory
    $word2vec->ClearVocabularyHash();

    return $value;
}
|
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
# Loads the trained vector data in $vectorBinaryFile and computes the cosine
# similarity between the averaged word vectors of $wordA and $wordB. Each
# argument may hold several words separated by ':' (converted to spaces
# before use).
#
# Parameters:
#   $vectorBinaryFile - String, path to the trained vector data file
#   $wordA, $wordB    - Strings, ':'-separated word lists
#
# Returns the value computed by the word2vec object (undef when it could not
# be computed), or -1 when an argument is missing, the file does not exist or
# the vector data could not be loaded.
sub CLComputeAvgOfWordsCosineSimilarity
{
    my ( $self, $vectorBinaryFile, $wordA, $wordB ) = @_;

    # Check(s)
    # The existence test is only run on a defined path so an omitted argument
    # is reported once, without "uninitialized value" warnings.
    my $fileSpecified = defined( $vectorBinaryFile ) ? 1 : 0;
    my $fileExists    = ( $fileSpecified && ( -e $vectorBinaryFile ) ) ? 1 : 0;
    my $wordsDefined  = ( defined( $wordA ) && defined( $wordB ) ) ? 1 : 0;

    $self->WriteLog( "CLComputeAvgOfWordsCosineSimilarity - Vector Data File Not Specified" ) if !$fileSpecified;

    if ( $fileSpecified && !$fileExists )
    {
        print( "Error: Vector Data File Does Not Exist\n" ) if $self->GetDebugLog() == 0;
        $self->WriteLog( "CLComputeAvgOfWordsCosineSimilarity - Vector Data File: \"$vectorBinaryFile\" Does Not Exist" );
    }

    $self->WriteLog( "CLComputeAvgOfWordsCosineSimilarity - Two Words Required To Compute Cosine Similarity" ) if !$wordsDefined;
    return -1 if !$fileExists || !$wordsDefined;

    # Replace ':' With Space
    $wordA =~ s/:/ /g;
    $wordB =~ s/:/ /g;

    $self->WriteLog( "CLComputeAvgOfWordsCosineSimilarity - Preparing To Compute Cosine Similarity Of Word Vectors: \"$wordA\" and \"$wordB\"" );

    # Word2Vec Module Object
    my $word2vec = $self->GetWord2VecHandler();

    # Load vector data file (Binary/Text Data)
    my $dataLoaded = $word2vec->ReadTrainedVectorDataFromFile( $vectorBinaryFile );

    $self->WriteLog( "CLComputeAvgOfWordsCosineSimilarity - Unable To Load Vector Data From File: \"$vectorBinaryFile\"" ) if $dataLoaded == -1;
    return -1 if $dataLoaded == -1;

    # Both words are known to be defined here, so the declaration needs no
    # statement modifier ("my ... if ..." has undefined behaviour in Perl).
    my $value = $word2vec->ComputeAvgOfWordsCosineSimilarity( $wordA, $wordB );

    $self->WriteLog( "CLComputeAvgOfWordsCosineSimilarity - Computed Average Cosine Similarity: $value" ) if defined( $value );
    $self->WriteLog( "CLComputeAvgOfWordsCosineSimilarity - Error Computing Cosine Similarity" ) if !defined( $value );

    # Clear Vector Data From Memory
    $word2vec->ClearVocabularyHash();

    return $value;
}
|
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
# Loads the trained vector data in $vectorBinaryFile and hands control to the
# word2vec object's MultiWordCosSimWithUserInput() routine.
#
# Returns 0 on completion, -1 when no file was given or no vector data is in
# memory after the load attempt.
sub CLMultiWordCosSimWithUserInput
{
    my $self             = shift;
    my $vectorBinaryFile = shift;

    # Check(s)
    return -1 unless defined( $vectorBinaryFile );

    my $handler = $self->GetWord2VecHandler();
    $handler->ReadTrainedVectorDataFromFile( $vectorBinaryFile );

    # Report and stop when nothing was loaded
    if ( $handler->IsVectorDataInMemory() == 0 )
    {
        print "Error Loading \"$vectorBinaryFile\"\n";
        $self->WriteLog( "CLMultiWordCosSimWithUserInput - Error Loading \"$vectorBinaryFile\"" );
        return -1;
    }

    $handler->MultiWordCosSimWithUserInput();

    # Clear Vector Data From Memory
    $handler->ClearVocabularyHash();

    return 0;
}
|
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# Loads the trained vector data in $vectorDataFilePath, looks up the vectors
# of $wordA and $wordB and adds them.
#
# Parameters:
#   $vectorDataFilePath - String, path to the trained vector data file
#   $wordA, $wordB      - Strings, the words whose vectors are added
#
# Returns the result of the word2vec object's AddTwoWordVectors(), or undef
# when a word is missing, is not found in the loaded data or has no vector
# values.
sub CLAddTwoWordVectors
{
    my ( $self, $vectorDataFilePath, $wordA, $wordB ) = @_;

    # Check(s)
    $self->WriteLog( "CLAddTwoWordVectors - Error: Word Vector A Not Defined" ) if !defined( $wordA );
    return undef if !defined( $wordA );

    $self->WriteLog( "CLAddTwoWordVectors - Error: Word Vector B Not Defined" ) if !defined( $wordB );
    return undef if !defined( $wordB );

    $self->WriteLog( "CLAddTwoWordVectors - Preparing To Add Two Word Vectors: \"$wordA\" and \"$wordB\"" );

    my $word2vec = $self->GetWord2VecHandler();

    $word2vec->ReadTrainedVectorDataFromFile( $vectorDataFilePath );
    my $vectorA = $word2vec->GetWordVector( $wordA );
    my $vectorB = $word2vec->GetWordVector( $wordB );

    $self->WriteLog( "CLAddTwoWordVectors - Error: Locating Word In Dictionary" ) if !defined( $vectorA );
    $self->WriteLog( "CLAddTwoWordVectors - Error: Locating Word In Dictionary" ) if !defined( $vectorB );

    # Clear Vector Data From Memory
    # Done before the error return as well, so a failed lookup does not leave
    # the loaded vector data in memory.
    $word2vec->ClearVocabularyHash();

    return undef if ( !defined( $vectorA ) || !defined( $vectorB ) );

    # Removing Words From Vector Data Array
    # Each vector string is split into its leading word and the remainder;
    # the remainder is undef when nothing follows the word.
    my ( $firstWord,  $dataA ) = split( ' ', $vectorA, 2 );
    my ( $secondWord, $dataB ) = split( ' ', $vectorB, 2 );

    return undef if ( !defined( $dataA ) || !defined( $dataB ) );

    $self->WriteLog( "CLAddTwoWordVectors - Adding Two Word Vectors: \n\n$firstWord: $dataA\n\n$secondWord: $dataB\n" );

    return $word2vec->AddTwoWordVectors( $dataA, $dataB );
}
|
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# Subtracts the trained vector of one word from another.
#
# Loads the vector data file through the Word2vec handler, looks up both
# words, strips the leading word token from each returned entry and hands the
# remaining vector strings to the handler's SubtractTwoWordVectors().
#
# Parameters:
#   $vectorDataFilePath - path passed to ReadTrainedVectorDataFromFile()
#   $wordA, $wordB      - words whose vectors are passed (in this order) to
#                         SubtractTwoWordVectors()
#
# Returns: whatever SubtractTwoWordVectors() returns, or undef when a word is
#          not supplied, cannot be located, or its entry holds no vector elements.
sub CLSubtractTwoWordVectors
{
    my ( $self, $vectorDataFilePath, $wordA, $wordB ) = @_;

    # Check(s)
    $self->WriteLog( "CLSubtractTwoWordVectors - Error: Word Vector A Not Defined" ) if !defined( $wordA );
    return undef if !defined( $wordA );

    $self->WriteLog( "CLSubtractTwoWordVectors - Error: Word Vector B Not Defined" ) if !defined( $wordB );
    return undef if !defined( $wordB );

    $self->WriteLog( "CLSubtractTwoWordVectors - Preparing To Subtract Two Word Vectors: \"$wordA\" and \"$wordB\"" );

    my $word2vec = $self->GetWord2VecHandler();
    $word2vec->ReadTrainedVectorDataFromFile( $vectorDataFilePath );
    $wordA = $word2vec->GetWordVector( $wordA );
    $wordB = $word2vec->GetWordVector( $wordB );

    # Clear Vector Data From Memory
    # Done before the lookup checks so the failure path releases the loaded
    # vocabulary as well (previously it was only cleared on success).
    $word2vec->ClearVocabularyHash();

    $self->WriteLog( "CLSubtractTwoWordVectors - Error: Locating Word In Dictionary" ) if !defined( $wordA );
    $self->WriteLog( "CLSubtractTwoWordVectors - Error: Locating Word In Dictionary" ) if !defined( $wordB );
    return undef if ( !defined( $wordA ) || !defined( $wordB ) );

    # Removing Words From Vector Data Array
    # Each entry is "<word> <elements...>"; a limit of 2 keeps the elements as one string.
    my ( $firstWord,  $vectorA ) = split( ' ', $wordA, 2 );
    my ( $secondWord, $vectorB ) = split( ' ', $wordB, 2 );

    # An entry made up of only the word leaves its vector undefined
    return undef if ( !defined( $vectorA ) || !defined( $vectorB ) );

    $self->WriteLog( "CLSubtractTwoWordVectors - Subtracting Two Word Vectors: \n\n$firstWord: $vectorA\n\n$secondWord: $vectorB\n" );

    return $word2vec->SubtractTwoWordVectors( $vectorA, $vectorB );
}
|
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# Configures the Word2vec handler from a hash of command-line style options
# and starts training.
#
# Parameters:
#   $optionsHashRef - hash reference keyed by option switch ("-trainfile",
#                     "-outputfile", "-size", ...). Unrecognised keys are
#                     ignored. An undefined reference is treated as an empty
#                     option set.
#
# Returns: the value returned by the handler's ExecuteTraining(), or -1 when
#          the training file is missing/non-existent/empty or no output file
#          has been set.
sub CLStartWord2VecTraining
{
    my ( $self, $optionsHashRef ) = @_;

    # Dereferencing undef dies under "strict refs"; fall back to no options instead
    my %options = defined( $optionsHashRef ) ? %{ $optionsHashRef } : ();

    # Word2Vec Module Object
    my $word2vec = $self->GetWord2VecHandler();

    # Option Switch => Word2Vec Setter Method
    my %setterForOption = (
        "-trainfile"  => "SetTrainFilePath",
        "-outputfile" => "SetOutputFilePath",
        "-size"       => "SetWordVecSize",
        "-window"     => "SetWindowSize",
        "-sample"     => "SetSample",
        "-negative"   => "SetNegative",
        "-hs"         => "SetHSoftMax",
        "-binary"     => "SetBinaryOutput",
        "-threads"    => "SetNumOfThreads",
        "-iter"       => "SetNumOfIterations",
        "-cbow"       => "SetUseCBOW",
        "-classes"    => "SetClasses",
        "-read-vocab" => "SetReadVocabFilePath",
        "-save-vocab" => "SetSaveVocabFilePath",
        "-debug"      => "SetDebugTraining",
        "-overwrite"  => "SetOverwriteOldFile",
    );

    # Parse and Set Word2Vec Options
    for my $option ( keys %options )
    {
        my $setter = $setterForOption{$option};
        $word2vec->$setter( $options{$option} ) if defined( $setter );
    }

    # Check(s)
    my $trainFile  = $word2vec->GetTrainFilePath();
    my $outputFile = $word2vec->GetOutputFilePath();

    if( !defined( $trainFile ) || $trainFile eq "" )
    {
        $self->WriteLog( "CLStartWord2VecTraining - Error: No Training File Specified" );
        return -1;
    }

    if( !( -e "$trainFile" ) )
    {
        $self->WriteLog( "CLStartWord2VecTraining - Error: Training File: \"$trainFile\" Does Not Exist" );
        return -1;
    }

    if( -z "$trainFile" )
    {
        $self->WriteLog( "CLStartWord2VecTraining - Error: Training File Exists But Has No Data / File Size = 0 bytes" );
        return -1;
    }

    if( !defined( $outputFile ) || $outputFile eq "" )
    {
        $self->WriteLog( "CLStartWord2VecTraining - Error: No Output File/Directory Specified" );
        return -1;
    }

    # Exactly two options at this point means nothing beyond the two paths was supplied
    $self->WriteLog( "CLStartWord2VecTraining - Warning: No Word2Vec Options Specified - Using Default Options" ) if scalar( keys %options ) == 2;

    $self->WriteLog( "CLStartWord2VecTraining - Starting Word2Vec Training" );
    my $result = $word2vec->ExecuteTraining();

    if( $result == 0 )
    {
        $self->WriteLog( "CLStartWord2VecTraining - Word2Vec Training Successful" );
    }
    else
    {
        $self->WriteLog( "CLStartWord2VecTraining - Word2Vec Training Not Successful" );
        $self->WriteLog( "CLStartWord2VecTraining - See \"Word2vecLog.txt\" For Details" );
    }

    return $result;
}
|
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
# Configures the Word2phrase handler from a hash of command-line style
# options and starts training.
#
# Parameters:
#   $optionsHashRef - hash reference keyed by option switch ("-trainfile",
#                     "-outputfile", "-min-count", "-threshold", "-debug",
#                     "-overwrite"). Unrecognised keys are ignored. An
#                     undefined reference is treated as an empty option set.
#
# Returns: the value returned by the handler's ExecuteTraining(), or -1 when
#          the training file is missing/non-existent/empty or no output file
#          has been set.
sub CLStartWord2PhraseTraining
{
    my ( $self, $optionsHashRef ) = @_;

    # Dereferencing undef dies under "strict refs"; fall back to no options instead
    my %options = defined( $optionsHashRef ) ? %{ $optionsHashRef } : ();

    # Word2Phrase Module Object
    my $word2phrase = $self->GetWord2PhraseHandler();

    # Option Switch => Word2Phrase Setter Method
    my %setterForOption = (
        "-trainfile"  => "SetTrainFilePath",
        "-outputfile" => "SetOutputFilePath",
        "-min-count"  => "SetMinCount",
        "-threshold"  => "SetThreshold",
        "-debug"      => "SetW2PDebug",
        "-overwrite"  => "SetOverwriteOldFile",
    );

    # Parse and Set Word2Phrase Options
    for my $option ( keys %options )
    {
        my $setter = $setterForOption{$option};
        $word2phrase->$setter( $options{$option} ) if defined( $setter );
    }

    # Check(s)
    my $trainFile  = $word2phrase->GetTrainFilePath();
    my $outputFile = $word2phrase->GetOutputFilePath();

    if( !defined( $trainFile ) || $trainFile eq "" )
    {
        $self->WriteLog( "CLStartWord2PhraseTraining - Error: No Training File Specified" );
        return -1;
    }

    if( !( -e "$trainFile" ) )
    {
        $self->WriteLog( "CLStartWord2PhraseTraining - Error: Training File: \"$trainFile\" Does Not Exist" );
        return -1;
    }

    if( -z "$trainFile" )
    {
        $self->WriteLog( "CLStartWord2PhraseTraining - Error: Training File Exists But Has No Data / File Size = 0 bytes" );
        return -1;
    }

    if( !defined( $outputFile ) || $outputFile eq "" )
    {
        $self->WriteLog( "CLStartWord2PhraseTraining - Error: No Output File/Directory Specified" );
        return -1;
    }

    # Exactly two options at this point means nothing beyond the two paths was supplied
    $self->WriteLog( "CLStartWord2PhraseTraining - Warning: No Word2Phrase Options Specified - Using Default Options" ) if scalar( keys %options ) == 2;

    $self->WriteLog( "CLStartWord2PhraseTraining - Starting Word2Phrase Training" );
    my $result = $word2phrase->ExecuteTraining();

    if( $result == 0 )
    {
        $self->WriteLog( "CLStartWord2PhraseTraining - Word2Phrase Training Successful" );
    }
    else
    {
        $self->WriteLog( "CLStartWord2PhraseTraining - Word2Phrase Training Not Successful" );
        $self->WriteLog( "CLStartWord2PhraseTraining - See \"Word2phraseLog.txt\" For Details" );
    }

    return $result;
}
|
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
# Compiles a plain text corpus from Medline XML via the Xmltow2v handler.
#
# Reads the conversion settings from the option hash, substitutes a default
# (and logs a warning) for every setting that was not supplied, validates the
# directories and date ranges, pushes the settings into the handler and runs
# ConvertMedlineXMLToW2V() on the working directory.
#
# Parameters:
#   $optionsHashRef - hash reference keyed by "-workdir", "-savedir",
#                     "-startdate", "-enddate", "-title", "-abstract",
#                     "-qparse", "-compwordfile", "-threads", "-overwrite"
#
# Returns: the value returned by ConvertMedlineXMLToW2V(), or -1 when a check
#          fails or the compound word data cannot be loaded.
sub CLCompileTextCorpus
{
    my ( $self, $optionsHashRef ) = @_;

    my %options = %{ $optionsHashRef };

    # XMLToW2V Option Variables (left undefined when the switch was not supplied)
    my $workingDir            = $options{ "-workdir" };
    my $saveDir               = $options{ "-savedir" };
    my $startDate             = $options{ "-startdate" };
    my $endDate               = $options{ "-enddate" };
    my $storeTitle            = $options{ "-title" };
    my $storeAbstract         = $options{ "-abstract" };
    my $quickParse            = $options{ "-qparse" };
    my $compoundWordFile      = $options{ "-compwordfile" };
    my $numOfThreads          = $options{ "-threads" };
    my $overwriteExistingFile = $options{ "-overwrite" };

    undef( $optionsHashRef );
    undef( %options );

    # Check(s) - Fill In Defaults For Missing Options
    if( !defined( $workingDir ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: Working Directory Not Defined - Using Default Directory" );
        $workingDir = $self->GetWorkingDirectory();
    }

    if( !defined( $saveDir ) )
    {
        print( "Warning: Save Directory Not Defined - Using Working Directory / Saving To \"text.txt\"\n" ) if $self->GetDebugLog() == 0;
        $self->WriteLog( "CLCompileTextCorpus - Warning: Save Directory Not Defined - Using Working Directory / Saving To \"text.txt\"" );
        $saveDir = "text.txt";
    }

    if( !defined( $startDate ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: Start Date Not Defined - Using 00/00/0000 By Default" );
        $startDate = "00/00/0000";
    }

    if( !defined( $endDate ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: End Date Not Defined - Using 99/99/9999 By Default" );
        $endDate = "99/99/9999";
    }

    if( !defined( $storeTitle ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: Store Title Not Defined - Storing All Article Title By Default" );
        $storeTitle = 1;
    }

    if( !defined( $storeAbstract ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: Store Abstract Not Defined - Storing All Article Abstracts By Default" );
        $storeAbstract = 1;
    }

    if( !defined( $quickParse ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: Quick Parse Option Not Defined - Enabling Quick Parse By Default" );
        $quickParse = 1;
    }

    if( !defined( $compoundWordFile ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: Compound Word File Not Defined - Compoundify Option Disabled" );
        $self->XTWSetCompoundifyText( 0 );
    }

    if( !defined( $numOfThreads ) )
    {
        print "Warning: Number Of Working Threads Not Defined - Using 1 Thread Per CPU Core\n" if $self->GetDebugLog() == 0;
        $self->WriteLog( "CLCompileTextCorpus - Warning: Number Of Working Threads Not Defined - Using 1 Thread Per CPU Core By Default / " . Sys::CpuAffinity::getNumCpus() . " Threads" );
        $numOfThreads = Sys::CpuAffinity::getNumCpus();
    }

    # Without an explicit overwrite choice an existing save file is never touched
    if( !defined( $overwriteExistingFile ) )
    {
        if( -e "$saveDir" )
        {
            print( "Error: File \"$saveDir\" Exists And Overwrite Existing File Option Not Defined\n" ) if $self->GetDebugLog() == 0;
            $self->WriteLog( "CLCompileTextCorpus - Error: File \"$saveDir\" Exists And Overwrite Existing File Option Not Defined" );
            return -1;
        }

        $self->WriteLog( "CLCompileTextCorpus - Warning: Overwrite Existing File Option Not Defined - Default = 1 / YES" );
        $overwriteExistingFile = 1;
    }

    if( $overwriteExistingFile == 0 && ( -e "$saveDir" ) )
    {
        print( "Warning: Existing Save File Found / Appending To File\n" ) if $self->GetDebugLog() == 0;
        $self->WriteLog( "CLCompileTextCorpus - Warning: Existing Save File Found / Appending To File" );
    }

    if( $overwriteExistingFile == 1 && ( -e "$saveDir" ) )
    {
        print( "Warning: Existing Save File Found / Overwriting File\n" ) if $self->GetDebugLog() == 0;
        $self->WriteLog( "CLCompileTextCorpus - Warning: Existing Save File Found / Overwriting File" );
    }

    if( $workingDir eq "" )
    {
        $self->WriteLog( "CLCompileTextCorpus - Warning: Working Directory Is Blank - Using \".\" Directory" );
        $workingDir = ".";
    }

    if( !( -e "$workingDir" ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Error: Working Directory: \"$workingDir\" Does Not Exist" );
        return -1;
    }

    if( $self->XTWGetCompoundifyText() == 1 && !( -e "$compoundWordFile" ) )
    {
        $self->WriteLog( "CLCompileTextCorpus - Error: Compound Word File \"$compoundWordFile\" Does Not Exist - Disabling Compoundify Option" );
        $self->XTWSetCompoundifyText( 0 );
    }

    $self->WriteLog( "CLCompileTextCorpus - Printing Current Xmltow2v Settings" );

    # Print Status Messages In The Event Debug Logging Is Disabled
    if( $self->GetDebugLog() == 0 )
    {
        print "Printing Current Xmltow2v Setting(s)\n";
        print "Working Directory: $workingDir\n";
        print "Save Directory: $saveDir\n";
        print "Start Date: $startDate\n";
        print "End Date: $endDate\n";
        print "Store Title: $storeTitle - ( 0=Disabled / 1=Enabled )\n";
        print "Store Abstract: $storeAbstract - ( 0=Disabled / 1=Enabled )\n";
        print "Quick Parse: $quickParse - ( 0=Disabled / 1=Enabled )\n";

        my $compoundifyText = $self->XTWGetCompoundifyText();
        print "Warning: No Compound Word File Specified - Compoundify Option Disabled\n" if $compoundifyText == 0;

        if( $compoundifyText == 1 )
        {
            print "Compound Word File Specified - Compoundify Option Enabled\n";
            print "Compound Word File: $compoundWordFile\n";
        }
    }

    $self->WriteLog( "CLCompileTextCorpus - Working Directory: \"$workingDir\"" );
    $self->WriteLog( "CLCompileTextCorpus - Save Directory: \"$saveDir\"" );
    $self->WriteLog( "CLCompileTextCorpus - Start Date: $startDate" );
    $self->WriteLog( "CLCompileTextCorpus - End Date: $endDate" );
    $self->WriteLog( "CLCompileTextCorpus - Store Title: $storeTitle" );
    $self->WriteLog( "CLCompileTextCorpus - Store Abstract: $storeAbstract" );
    $self->WriteLog( "CLCompileTextCorpus - Quick Parse: $quickParse" );
    $self->WriteLog( "CLCompileTextCorpus - Number Of Working Threads: $numOfThreads" );
    $self->WriteLog( "CLCompileTextCorpus - Overwrite Previous File: $overwriteExistingFile" );
    $self->WriteLog( "CLCompileTextCorpus - Compoundifying Using File: \"$compoundWordFile\"" ) if $self->XTWGetCompoundifyText() == 1;

    # Both dates must split into at least three '/' separated parts
    my @beginDateAry = split( '/', $startDate );
    my @endDateAry   = split( '/', $endDate );

    if( @beginDateAry < 3 )
    {
        $self->WriteLog( "CLCompileTextCorpus - Error: Start Date Range In Wrong Format - XX/XX/XXXX" );
        return -1;
    }

    if( @endDateAry < 3 )
    {
        $self->WriteLog( "CLCompileTextCorpus - Error: End Date Range In Wrong Format - XX/XX/XXXX" );
        return -1;
    }

    undef( @beginDateAry );
    undef( @endDateAry );

    my $result = 0;

    # Hand The Settings To The Xmltow2v Handler
    my $xmlconv = $self->GetXMLToW2VHandler();
    $xmlconv->SetStoreTitle( $storeTitle );
    $xmlconv->SetStoreAbstract( $storeAbstract );
    $xmlconv->SetWorkingDir( "$workingDir" );
    $xmlconv->SetSavePath( "$saveDir" );
    $xmlconv->SetBeginDate( $startDate );
    $xmlconv->SetEndDate( $endDate );
    $xmlconv->SetQuickParse( $quickParse );
    $xmlconv->SetNumOfThreads( $numOfThreads );
    $xmlconv->SetOverwriteExistingFile( $overwriteExistingFile );

    # Load The Compound Word Data And Build Its Search Tree When A Usable File Was Given
    if( defined( $compoundWordFile ) && ( -e "$compoundWordFile" ) )
    {
        $result = $xmlconv->ReadCompoundWordDataFromFile( "$compoundWordFile", 1 );

        # Check
        if( $result == -1 )
        {
            $self->WriteLog( "CLCompileTextCorpus - Error Loading Compound Word File" );
            return -1;
        }

        $result = $xmlconv->CreateCompoundWordBST() if ( $result == 0 );

        # Check
        if( $result == -1 )
        {
            $self->WriteLog( "CLCompileTextCorpus - Error Creating Compound Word Binary Search Tree" );
            return -1;
        }
    }

    $result = $xmlconv->ConvertMedlineXMLToW2V( "$workingDir" );

    # Clean up
    $xmlconv->ClearCompoundWordAry();
    $xmlconv->ClearCompoundWordBST();

    return $result;
}
|
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
# Reads a trained vector data file and saves it again through the Word2vec
# handler's SaveTrainedVectorDataToFile() using its default save format.
#
# Parameters:
#   $filePath - existing vector data file to read
#   $savePath - destination file; defaults to "convertedvectors.bin"
#
# Returns: the value returned by SaveTrainedVectorDataToFile(), or -1 when the
#          input file is not given, does not exist or cannot be read.
sub CLConvertWord2VecVectorFileToText
{
    my ( $self, $filePath, $savePath ) = @_;

    # Check(s)
    if( !defined( $filePath ) )
    {
        # Same message text as before; the empty quotes are written literally so an
        # undefined value is no longer interpolated (which raised a warning).
        $self->WriteLog( "CLConvertWord2VecVectorFileToText - Specified File: \"\" Not Defined" );
        return -1;
    }

    if( !( -e $filePath ) )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToText - Specified File: \"$filePath\" Does Not Exist" );
        return -1;
    }

    if( !defined( $savePath ) )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToText - No Save File Name Specified - Saving To \"convertedvectors.bin\"" );
        $savePath = "convertedvectors.bin";
    }

    my $w2v             = $self->GetWord2VecHandler();
    my $previousSetting = $w2v->GetSparseVectorMode();
    my $result          = $w2v->ReadTrainedVectorDataFromFile( $filePath );

    # Check
    if( $result == -1 )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToText - Error Reading Vector Data File" );

        # Restore the saved mode on this path too, in case the failed read altered it
        $w2v->SetSparseVectorMode( $previousSetting );
        return -1;
    }

    $result = $w2v->SaveTrainedVectorDataToFile( $savePath );

    # Check
    $self->WriteLog( "CLConvertWord2VecVectorFileToText - Error Saving Vector Data To File" ) if ( $result == -1 );

    # Clean up
    $w2v->ClearVocabularyHash();
    $w2v->SetSparseVectorMode( $previousSetting );

    $self->WriteLog( "CLConvertWord2VecVectorFileToText - Finished Conversion" );
    return $result;
}
|
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
# Reads a trained vector data file and saves it again through the Word2vec
# handler's SaveTrainedVectorDataToFile() with save format argument 1.
#
# Parameters:
#   $filePath - existing vector data file to read
#   $savePath - destination file; defaults to "convertedvectors.bin"
#
# Returns: the value returned by SaveTrainedVectorDataToFile(), or -1 when the
#          input file is not given, does not exist or cannot be read.
sub CLConvertWord2VecVectorFileToBinary
{
    my ( $self, $filePath, $savePath ) = @_;

    # Check(s)
    if( !defined( $filePath ) )
    {
        # Same message text as before; the empty quotes are written literally so an
        # undefined value is no longer interpolated (which raised a warning).
        $self->WriteLog( "CLConvertWord2VecVectorFileToBinary - Specified File: \"\" Not Defined" );
        return -1;
    }

    if( !( -e $filePath ) )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToBinary - Specified File: \"$filePath\" Does Not Exist" );
        return -1;
    }

    if( !defined( $savePath ) )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToBinary - No Save File Name Specified - Saving To \"convertedvectors.bin\"" );
        $savePath = "convertedvectors.bin";
    }

    my $w2v             = $self->GetWord2VecHandler();
    my $previousSetting = $w2v->GetSparseVectorMode();
    my $result          = $w2v->ReadTrainedVectorDataFromFile( $filePath );

    # Check
    if( $result == -1 )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToBinary - Error Reading Vector Data File" );

        # Restore the saved mode on this path too, in case the failed read altered it
        $w2v->SetSparseVectorMode( $previousSetting );
        return -1;
    }

    $result = $w2v->SaveTrainedVectorDataToFile( $savePath, 1 );

    # Check
    $self->WriteLog( "CLConvertWord2VecVectorFileToBinary - Error Saving Vector Data To File" ) if ( $result == -1 );

    # Clean up
    $w2v->ClearVocabularyHash();
    $w2v->SetSparseVectorMode( $previousSetting );

    $self->WriteLog( "CLConvertWord2VecVectorFileToBinary - Finished Conversion" );
    return $result;
}
|
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
# Reads a trained vector data file and saves it again through the Word2vec
# handler's SaveTrainedVectorDataToFile() with save format argument 2.
#
# Parameters:
#   $filePath - existing vector data file to read
#   $savePath - destination file; defaults to "convertedvectors.bin"
#
# Returns: the value returned by SaveTrainedVectorDataToFile(), or -1 when the
#          input file is not given, does not exist or cannot be read.
sub CLConvertWord2VecVectorFileToSparse
{
    my ( $self, $filePath, $savePath ) = @_;

    # Log messages now carry this subroutine's own name; they previously used
    # "CLConvertVectorsToSparseVectors", which is not the name of this sub.

    # Check(s)
    if( !defined( $filePath ) )
    {
        # The empty quotes are written literally so an undefined value is no
        # longer interpolated (which raised a warning).
        $self->WriteLog( "CLConvertWord2VecVectorFileToSparse - Specified File: \"\" Not Defined" );
        return -1;
    }

    if( !( -e $filePath ) )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToSparse - Specified File: \"$filePath\" Does Not Exist" );
        return -1;
    }

    if( !defined( $savePath ) )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToSparse - No Save File Name Specified - Saving To \"convertedvectors.bin\"" );
        $savePath = "convertedvectors.bin";
    }

    my $w2v             = $self->GetWord2VecHandler();
    my $previousSetting = $w2v->GetSparseVectorMode();
    my $result          = $w2v->ReadTrainedVectorDataFromFile( $filePath );

    # Check
    if( $result == -1 )
    {
        $self->WriteLog( "CLConvertWord2VecVectorFileToSparse - Error Reading Vector Data File" );

        # Restore the saved mode on this path too, in case the failed read altered it
        $w2v->SetSparseVectorMode( $previousSetting );
        return -1;
    }

    $result = $w2v->SaveTrainedVectorDataToFile( $savePath, 2 );

    # Check
    $self->WriteLog( "CLConvertWord2VecVectorFileToSparse - Error Saving Vector Data To File" ) if ( $result == -1 );

    # Clean up
    $w2v->ClearVocabularyHash();
    $w2v->SetSparseVectorMode( $previousSetting );

    $self->WriteLog( "CLConvertWord2VecVectorFileToSparse - Finished Conversion" );
    return $result;
}
|
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
# Compoundifies the text of a file and writes the result to another file.
#
# Reads the whole input file into one string, cleans it with the Xmltow2v
# handler's RemoveSpecialCharactersFromString(), loads the compound word list,
# builds the compound word search tree, runs CompoundifyString() on the text
# and writes the result followed by a newline to the save file.
#
# Parameters:
#   $filePath         - existing text file to compoundify
#   $savePath         - destination file; defaults to "comptext.txt"
#   $compoundWordFile - existing compound word list file
#
# Returns: 0 on success, -1 when an argument is missing, a file does not
#          exist, or the compound word data / search tree cannot be prepared.
# Dies:    when the input file cannot be opened or the save file cannot be
#          opened for writing.
sub CLCompoundifyTextInFile
{
    my ( $self, $filePath, $savePath, $compoundWordFile ) = @_;

    # Check(s)
    $self->WriteLog( "CLCompoundifyTextInFile - No File Specified" ) if !defined( $filePath );
    return -1 if !defined( $filePath );

    $self->WriteLog( "CLCompoundifyTextInFile - Save File Name Not Specified - Saving File To: \"comptext.txt\"" ) if !defined( $savePath );
    $savePath = "comptext.txt" if !defined( $savePath );

    $self->WriteLog( "CLCompoundifyTextInFile - No Compound Word File Specified" ) if !defined( $compoundWordFile );
    return -1 if !defined( $compoundWordFile );

    $self->WriteLog( "CLCompoundifyTextInFile - Specified File: \"$filePath\" Does Not Exist" ) if !( -e $filePath );
    return -1 if !( -e $filePath );

    $self->WriteLog( "CLCompoundifyTextInFile - Specified File: \"$compoundWordFile\" Does Not Exist" ) if !( -e $compoundWordFile );
    return -1 if !( -e $compoundWordFile );

    # NOTE(review): this message reports the compound word file, not the file being compoundified - confirm intent
    $self->WriteLog( "CLCompoundifyTextInFile - Compoundifying File: \"$compoundWordFile\"" );

    my $text = "";

    open( my $fileHandle, "<:encoding(utf8)", "$filePath" ) or die "CLCompoundifyTextInFile - Error: Cannot Open Specified File";

    # NOTE(review): lines are joined without a separator, so the last word of a
    # line runs into the first word of the next - confirm this is intended
    while( my $line = <$fileHandle> )
    {
        chomp( $line );
        $text .= $line;
    }

    close( $fileHandle );

    my $xmltow2v = $self->GetXMLToW2VHandler();

    $self->WriteLog( "CLCompoundifyTextInFile - Cleaning Text Data" );
    $text = $xmltow2v->RemoveSpecialCharactersFromString( $text );

    my $result = $xmltow2v->ReadCompoundWordDataFromFile( $compoundWordFile, 1 );

    $self->WriteLog( "CLCompoundifyTextInFile - An Error Has Occured While Loading Compound Word File" ) if $result == -1;
    return -1 if $result == -1;

    # Keep the return value: it was previously discarded, which left the check
    # below re-testing the (already successful) read result.
    $result = $xmltow2v->CreateCompoundWordBST();

    if( defined( $result ) && $result == -1 )
    {
        $self->WriteLog( "CLCompoundifyTextInFile - An Error Has Occured While Creating Compound Word BST" );
        return -1;
    }

    $text = $xmltow2v->CompoundifyString( $text );

    open( $fileHandle, ">:encoding(utf8)", "$savePath" ) or die "CLCompoundifyTextInFile - Error: Cannot Save File - \"$savePath\"";
    print $fileHandle "$text\n";
    close( $fileHandle );
    undef( $fileHandle );

    # Clean up
    $text = "";
    $xmltow2v->ClearCompoundWordAry();
    $xmltow2v->ClearCompoundWordBST();

    $self->WriteLog( "CLCompoundifyTextInFile - Finished Compoundify" );

    return 0;
}
|
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
# CLSortVectorFile( $optionsHashRef )
#
# Loads a word2vec vector data file through the word2vec object, marks its
# header with the "sorted" signature and saves it again.
#
# Options (hash reference):
#   "-filepath"  => path of the vector data file (required)
#   "-overwrite" => 1 to save over the original file; any other/undefined
#                   value saves to "sortedvectors.bin" instead
#
# Returns -1 on any error, 1 when the header already carries the sorted
# signature (nothing is written), otherwise the value returned by
# W2VSaveTrainedVectorDataToFile().
#
# NOTE(review): the actual ordering is presumably performed by the word2vec
# object's read/save routines - nothing in this sub sorts the data itself.
sub CLSortVectorFile
{
    my ( $self, $optionsHashRef ) = @_;

    # An undefined options reference is treated like an empty option set
    # instead of dying on the dereference.
    my %options = defined( $optionsHashRef ) ? %{ $optionsHashRef } : ();

    # Check(s)
    if( keys( %options ) == 0 )
    {
        $self->WriteLog( "CLSortVectorFile - Error: No Arguments Specified" );
        return -1;
    }

    my $vectorDataFilePath = $options{ "-filepath" };
    my $overwriteOldFile   = $options{ "-overwrite" };

    # Check(s)
    if( !defined( $vectorDataFilePath ) )
    {
        $self->WriteLog( "CLSortVectorFile - Error: Vector Data File Path Not Specified" );
        return -1;
    }

    if( !( -e $vectorDataFilePath ) )
    {
        $self->WriteLog( "CLSortVectorFile - Error: Specified Vector Data File Not Found" );
        return -1;
    }

    # Read Vector File Header ( Only The First Line Is Needed Here )
    my $fileHandle;

    if( !open( $fileHandle, "<:", $vectorDataFilePath ) )
    {
        $self->WriteLog( "CLSortVectorFile - Error: Unable To Open Vector Data File - $!" );
        return -1;
    }

    my $headerLine = <$fileHandle>;
    close( $fileHandle );

    # Check(s)
    if( !defined( $headerLine ) )
    {
        $self->WriteLog( "CLSortVectorFile - Error: Header Not Defined" );
        return -1;
    }

    # Fetch Number Of Words And Vector Length From Header
    my @headerAry = split( ' ', $headerLine );

    if( @headerAry < 2 )
    {
        $self->WriteLog( "CLSortVectorFile - Error: Invalid Header" );
        return -1;
    }

    my ( $numberOfWords, $vectorLength ) = @headerAry[ 0, 1 ];

    # Check Header String For Sorted Signature
    my $fileAlreadySorted = ( index( $headerLine, "#\$\@RTED#" ) != -1 ) ? 1 : 0;

    $self->WriteLog( "CLSortVectorFile - Checking To See If File Has Been Previously Sorted?" );

    if( $fileAlreadySorted == 1 )
    {
        print( "Warning: Vector Data File Is Already Sorted\n" ) if ( $self->GetDebugLog() == 0 );
        $self->WriteLog( "CLSortVectorFile - Warning: Vector Data File Is Already Sorted / Header Signed As Sorted" );
        return 1;
    }

    $self->WriteLog( "CLSortVectorFile - File Has Not Been Sorted" );
    $overwriteOldFile = 0 if !defined( $overwriteOldFile );
    $self->WriteLog( "CLSortVectorFile - Warning: Overwrite Old File Option Enabled" ) if ( $overwriteOldFile == 1 );
    $self->WriteLog( "CLSortVectorFile - Saving As New File: sortedvectors.bin" ) if ( $overwriteOldFile == 0 );

    $self->WriteLog( "CLSortVectorFile - Beginning Data Format Detection And Sort Routine" );

    # Check Vector File For Vector Data Format
    my $saveFormat = $self->W2VCheckWord2VecDataFileType( $vectorDataFilePath );

    # Without a detected format nothing would be read into memory, and saving
    # would write whatever the word2vec object currently holds - possibly
    # over the original file. Abort instead.
    if( !defined( $saveFormat ) )
    {
        $self->WriteLog( "CLSortVectorFile - Error: Unable To Detect Vector File Data Format" );
        return -1;
    }

    $self->WriteLog( "CLSortVectorFile - Vector File Data Format Detected As: $saveFormat" );

    # Read Vector Data File In Memory
    # ( Declared unconditionally: "my $x = ... if COND" has undefined behaviour. )
    $self->WriteLog( "CLSortVectorFile - Reading Vector Data File" );
    my $result = $self->W2VReadTrainedVectorDataFromFile( $vectorDataFilePath, 0 );

    # Modify Header Entry To Include Sorted Signature
    # ( The header is looked up in the vocabulary hash under the word count key. )
    $self->WriteLog( "CLSortVectorFile - Signing Header" );
    my $vocabularyHashRef = $self->W2VGetVocabularyHash();
    $vocabularyHashRef->{ $numberOfWords } = "$vectorLength #\$\@RTED#" if defined( $vocabularyHashRef->{ $numberOfWords } );

    # Save Header Values In Word2vec Object
    $self->W2VSetNumberOfWords( $numberOfWords );
    $self->W2VSetVectorLength( $vectorLength );

    # Set File Name If Overwrite Option Is Disabled
    $vectorDataFilePath = "sortedvectors.bin" if $overwriteOldFile == 0;

    # Set Save Format ( Unknown Format Names Are Passed Through Unchanged )
    my %saveFormatCodes = ( "text" => 0, "binary" => 1, "sparsetext" => 2 );
    $saveFormat = $saveFormatCodes{ $saveFormat } if exists( $saveFormatCodes{ $saveFormat } );

    # Save Sorted Vector Data To File
    $self->WriteLog( "CLSortVectorFile - Saving New File As: \"$vectorDataFilePath\"" );
    $result = $self->W2VSaveTrainedVectorDataToFile( $vectorDataFilePath, $saveFormat );

    # Clean Up
    $self->W2VClearVocabularyHash();

    $self->WriteLog( "CLSortVectorFile - Complete" );

    return $result;
}
|
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
# CleanWord2VecDirectory()
#
# Deletes the compiled word2vec artifacts found directly inside the directory
# reported by GetWord2VecDir(): the "<tool>.o" object files and the tool
# executables ( "<tool>.exe" when GetOSType() is "MSWin32", plain "<tool>"
# otherwise ). Files that are not present are silently skipped.
#
# Returns -1 when the directory does not exist, 0 otherwise.
sub CleanWord2VecDirectory
{
    my ( $self ) = @_;

    # Check(s)
    my $directory = $self->GetWord2VecDir();

    unless( -e $directory )
    {
        $self->WriteLog( "CleanWord2VecDirectory - Word2Vec Directory: \"$directory\" Does Not Exist" );
        return -1;
    }

    $self->WriteLog( "CleanWord2VecDirectory - Cleaning Up Word2Vec Directory Files" );

    # Base path ( without extension ) of every tool built in the directory
    my @toolPaths = map { "$directory/$_" } ( "word2vec", "word2phrase", "word-analogy", "distance", "compute-accuracy" );

    $self->WriteLog( "CleanWord2VecDirectory - Removing C Object Files" );

    foreach my $toolPath ( @toolPaths )
    {
        my $objectFile = "$toolPath.o";
        unlink( $objectFile ) if ( -e $objectFile );
    }

    $self->WriteLog( "CleanWord2VecDirectory - Removed C Object Files" );
    $self->WriteLog( "CleanWord2VecDirectory - Removing Word2Vec Executable Files" );

    # Executables only carry an extension on Windows
    my $extension = ( $self->GetOSType() eq "MSWin32" ) ? ".exe" : "";

    foreach my $toolPath ( @toolPaths )
    {
        my $executable = $toolPath . $extension;
        unlink( $executable ) if ( -e $executable );
    }

    print( "Cleaned Word2Vec Directory\n" ) if ( $self->GetDebugLog() == 0 );

    $self->WriteLog( "CleanWord2VecDirectory - Removed Word2Vec Executable Files" );
    return 0;
}
|
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
######################################################################################
|
1186
|
|
|
|
|
|
|
# Similarity Functions
|
1187
|
|
|
|
|
|
|
######################################################################################
|
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
# CLSimilarityAvg( $similarityFilePath )
#
# Reads a similarity file whose lines hold fields separated by "<>", passes
# the last two fields of every line ( lower-cased ) to
# W2VComputeAvgOfWordsCosineSimilarity() and writes one
# "result<>line" row per input line to a results file.
#
# The results file name is the input path with ".avg_results" appended and
# every ".sim" occurrence removed. A line with fewer than two fields, an empty
# term or an undefined computation result is reported with the value -1.
# When a line has exactly three fields, the first field is dropped from the
# echoed line ( presumably a reference score - confirm against the data files ).
#
# Returns 0 on success, -1 when the input file is missing or unreadable, no
# vector data is in memory, or the results file cannot be written.
sub CLSimilarityAvg
{
    my ( $self, $similarityFilePath ) = @_;

    # Treat an undefined path like an empty one so the checks below report it
    $similarityFilePath = "" if !defined( $similarityFilePath );

    # Check(s)
    if( !( -e "$similarityFilePath" ) )
    {
        $self->WriteLog( "CLSimilarityAvg - Error: Specified File: \"$similarityFilePath\" Does Not Exist" );
        return -1;
    }

    if( $self->W2VIsVectorDataInMemory() == 0 )
    {
        $self->WriteLog( "CLSimilarityAvg - Error: No Vector Data In Memory" );
        return -1;
    }

    $self->WriteLog( "CLSimilarityAvg - Reading Similarity File Data: $similarityFilePath" );

    my $fileHandle;

    if( !open( $fileHandle, "<:encoding(UTF-8)", "$similarityFilePath" ) )
    {
        $self->WriteLog( "CLSimilarityAvg - Error: Unable To Open Similarity File - $!" );
        return -1;
    }

    my @dataAry = <$fileHandle>;
    chomp( @dataAry );
    close( $fileHandle );

    $self->WriteLog( "CLSimilarityAvg - Finished Reading Similarity File Data" );
    $self->WriteLog( "CLSimilarityAvg - Computing File Data Cosine Similarity" );
    print( "Generating Average Cosine Similarity File\n" ) if ( $self->GetDebugLog() == 0 );

    my @resultAry = ();

    for my $i ( 0 .. $#dataAry )
    {
        my @searchWords = split( '<>', $dataAry[$i] );

        # The comparison terms are always the last two fields of the line
        my $searchWord1 = ( @searchWords >= 2 ) ? $searchWords[-2] : undef;
        my $searchWord2 = ( @searchWords >= 1 ) ? $searchWords[-1] : undef;

        # Check(s)
        # ( "$i + 1" is parenthesized: "." and "+" share the same precedence, so
        #   the unparenthesized form turned the whole message into a number. )
        $self->WriteLog( "CLSimilarityAvg - Warning: Comparison Contains Less Than Two Words - Line Number: " . ( $i + 1 ) . ", Line String: \"" . $dataAry[$i] . "\"" ) if @searchWords < 2;
        $self->WriteLog( "CLSimilarityAvg - Warning: Line Contains Empty Search Term - Line Number: " . ( $i + 1 ) . ", Line String: \"" . $dataAry[$i] . "\"" )
            if ( defined( $searchWord1 ) && $searchWord1 eq "" ) || ( defined( $searchWord2 ) && $searchWord2 eq "" );

        $searchWord1 = undef if defined( $searchWord1 ) && length( $searchWord1 ) == 0;
        $searchWord2 = undef if defined( $searchWord2 ) && length( $searchWord2 ) == 0;

        # Only compute when both terms are usable; everything else is reported as -1
        my $result;
        $result = $self->W2VComputeAvgOfWordsCosineSimilarity( lc( $searchWord1 ), lc( $searchWord2 ) )
            if defined( $searchWord1 ) && defined( $searchWord2 );
        $result = -1 if !defined( $result );
        push( @resultAry, $result );

        # Three field lines are echoed without their first field
        $dataAry[$i] = join( '<>', @searchWords[ 1, 2 ] ) if @searchWords == 3;
    }

    $self->WriteLog( "CLSimilarityAvg - Finished Computing Results" );
    $self->WriteLog( "CLSimilarityAvg - Saving Results In Similarity Format" );

    $similarityFilePath .= ".avg_results";
    $similarityFilePath =~ s/\.sim//g;

    if( !open( $fileHandle, ">:encoding(utf8)", $similarityFilePath ) )
    {
        $self->WriteLog( "CLSimilarityAvg - Error: Creating/Saving Results File" );
        return -1;
    }

    for my $i ( 0 .. $#dataAry )
    {
        print $fileHandle $resultAry[$i] . "<>" . $dataAry[$i] . "\n";
    }

    # Buffered write errors only surface when the handle is closed
    if( !close( $fileHandle ) )
    {
        $self->WriteLog( "CLSimilarityAvg - Error: Creating/Saving Results File" );
        return -1;
    }

    $self->WriteLog( "CLSimilarityAvg - Finished" );

    return 0;
}
|
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
# CLSimilarityComp( $similarityFilePath, $vectorBinFilePath )
#
# Reads a similarity file whose lines hold fields separated by "<>", takes the
# last two fields of every line, joins the words of each term with "_" ( runs
# of spaces become one underscore ), lower-cases them and passes them to
# W2VComputeCosineSimilarity(). One "result<>line" row per input line is
# written to a results file.
#
# The results file name is the input path with ".comp_results" appended and
# every ".sim" occurrence removed. A line with fewer than two fields, an empty
# term or an undefined computation result is reported with the value -1.
# When a line has exactly three fields, the first field is dropped from the
# echoed line ( presumably a reference score - confirm against the data files ).
#
# $vectorBinFilePath is accepted for interface compatibility but is not used.
#
# Returns 0 on success, -1 when the input file is missing or unreadable, no
# vector data is in memory, or the results file cannot be written.
sub CLSimilarityComp
{
    my ( $self, $similarityFilePath, $vectorBinFilePath ) = @_;

    # Treat an undefined path like an empty one so the checks below report it
    $similarityFilePath = "" if !defined( $similarityFilePath );

    # Check(s)
    if( !( -e "$similarityFilePath" ) )
    {
        $self->WriteLog( "CLSimilarityComp - Error: Specified File: \"$similarityFilePath\" Does Not Exist" );
        return -1;
    }

    if( $self->W2VIsVectorDataInMemory() == 0 )
    {
        $self->WriteLog( "CLSimilarityComp - Error: No Vector Data In Memory" );
        return -1;
    }

    $self->WriteLog( "CLSimilarityComp - Reading Similarity File Data: $similarityFilePath" );

    my $fileHandle;

    if( !open( $fileHandle, "<:encoding(UTF-8)", "$similarityFilePath" ) )
    {
        $self->WriteLog( "CLSimilarityComp - Error: Unable To Open Similarity File - $!" );
        return -1;
    }

    my @dataAry = <$fileHandle>;
    chomp( @dataAry );
    close( $fileHandle );

    $self->WriteLog( "CLSimilarityComp - Finished Reading Similarity File Data" );
    $self->WriteLog( "CLSimilarityComp - Computing File Data Cosine Similarity" );
    print( "Generating Compound Cosine Similarity File\n" ) if ( $self->GetDebugLog() == 0 );

    my @resultAry = ();

    for my $i ( 0 .. $#dataAry )
    {
        my @searchWords = split( '<>', $dataAry[$i] );

        # The comparison terms are always the last two fields of the line
        my $searchWord1 = ( @searchWords >= 2 ) ? $searchWords[-2] : undef;
        my $searchWord2 = ( @searchWords >= 1 ) ? $searchWords[-1] : undef;

        # Compoundify the terms ( only the local copies, the echoed line is untouched )
        $searchWord1 =~ s/ +/_/g if defined( $searchWord1 );
        $searchWord2 =~ s/ +/_/g if defined( $searchWord2 );

        # Check(s)
        # ( "$i + 1" is parenthesized: "." and "+" share the same precedence, so
        #   the unparenthesized form turned the whole message into a number. )
        $self->WriteLog( "CLSimilarityComp - Warning: Comparison Contains Less Than Two Words - Line Number: " . ( $i + 1 ) . ", Line String: \"" . $dataAry[$i] . "\"" ) if @searchWords < 2;
        $self->WriteLog( "CLSimilarityComp - Warning: Line Contains Empty Search Term - Line Number: " . ( $i + 1 ) . ", Line String: \"" . $dataAry[$i] . "\"" )
            if ( defined( $searchWord1 ) && $searchWord1 eq "" ) || ( defined( $searchWord2 ) && $searchWord2 eq "" );

        $searchWord1 = undef if defined( $searchWord1 ) && length( $searchWord1 ) == 0;
        $searchWord2 = undef if defined( $searchWord2 ) && length( $searchWord2 ) == 0;

        # Only compute when both terms are usable; everything else is reported as -1
        my $result;
        $result = $self->W2VComputeCosineSimilarity( lc( $searchWord1 ), lc( $searchWord2 ) )
            if defined( $searchWord1 ) && defined( $searchWord2 );
        $result = -1 if !defined( $result );
        push( @resultAry, $result );

        # Three field lines are echoed without their first field
        $dataAry[$i] = join( '<>', @searchWords[ 1, 2 ] ) if @searchWords == 3;
    }

    $self->WriteLog( "CLSimilarityComp - Finished Computing Results" );
    $self->WriteLog( "CLSimilarityComp - Saving Results In Similarity Format" );

    $similarityFilePath .= ".comp_results";
    $similarityFilePath =~ s/\.sim//g;

    if( !open( $fileHandle, ">:encoding(utf8)", $similarityFilePath ) )
    {
        $self->WriteLog( "CLSimilarityComp - Error: Creating/Saving Results File" );
        return -1;
    }

    for my $i ( 0 .. $#dataAry )
    {
        print $fileHandle $resultAry[$i] . "<>" . $dataAry[$i] . "\n";
    }

    # Buffered write errors only surface when the handle is closed
    if( !close( $fileHandle ) )
    {
        $self->WriteLog( "CLSimilarityComp - Error: Creating/Saving Results File" );
        return -1;
    }

    $self->WriteLog( "CLSimilarityComp - Finished" );

    return 0;
}
|
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
# CLSimilaritySum( $similarityFilePath, $vectorBinFilePath )
#
# Reads a similarity file whose lines hold fields separated by "<>", passes
# the last two fields of every line ( lower-cased ) to
# W2VComputeMultiWordCosineSimilarity() and writes one
# "result<>line" row per input line to a results file.
#
# The results file name is the input path with ".sum_results" appended and
# every ".sim" occurrence removed. A line with fewer than two fields, an empty
# term or an undefined computation result is reported with the value -1.
# When a line has exactly three fields, the first field is dropped from the
# echoed line ( presumably a reference score - confirm against the data files ).
#
# $vectorBinFilePath is accepted for interface compatibility but is not used.
#
# Returns 0 on success, -1 when the input file is missing or unreadable, no
# vector data is in memory, or the results file cannot be written.
sub CLSimilaritySum
{
    my ( $self, $similarityFilePath, $vectorBinFilePath ) = @_;

    # Treat an undefined path like an empty one so the checks below report it
    $similarityFilePath = "" if !defined( $similarityFilePath );

    # Check(s)
    if( !( -e "$similarityFilePath" ) )
    {
        $self->WriteLog( "CLSimilaritySum - Error: Specified File: \"$similarityFilePath\" Does Not Exist" );
        return -1;
    }

    if( $self->W2VIsVectorDataInMemory() == 0 )
    {
        $self->WriteLog( "CLSimilaritySum - Error: No Vector Data In Memory" );
        return -1;
    }

    $self->WriteLog( "CLSimilaritySum - Reading Similarity File Data: $similarityFilePath" );

    my $fileHandle;

    if( !open( $fileHandle, "<:encoding(UTF-8)", "$similarityFilePath" ) )
    {
        $self->WriteLog( "CLSimilaritySum - Error: Unable To Open Similarity File - $!" );
        return -1;
    }

    my @dataAry = <$fileHandle>;
    chomp( @dataAry );
    close( $fileHandle );

    $self->WriteLog( "CLSimilaritySum - Finished Reading Similarity File Data" );
    $self->WriteLog( "CLSimilaritySum - Computing File Data Cosine Similarity" );
    print( "Generating Summed Cosine Similarity File\n" ) if ( $self->GetDebugLog() == 0 );

    my @resultAry = ();

    for my $i ( 0 .. $#dataAry )
    {
        my @searchWords = split( '<>', $dataAry[$i] );

        # The comparison terms are always the last two fields of the line
        my $searchWord1 = ( @searchWords >= 2 ) ? $searchWords[-2] : undef;
        my $searchWord2 = ( @searchWords >= 1 ) ? $searchWords[-1] : undef;

        # Check(s)
        # ( "$i + 1" is parenthesized: "." and "+" share the same precedence, so
        #   the unparenthesized form turned the whole message into a number. )
        $self->WriteLog( "CLSimilaritySum - Warning: Comparison Contains Less Than Two Words - Line Number: " . ( $i + 1 ) . ", Line String: \"" . $dataAry[$i] . "\"" ) if @searchWords < 2;
        $self->WriteLog( "CLSimilaritySum - Warning: Line Contains Empty Search Term - Line Number: " . ( $i + 1 ) . ", Line String: \"" . $dataAry[$i] . "\"" )
            if ( defined( $searchWord1 ) && $searchWord1 eq "" ) || ( defined( $searchWord2 ) && $searchWord2 eq "" );

        $searchWord1 = undef if defined( $searchWord1 ) && length( $searchWord1 ) == 0;
        $searchWord2 = undef if defined( $searchWord2 ) && length( $searchWord2 ) == 0;

        # Only compute when both terms are usable; everything else is reported as -1
        my $result;
        $result = $self->W2VComputeMultiWordCosineSimilarity( lc( $searchWord1 ), lc( $searchWord2 ) )
            if defined( $searchWord1 ) && defined( $searchWord2 );
        $result = -1 if !defined( $result );
        push( @resultAry, $result );

        # Three field lines are echoed without their first field
        $dataAry[$i] = join( '<>', @searchWords[ 1, 2 ] ) if @searchWords == 3;
    }

    $self->WriteLog( "CLSimilaritySum - Finished Computing Results" );
    $self->WriteLog( "CLSimilaritySum - Saving Results In Similarity Format" );

    $similarityFilePath .= ".sum_results";
    $similarityFilePath =~ s/\.sim//g;

    if( !open( $fileHandle, ">:encoding(utf8)", $similarityFilePath ) )
    {
        $self->WriteLog( "CLSimilaritySum - Error: Creating/Saving Results File" );
        return -1;
    }

    for my $i ( 0 .. $#dataAry )
    {
        print $fileHandle $resultAry[$i] . "<>" . $dataAry[$i] . "\n";
    }

    # Buffered write errors only surface when the handle is closed
    if( !close( $fileHandle ) )
    {
        $self->WriteLog( "CLSimilaritySum - Error: Creating/Saving Results File" );
        return -1;
    }

    $self->WriteLog( "CLSimilaritySum - Finished" );

    return 0;
}
|
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
######################################################################################
|
1451
|
|
|
|
|
|
|
# Word Sense Disambiguation Functions
|
1452
|
|
|
|
|
|
|
######################################################################################
|
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
# CLWordSenseDisambiguation( $instancesFilePath, $sensesFilePath, $vectorBinFilePath, $stopListFilePath, $listOfFilesPath )
#
# Entry point for word sense disambiguation. Builds a hash of
# instances file => senses file pairs and hands it to WSDParseList().
# The pairs come from one of three places:
#   - $listOfFilesPath given     : the hash returned by WSDReadList()
#   - $instancesFilePath is a
#     directory                  : the hash returned by WSDParseDirectory()
#   - otherwise                  : the single $instancesFilePath/$sensesFilePath pair,
#                                  which requires the instances, senses and vector
#                                  files to be defined and to exist
#
# In the list and directory modes the "-vectors" and "-stoplist" entries of
# the hash supply the vector / stop list paths when the corresponding argument
# is undefined or empty, and an accuracy report is generated when more than
# one file pair was processed without WSDParseList() returning -1.
#
# Returns -1 when the single pair checks fail, otherwise the value returned
# by WSDParseList().
sub CLWordSenseDisambiguation
{
    my ( $self, $instancesFilePath, $sensesFilePath, $vectorBinFilePath, $stopListFilePath, $listOfFilesPath ) = @_;

    my $result = 0;
    my %listOfFiles;

    # Check(s)
    $listOfFilesPath = "" if !defined( $listOfFilesPath );

    if( $listOfFilesPath eq "" )
    {
        # Parse Directory Of Files
        if( defined( $instancesFilePath ) && $self->XTWIsFileOrDirectory( $instancesFilePath ) eq "dir" )
        {
            # Only a hash reference is usable here; anything else
            # ( undef or an error code ) leaves the list empty.
            my $hashRef = $self->WSDParseDirectory( $instancesFilePath );
            %listOfFiles = %{ $hashRef } if ref( $hashRef ) eq "HASH";

            # Enable List Parsing
            $listOfFilesPath = "directory";
        }
        # Parse Pair Of Files
        else
        {
            my $instancesMissing = ( !defined( $instancesFilePath ) || length( $instancesFilePath ) == 0 );
            my $sensesMissing    = ( !defined( $sensesFilePath )    || length( $sensesFilePath ) == 0 );
            my $vectorsMissing   = ( !defined( $vectorBinFilePath ) || length( $vectorBinFilePath ) == 0 );
            my $stopListMissing  = ( !defined( $stopListFilePath )  || length( $stopListFilePath ) == 0 );

            $self->WriteLog( "CLWordSenseDisambiguation - Error: \"Instances\" File Not Specified" ) if $instancesMissing;
            $self->WriteLog( "CLWordSenseDisambiguation - Error: \"Senses\" File Not Specified" ) if $sensesMissing;
            $self->WriteLog( "CLWordSenseDisambiguation - Error: \"vector binary\" File Not Specified" ) if $vectorsMissing;
            $self->WriteLog( "CLWordSenseDisambiguation - Attn: \"stoplist\" File Not Specified" ) if $stopListMissing;
            $self->WriteLog( "CLWordSenseDisambiguation - \"$instancesFilePath\" Does Not Exist" ) if !$instancesMissing && !( -e $instancesFilePath );
            $self->WriteLog( "CLWordSenseDisambiguation - \"$sensesFilePath\" Does Not Exist" ) if !$sensesMissing && !( -e $sensesFilePath );
            $self->WriteLog( "CLWordSenseDisambiguation - \"$vectorBinFilePath\" Does Not Exist" ) if !$vectorsMissing && !( -e $vectorBinFilePath );
            $self->WriteLog( "CLWordSenseDisambiguation - \"$stopListFilePath\" Does Not Exist" ) if !$stopListMissing && !( -e $stopListFilePath );

            # The stop list is optional; the other three files are required
            my @requiredFiles = ( $instancesFilePath, $sensesFilePath, $vectorBinFilePath );
            my $anyUndefined  = grep { !defined( $_ ) } @requiredFiles;
            my $anyNotFound   = grep { !defined( $_ ) || !( -e $_ ) } @requiredFiles;

            if( $self->GetDebugLog() == 0 )
            {
                print( "CLWordSenseDisambiguation - Error: No Specified Files To Parse\n" ) if $anyUndefined;
                print( "CLWordSenseDisambiguation - Error: Specified File(s) Do Not Exist\n" ) if $anyNotFound;
            }

            return -1 if $anyUndefined || $anyNotFound;
        }
    }
    else
    {
        $self->WriteLog( "CLWordSenseDisambiguation - Parsing List Of Files Option Enabled" );

        my $hashRef = $self->WSDReadList( $listOfFilesPath );
        %listOfFiles = %{ $hashRef } if ref( $hashRef ) eq "HASH";
    }

    # Continue if files are defined
    if( $listOfFilesPath eq "" )
    {
        $listOfFiles{ $instancesFilePath } = $sensesFilePath;
        $result = $self->WSDParseList( \%listOfFiles, $vectorBinFilePath, $stopListFilePath );
    }
    else
    {
        # Fall back to the paths stored in the list when none were passed in
        $vectorBinFilePath = $listOfFiles{ "-vectors" } if !defined( $vectorBinFilePath ) || length( $vectorBinFilePath ) == 0;
        $stopListFilePath = $listOfFiles{ "-stoplist" } if !defined( $stopListFilePath ) || length( $stopListFilePath ) == 0;
        chomp( $vectorBinFilePath ) if defined( $vectorBinFilePath );
        chomp( $stopListFilePath ) if defined( $stopListFilePath );

        # The option entries are not file pairs - remove them before parsing
        delete( $listOfFiles{ "-vectors" } );
        delete( $listOfFiles{ "-stoplist" } );

        $result = $self->WSDParseList( \%listOfFiles, $vectorBinFilePath, $stopListFilePath );
        $self->WSDGenerateAccuracyReport( $self->GetWorkingDirectory() ) if $result != -1 && ( keys %listOfFiles ) > 1;
    }

    $self->WriteLog( "CLWordSenseDisambiguation - Finished" ) if ( $result == 0 );
    $self->WriteLog( "CLWordSenseDisambiguation - Script Finished With Errors" ) if ( $result != 0 && $self->GetWriteLog() == 0 );
    $self->WriteLog( "CLWordSenseDisambiguation - Script Finished With Errors, See Logs For Details" ) if ( $result != 0 && $self->GetWriteLog() == 1 );

    print( "Complete\n" ) if ( $self->GetDebugLog() == 0 && $result == 0 );
    print( "Error Processing File(s)\n" ) if ( $self->GetDebugLog() == 0 && $result != 0 );

    return $result;
}
|
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
# WSDAnalyzeSenseData()
#
# Reconciles the sense IDs of the loaded sense objects with the sense IDs used
# by the loaded instance objects. The longest instance sense ID length is
# taken as the expected length; any sense object whose sense ID has a
# different length and differs from its own instance ID gets its sense ID
# replaced by that instance ID
# ( ie. Instance->SenseID = C10030234 and Sense->SenseID = M2 ).
#
# Returns -1 when the sense array is empty, 0 otherwise.
sub WSDAnalyzeSenseData
{
    my ( $self ) = @_;

    my @instanceAry = $self->GetInstanceAry();
    my @senseAry    = $self->GetSenseAry();

    # Check(s)
    if( @senseAry == 0 )
    {
        $self->WriteLog( "WSDAnalyzeSenseData - Senses Array Empty / Has WSD Sense File Been Loaded Into Memory?" );
        return -1;
    }

    # Find Length Of The Longest SenseID Among The Instances
    my $senseStrLength = 0;

    for my $instance ( @instanceAry )
    {
        my $currentLength = length( $instance->senseID );
        $senseStrLength = $currentLength if ( defined( $currentLength ) && $currentLength > $senseStrLength );
    }

    # Check If Each Expected SenseID Is The Same Length In Sense Objects, Else Expected SenseID Is Probably Supposed To Be The InstanceID
    for my $sense ( @senseAry )
    {
        my $instanceID = $sense->instanceID;
        my $senseID    = $sense->senseID;

        # Adjust SenseID If InstanceID Not Equal To SenseID
        if( length( $senseID ) != $senseStrLength && $instanceID ne $senseID )
        {
            $self->WriteLog( "WSDAnalyzeSenseData - Warning: SenseID Mis-Match - InstanceID: $instanceID Not Equal SenseID: $senseID" );

            # The sense is an object reference, so the update is visible to
            # every holder of the same object.
            $sense->senseID( $instanceID );

            $self->WriteLog( "                                   Correcting Data - InstanceID: $instanceID ----> SenseID: $instanceID" );
        }
    }

    # Explicit success value: falling off the end of a loop leaves the
    # return value unspecified.
    return 0;
}
|
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
# WSDReadList( $listOfFilesPath )
#
# Parses a list file into a hash. Every usable line contributes its first
# whitespace separated field as key and its second field as value; further
# fields are ignored. Empty lines, lines containing a "#" anywhere and lines
# with fewer than two fields are skipped.
#
# Returns a reference to the hash, or undef when the list file does not
# exist. Dies when the file exists but cannot be opened.
sub WSDReadList
{
    my ( $self, $listOfFilesPath ) = @_;

    # Check(s)
    unless( -e "$listOfFilesPath" )
    {
        $self->WriteLog( "WSDReadList - \"$listOfFilesPath\" Does Not Exist" );
        return undef;
    }

    open( my $listHandle, "<:encoding(utf8)", "$listOfFilesPath" ) or die "Error: Unable To Open File: $listOfFilesPath";

    my %filePairs;

    while( my $entry = <$listHandle> )
    {
        chomp( $entry );

        # Skip Commented And Empty Lines
        next if ( $entry eq "" || index( $entry, "#" ) != -1 );

        # A Usable Entry Needs At Least A Key And A Value
        my @fields = split( ' ', $entry );
        next if @fields < 2;

        $filePairs{ $fields[0] } = $fields[1];
    }

    close( $listHandle );

    return \%filePairs;
}
|
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
# Scans a directory for ".sval" files and pairs instance files with sense files.
#
# The directory is also stored as the working directory via SetWorkingDirectory.
# For every entry whose name contains ".sval" but not ".sval.results", two names
# are derived: one with "senses" replaced by "instances" (the key) and one with
# "instances" replaced by "senses" (the value). A pair is recorded only once and
# only when the derived instance file exists as a plain file in the directory.
#
# Parameters:
#   $directory - Directory to scan.
#
# Returns:
#   Hash reference of instance file name => sense file name,
#   undef when the directory is undefined or does not exist,
#   -1 when the directory cannot be opened.
sub WSDParseDirectory
{
    my ( $self, $directory ) = @_;

    # Check(s)
    if( !defined( $directory ) )
    {
        $self->WriteLog( "WSDParseDirectory - Directory Not Defined" );
        return undef;
    }

    if( !( -e $directory ) )
    {
        $self->WriteLog( "WSDParseDirectory - Specified Directory Does Not Exist" );
        return undef;
    }

    # Set Working Directory
    $self->SetWorkingDirectory( $directory );

    # Read File Name(s) From Specified Directory
    my %listOfFiles;
    my $dirHandle;

    if( !opendir( $dirHandle, "$directory" ) )
    {
        $self->WriteLog( "WSDParseDirectory - Error: Can't open $directory: $!" );
        return -1;
    }

    for my $file ( readdir( $dirHandle ) )
    {
        # Only Include ".sval" Files ( Omit ".sval.results" Files )
        next if index( $file, ".sval" ) == -1 || index( $file, ".sval.results" ) != -1;

        my $instanceFile = $file;
        my $senseFile    = $file;

        $instanceFile =~ s/senses/instances/g;
        $senseFile    =~ s/instances/senses/g;

        $listOfFiles{ $instanceFile } = $senseFile if ( !defined( $listOfFiles{ $instanceFile } ) && -f "$directory/$instanceFile" );
    }

    closedir( $dirHandle );

    return \%listOfFiles;
}
|
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
# Runs word sense disambiguation over every instance/sense file pair in a list.
#
# For each pair the instance and sense files are parsed with WSDParseFile, the
# sense data is checked with WSDAnalyzeSenseData, similarities are computed with
# WSDCalculateCosineAvgSimilarity and, on success, results are written with
# WSDSaveResults. File names are resolved relative to GetWorkingDirectory().
#
# Parameters:
#   $hashRef           - Hash reference of instance file name => sense file name.
#   $vectorBinFilePath - Trained vector file to load. When vector data is already
#                        in memory and this is defined, the existing data is
#                        cleared and the file is loaded in its place.
#   $stopListFilePath  - Optional stop list file; when it exists a stop list regex
#                        is generated with _WSDStop and passed to WSDParseFile.
#
# Returns:
#   undef when $hashRef is undefined, -1 when the list is empty or the vector
#   file cannot be read, 0 otherwise.
sub WSDParseList
{
    my ( $self, $hashRef, $vectorBinFilePath, $stopListFilePath ) = @_;

    # Check(s)
    if( !defined( $hashRef ) )
    {
        $self->WriteLog( "WSDParseList - List Of Files Not Defined" );
        return undef;
    }

    my %listOfFiles = %{ $hashRef };

    if( ( keys %listOfFiles ) == 0 )
    {
        $self->WriteLog( "WSDParseList - Error: No Files To Compare Listed" );
        return -1;
    }

    # Stop List Handling
    # Declared unconditionally: "my $x = ... if COND" has undefined behaviour in Perl.
    # The three cases are mutually exclusive so that only the message that applies is reported.
    my $stopListRegex;

    if( !defined( $stopListFilePath ) || length( $stopListFilePath ) == 0 )
    {
        print( "Attn: Stop List Not Utilized\n" );
        $self->WriteLog( "WSDParseList - Attn: \"Stop List\" File Not Specified" );
    }
    elsif( !( -e $stopListFilePath ) )
    {
        print( "Warning: Stop List File Does Not Exist\n" );
        $self->WriteLog( "Warning: Stop List File Does Not Exist" );
    }
    else
    {
        print( "Generating Stop List Regex\n" );
        $self->WriteLog( "WSDParseList - Generating Stop List Regex" );
        $stopListRegex = $self->_WSDStop( $stopListFilePath );

        $self->WriteLog( "WSDParseList - Generated Stop List Regex: $stopListRegex" ) if defined( $stopListRegex );
        $self->WriteLog( "WSDParseList - Warning: Stop List Regex Generation Failed - Continuing Without Stop List Regex" ) if !defined( $stopListRegex );
    }

    # Vector Data Handling
    my $word2vec     = $self->GetWord2VecHandler();
    my $dataInMemory = $word2vec->IsVectorDataInMemory();

    if( $dataInMemory == 0 || ( $dataInMemory == 1 && defined( $vectorBinFilePath ) ) )
    {
        # A Newly Specified Vector File Replaces Whatever Is Currently Loaded
        if( $dataInMemory == 1 )
        {
            print( "Warning: Clearing Previous Vector Data In Memory\n" ) if ( $self->GetDebugLog() == 0 );
            $self->WriteLog("Warning: Clearing Previous Vector Data In Memory" );
            $word2vec->ClearVocabularyHash();
        }

        print( "Reading Vector File: $vectorBinFilePath\n" ) if ( $self->GetDebugLog() == 0 );
        $self->WriteLog( "WSDParseList - Reading \"Vector Binary\" File: \"$vectorBinFilePath\"" );
        my $readFile = $word2vec->ReadTrainedVectorDataFromFile( $vectorBinFilePath );

        if( $readFile == -1 )
        {
            print( "Unable To Read Specified Vector Binary File: \"$vectorBinFilePath\"\n" ) if ( $self->GetDebugLog() == 0 );
            $self->WriteLog( "WSDParseList - Unable To Read Specified Vector Binary File: \"$vectorBinFilePath\"" );
            return -1;
        }
    }
    else
    {
        print( "Warning: Vector Data Already Exists In Memory - Using Existing Data\n" ) if ( $self->GetDebugLog() == 0 );
        $self->WriteLog( "Warning: Vector Data Already Exists In Memory - Using Existing Data" );
    }

    print( "Parsing File(s)\n" ) if ( $self->GetDebugLog() == 0 );
    $self->WriteLog( "WSDParseList - Parsing List Of Files" );

    for my $file ( keys %listOfFiles )
    {
        my $instancesFilePath = $self->GetWorkingDirectory() . "/$file";
        my $sensesFilePath    = $self->GetWorkingDirectory() . "/" .$listOfFiles{ $file };

        # Check(s)
        print( "\"$instancesFilePath\" Cannot Be Found\n" ) if !( -e $instancesFilePath ) && $self->GetDebugLog() == 0;
        print( "\"$sensesFilePath\" Cannot Be Found\n" ) if !( -e $sensesFilePath ) && $self->GetDebugLog() == 0;
        $self->WriteLog( "WSDParseList - Error: \"$instancesFilePath\" Cannot Be Found" ) if !( -e $instancesFilePath );
        $self->WriteLog( "WSDParseList - Error: \"$sensesFilePath\" Cannot Be Found" ) if !( -e $sensesFilePath );
        $self->WriteLog( "WSDParseList - Error: \"$instancesFilePath\" Contains No Data" ) if ( -z $instancesFilePath );
        $self->WriteLog( "WSDParseList - Error: \"$sensesFilePath\" Contains No Data" ) if ( -z $sensesFilePath );
        next if !( -e $instancesFilePath ) || !( -e $sensesFilePath ) || ( -z $instancesFilePath ) || ( -z $sensesFilePath );

        # Parse "Instances" From File
        # The count setter receives the number of parsed entries, not the flattened entries themselves.
        my $instanceAryRef = $self->WSDParseFile( $instancesFilePath, $stopListRegex );
        my $instanceCount  = defined( $instanceAryRef ) ? scalar( @{ $instanceAryRef } ) : 0;
        $self->SetInstanceAry( $instanceAryRef ) if defined( $instanceAryRef );
        $self->SetInstanceCount( $instanceCount );
        $self->WriteLog( "WSDParseList - Parsed And Stored ". $instanceCount . " Instances From File." );

        # Parse "Senses" From File
        my $senseAryRef = $self->WSDParseFile( $sensesFilePath, $stopListRegex );
        my $senseCount  = defined( $senseAryRef ) ? scalar( @{ $senseAryRef } ) : 0;
        $self->SetSenseAry( $senseAryRef ) if defined( $senseAryRef );
        $self->SetSenseCount( $senseCount );
        $self->WriteLog( "WSDParseList - Parsed And Stored " . $senseCount . " Senses From File." );

        # Analyze Sense Array For SenseID Mis-Match
        $self->WSDAnalyzeSenseData();

        # Calculate Cosine Similarity For All Data Entries
        my $success = $self->WSDCalculateCosineAvgSimilarity();

        if( $success == -1 )
        {
            $self->WriteLog( "WSDParseList - Error Calculating Cosine Average Similarity / Skipping File" );
        }
        elsif( $success == 0 )
        {
            # Save Results ( WSDSaveResults Writes To "<instances file>.results.txt" )
            $self->WSDSaveResults( $instancesFilePath );
            $self->WriteLog( "WSDParseList - Results Saved To File \"$instancesFilePath.results.txt\"" );
        }

        # Clear Old Data
        # Done even when the file was skipped, so stale entries never carry over into the next file.
        $self->SetInstanceCount( 0 );
        $self->SetSenseCount( 0 );
        $self->ClearInstanceAry();
        $self->ClearSenseAry();
    }

    $word2vec->ClearVocabularyHash();

    return 0;
}
|
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
sub WSDParseFile
|
1775
|
|
|
|
|
|
|
{
|
1776
|
|
|
|
|
|
|
my ( $self, $filePath, $stopListRegex ) = @_;
|
1777
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
# Check(s)
|
1779
|
|
|
|
|
|
|
return undef if !defined( $filePath );
|
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
# Begin file parsing
|
1782
|
|
|
|
|
|
|
print( "Parsing File: $filePath\n" ) if ( $self->GetDebugLog() == 0 );
|
1783
|
|
|
|
|
|
|
$self->WriteLog( "WSDParseFile - Parsing: $filePath" );
|
1784
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
open( my $fileHandle, "<:", $filePath ) or die "Error: Unable To Read File: $filePath";
|
1786
|
|
|
|
|
|
|
|
1787
|
|
|
|
|
|
|
my $line = <$fileHandle>;
|
1788
|
|
|
|
|
|
|
return undef if ( index( $line, "
|
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
$line = <$fileHandle>;
|
1791
|
|
|
|
|
|
|
return undef if ( index( $line, "lexelt item=\"" ) == -1 );
|
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
my @dataAry = ();
|
1794
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
while( $line = <$fileHandle> )
|
1796
|
|
|
|
|
|
|
{
|
1797
|
|
|
|
|
|
|
chomp( $line );
|
1798
|
|
|
|
|
|
|
#print "$line\n"; # REMOVE ME
|
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
if( index( $line, "
|
1801
|
|
|
|
|
|
|
{
|
1802
|
|
|
|
|
|
|
my $dataEntry = new WSDData();
|
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
$line =~ s/
|
1805
|
|
|
|
|
|
|
$line =~ s/\">//g;
|
1806
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
$dataEntry->instanceID( $line );
|
1808
|
|
|
|
|
|
|
#print "InstanceID: $line\n"; # REMOVE ME
|
1809
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
# Fetch next line for answer instance and sense id
|
1811
|
|
|
|
|
|
|
$line = <$fileHandle>;
|
1812
|
|
|
|
|
|
|
chomp( $line );
|
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
if( index( $line, "
|
1815
|
|
|
|
|
|
|
{
|
1816
|
|
|
|
|
|
|
# Set answer instance id
|
1817
|
|
|
|
|
|
|
$line =~ s/
|
1818
|
|
|
|
|
|
|
my $startIndex = 0;
|
1819
|
|
|
|
|
|
|
my $endIndex = index( $line, "\"" );
|
1820
|
|
|
|
|
|
|
$dataEntry->answerInstanceID( substr( $line, $startIndex, $endIndex ) );
|
1821
|
|
|
|
|
|
|
#print "Answer Instance ID: " . substr( $line, $startIndex, $endIndex ) . "\n"; # REMOVE ME
|
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
# Set sense id
|
1824
|
|
|
|
|
|
|
if( index( $line, "senseid=\"" ) != -1 )
|
1825
|
|
|
|
|
|
|
{
|
1826
|
|
|
|
|
|
|
$startIndex = $endIndex + 1;
|
1827
|
|
|
|
|
|
|
$endIndex = length( $line );
|
1828
|
|
|
|
|
|
|
$line = substr( $line, $startIndex, $endIndex );
|
1829
|
|
|
|
|
|
|
$line =~ s/ +//g;
|
1830
|
|
|
|
|
|
|
$line =~ s/senseid=\"//g;
|
1831
|
|
|
|
|
|
|
$line =~ s/\"\/>//g;
|
1832
|
|
|
|
|
|
|
$dataEntry->senseID( $line );
|
1833
|
|
|
|
|
|
|
#print "SenseID: $line\n"; # REMOVE ME
|
1834
|
|
|
|
|
|
|
}
|
1835
|
|
|
|
|
|
|
}
|
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
# Fetch next line for context
|
1838
|
|
|
|
|
|
|
$line = <$fileHandle>;
|
1839
|
|
|
|
|
|
|
chomp( $line );
|
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
if( index( $line, "" ) != -1 )
|
1842
|
|
|
|
|
|
|
{
|
1843
|
|
|
|
|
|
|
# Fetch next line for context data
|
1844
|
|
|
|
|
|
|
$line = <$fileHandle>;
|
1845
|
|
|
|
|
|
|
chomp( $line );
|
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
while( index( $line, "") == -1 )
|
1848
|
|
|
|
|
|
|
{
|
1849
|
|
|
|
|
|
|
# Normalize text
|
1850
|
|
|
|
|
|
|
$line =~ s///g; # Remove tag
|
1851
|
|
|
|
|
|
|
$line =~ s/<\/head>//g; # Remove tag
|
1852
|
|
|
|
|
|
|
$line = lc( $line ); # Convert all characters to lowercase
|
1853
|
|
|
|
|
|
|
$line =~ s/'s//g; # Remove "'s" characters (Apostrophe 's')
|
1854
|
|
|
|
|
|
|
$line =~ s/-/ /g; # Replace all hyphen characters to spaces
|
1855
|
|
|
|
|
|
|
$line =~ tr/a-z/ /cs; # Remove all characters except a to z
|
1856
|
|
|
|
|
|
|
$line =~ s/$stopListRegex//g if defined( $stopListRegex ); # Remove "stop" words
|
1857
|
|
|
|
|
|
|
$line =~ s/ +/ /g; # Remove duplicate white spaces between words
|
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
my $context = $dataEntry->contextStr;
|
1860
|
|
|
|
|
|
|
$context .= "$line ";
|
1861
|
|
|
|
|
|
|
$dataEntry->contextStr( $context );
|
1862
|
|
|
|
|
|
|
#print "Normalized Context: $line\n"; # REMOVE ME
|
1863
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
# Fetch next line for more context data
|
1865
|
|
|
|
|
|
|
$line = <$fileHandle>;
|
1866
|
|
|
|
|
|
|
chomp( $line );
|
1867
|
|
|
|
|
|
|
}
|
1868
|
|
|
|
|
|
|
}
|
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
# Fetch next line for end of instance data entry
|
1871
|
|
|
|
|
|
|
$line = <$fileHandle>;
|
1872
|
|
|
|
|
|
|
chomp( $line );
|
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
push( @dataAry, $dataEntry ) if index( $line, "" ) != -1;
|
1875
|
|
|
|
|
|
|
}
|
1876
|
|
|
|
|
|
|
}
|
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
undef( $fileHandle );
|
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
return \@dataAry;
|
1881
|
|
|
|
|
|
|
}
|
1882
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
# Assigns each stored instance the sense whose context is most similar to it.
#
# For every instance/sense pair the average word vector of each context string
# is obtained ( computed with ComputeAverageOfWords and cached on the entry via
# vectorAvgStr, or read back from that cache ) and the two averages are compared
# with ComputeCosineSimilarityOfWordVectors. The sense with the highest defined
# similarity is stored on the instance through calculatedSenseID / cosSimValue.
#
# Returns:
#   -1 when the instance or sense array is empty, 0 otherwise.
sub WSDCalculateCosineAvgSimilarity
{
    my ( $self ) = @_;

    my @instanceAry = $self->GetInstanceAry();
    my @senseAry    = $self->GetSenseAry();

    # Check(s)
    if( @instanceAry == 0 || @senseAry == 0 )
    {
        $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Error: Instance Or Sense Array Equals Zero - Cannot Continue" );
        return -1;
    }

    $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Starting Word Sense Disambiguation Computations" );

    my $word2vec = $self->GetWord2VecHandler();

    # Find The Best Matching Sense For Every Instance
    for my $instance ( @instanceAry )
    {
        my $instanceContext = $instance->contextStr;
        my @instanceWords   = split( ' ', $instanceContext );
        my $instanceAvgStr  = "";

        # Instance Context Average: Reuse The Cached Value When One Exists
        if( defined( $instance->vectorAvgStr ) )
        {
            $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Vector Average Of Instance: \"" .$instance->instanceID . "\" Context Previously Computed" ) if defined( $instance->instanceID );
            $instanceAvgStr = $instance->vectorAvgStr;
        }
        else
        {
            $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Calculating Vector Average Of Instance: \"" . $instance->instanceID . "\" Context" ) if defined( $instance->instanceID );

            $instanceAvgStr = $word2vec->ComputeAverageOfWords( \@instanceWords );
            $instance->vectorAvgStr( $instanceAvgStr ) if defined( $instanceAvgStr );
        }

        for my $sense ( @senseAry )
        {
            my $senseContext = $sense->contextStr;
            my @senseWords   = split( ' ', $senseContext );
            my $senseAvgStr  = "";

            # Sense Context Average: Reuse The Cached Value When One Exists
            if( defined( $sense->vectorAvgStr ) )
            {
                $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Vector Average Of Sense: \"" . $sense->senseID . "\" Context Previously Computed" ) if defined( $sense->senseID );
                $senseAvgStr = $sense->vectorAvgStr;
            }
            else
            {
                $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Calculating Vector Average Of Sense: \"" . $sense->senseID . "\" Context" ) if defined( $sense->senseID );

                $senseAvgStr = $word2vec->ComputeAverageOfWords( \@senseWords );
                $sense->vectorAvgStr( $senseAvgStr ) if defined( $senseAvgStr );
            }

            # Compute Cosine Similarity Of Average Vectors
            $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Calculating Cosine Similarity Between: \"$instanceContext\" and \"$senseContext\"" );
            my $cosSimValue = $word2vec->ComputeCosineSimilarityOfWordVectors( $instanceAvgStr, $senseAvgStr );

            # The First Sense Always Seeds The Instance; Later Senses Must Score Strictly Higher
            my $currentBest = $instance->cosSimValue;
            my $isNewBest   = !defined( $currentBest ) || ( defined( $cosSimValue ) && $cosSimValue > $currentBest );

            if( $isNewBest )
            {
                if( defined( $cosSimValue ) )
                {
                    $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Calculated Cosine Similarity Between Instance and Sense Context Greater Than Current Value" );
                    $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Assigning \"Instance ID: " . $instance->instanceID .
                                     "\" -> \"Calculated Sense ID: " . $sense->senseID . "\" - \"CosSimValue: " . $cosSimValue . "\"" );

                    $instance->calculatedSenseID( $sense->senseID );
                }
                else
                {
                    # No Similarity Could Be Computed: Record The Placeholder Sense ID
                    $instance->calculatedSenseID( "undef" );
                }

                $instance->cosSimValue( $cosSimValue );
            }
            elsif( defined( $cosSimValue ) && $cosSimValue <= $currentBest )
            {
                $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Calculated Cosine Similarity Between Instance and Sense Context Less Than Or Equal To Current Value" .
                                 " - \"CosSimValue: " . $cosSimValue . "\"" );
            }
        }
    }

    $self->WriteLog( "WSDCalculateCosineAvgSimilarity - Complete" );

    return 0;
}
|
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
# Computes the fraction of stored instances whose calculated sense ID equals
# their assigned sense ID.
#
# Returns:
#   -1 when there are no instances, otherwise ( matching instances / total instances ).
sub WSDCalculateAccuracy
{
    my ( $self ) = @_;

    my @instanceAry   = $self->GetInstanceAry();
    my $instanceTotal = scalar( @instanceAry );

    # Check(s)
    return -1 if $instanceTotal == 0;

    # "grep" In Scalar Context Yields The Number Of Matching Instances
    my $numberCorrect = grep { $_->calculatedSenseID eq $_->senseID } @instanceAry;

    return $numberCorrect / $instanceTotal;
}
|
2005
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
# Writes the accuracy and the per-instance disambiguation results to the log.
#
# The accuracy comes from WSDCalculateAccuracy; each instance is reported with
# its instance ID, assigned sense ID, calculated sense ID and cosine similarity
# ( or "undef" when no similarity value is stored ).
sub WSDPrintResults
{
    my ( $self ) = @_;

    # Must be invoked as a method on $self; the accuracy routine reads the instance array from the object.
    my $percentCorrect = $self->WSDCalculateAccuracy();

    $self->WriteLog( "Accuracy: $percentCorrect" );

    my @instanceAry = $self->GetInstanceAry();

    for my $instance ( @instanceAry )
    {
        $self->WriteLog( "InstanceID: " . $instance->instanceID );
        $self->WriteLog( " - Assigned SenseID: " . $instance->senseID );
        $self->WriteLog( " - Calculated SenseID: " . $instance->calculatedSenseID );
        $self->WriteLog( " - CosSim: " . $instance->cosSimValue ) if defined( $instance->cosSimValue );
        $self->WriteLog( " - CosSim: undef " ) if !defined( $instance->cosSimValue );
        $self->WriteLog( "" );
    }
}
|
2026
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
# Writes the accuracy and per-instance results to "<instancesFilePath>.results.txt".
#
# The first line holds the accuracy from WSDCalculateAccuracy; every following
# line describes one instance: instance ID, assigned sense ID, calculated sense
# ID and cosine similarity ( "undef" when no similarity value is stored ).
#
# Parameters:
#   $instancesFilePath - Path of the instances file the results belong to.
#
# Dies when the results file cannot be created.
sub WSDSaveResults
{
    my ( $self, $instancesFilePath ) = @_;

    open( my $resultsHandle, ">:encoding(utf8)", "$instancesFilePath.results.txt" ) or die "Error: Unable to create save file\n";

    my $percentCorrect = $self->WSDCalculateAccuracy();
    print $resultsHandle "Accuracy: $percentCorrect\n";

    my @instanceAry = $self->GetInstanceAry();

    for my $instance ( @instanceAry )
    {
        print $resultsHandle "InstanceID: " . $instance->instanceID;
        print $resultsHandle " - Assigned SenseID: " . $instance->senseID;
        print $resultsHandle " - Calculated SenseID: " . $instance->calculatedSenseID;

        if( defined( $instance->cosSimValue ) )
        {
            print $resultsHandle " - CosSim: " . $instance->cosSimValue;
        }
        else
        {
            print $resultsHandle " - CosSim: undef";
        }

        print $resultsHandle "\n";
    }

    close( $resultsHandle );
}
|
2051
|
|
|
|
|
|
|
|
2052
|
|
|
|
|
|
|
# Collects the "Accuracy:" line of every results file in a directory and writes
# them, sorted, to "AccuracyReport.txt" in that same directory.
#
# A results file is any directory entry whose name contains ".results". Each
# report line has the form "<name before .results> : <accuracy line>".
#
# Parameters:
#   $workingDir - Directory holding the results files.
#
# Returns:
#   -1 when the directory is undefined or does not exist; an empty return when
#   no results files are found. Dies when the directory, a results file or the
#   report file cannot be opened.
sub WSDGenerateAccuracyReport
{
    my ( $self, $workingDir ) = @_;

    # Check(s)
    if( !defined( $workingDir ) || !( -e $workingDir ) )
    {
        $self->WriteLog( "WSDGenerateAccuracyReport - Working Directory Does Not Exist" );
        return -1;
    }

    # Read File Name(s) From Specified Directory
    $self->WriteLog( "WSDGenerateAccuracyReport - Working Directory: $workingDir" );

    opendir( my $dirHandle, $workingDir ) or die "Error: Opening working directory\n";
    my @filesToParse = grep { index( $_, ".results" ) != -1 } readdir( $dirHandle );

    # Directory handles must be released with closedir; close() does not apply to them.
    closedir( $dirHandle );

    # Check(s)
    if( @filesToParse == 0 )
    {
        $self->WriteLog( "WSDGenerateAccuracyReport - Warning: No Results Files Found");
        return;
    }

    $self->WriteLog( "WSDGenerateAccuracyReport - Fetching Results From Files" );

    my @resultAry = ();

    # Fetch accuracy results from each file
    for my $resultFile ( @filesToParse )
    {
        open( my $tempHandle, "<:encoding(utf8)", "$workingDir/$resultFile" ) or die "Error opening: $resultFile\n";

        while( my $line = <$tempHandle> )
        {
            chomp( $line );

            next if index( $line, "Accuracy:" ) == -1;

            # Label the entry with the file name up to ".results"; a copy is used so the file list stays untouched.
            my $reportName = substr( $resultFile, 0, index( $resultFile, ".results" ) );
            push( @resultAry, "$reportName : $line" );
            last;
        }

        close( $tempHandle );
    }

    $self->WriteLog( "WSDGenerateAccuracyReport - Done fetching results" );
    $self->WriteLog( "WSDGenerateAccuracyReport - Saving data to file: \"AccuracyReport.txt\"" );

    # Save all results in file "AccuracyReport.txt"
    open( my $fileHandle, ">:encoding(utf8)", "$workingDir/AccuracyReport.txt" ) or die "Error creating file: \"AccuracyReport.txt\"\n";

    for my $result ( sort( @resultAry ) )
    {
        print $fileHandle $result . "\n";
    }

    close( $fileHandle );

    $self->WriteLog( "WSDGenerateAccuracyReport - Data saved" );
}
|
2124
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
# Not my own code
|
2126
|
|
|
|
|
|
|
# Builds one alternation regex string from a stop list file.
#
# Every non-blank line of the file must be a Perl regex written between forward
# slashes ( "/.../" ). The slashes are stripped, each pattern is wrapped in
# parentheses and all patterns are joined with "|". A line matching
# "@stop.mode = <word>" is validated ( AND / and / OR / or ) and otherwise
# ignored; the mode does not influence the returned regex.
#
# Parameters:
#   $stopListFilePath - Path of the stop list file.
#
# Returns:
#   The combined regex string, or undef when the path is undefined or the file
#   does not exist. Dies when the file cannot be opened. Prints to STDERR and
#   exits the program on an unsupported stop mode, on a pattern that is not
#   slash-delimited, or when the file holds no patterns at all.
sub _WSDStop
{
    my ( $self, $stopListFilePath ) = @_;

    # Check(s)
    if( !defined( $stopListFilePath ) )
    {
        $self->WriteLog( "_WSDStop - Error: Stop List File Path Not Defined" );
        return undef;
    }

    # Logged only after the path is known to be defined, so it can be interpolated safely.
    $self->WriteLog( "_WSDStop - Reading Stop List Path: \"$stopListFilePath\"" );

    if( !( -e $stopListFilePath ) )
    {
        $self->WriteLog( "_WSDStop - Error: Stop List File Path Does Not Exist" );
        return undef;
    }

    my @stopPatterns = ();

    # Lexical handle and explicit read mode: the path is never interpreted as an open mode.
    open( my $stopHandle, "<", $stopListFilePath ) or die( "Error: Couldn't Open The Stoplist File: $stopListFilePath\n" );

    while( my $entry = <$stopHandle> )
    {
        chomp( $entry );

        # Stop mode directive: validated, then skipped
        if( $entry =~ /\@stop.mode\s*=\s*(\w+)\s*$/ )
        {
            my $stopMode = $1;

            if( $stopMode !~ /^(AND|and|OR|or)$/ )
            {
                print STDERR "Requested Stop Mode $stopMode is not supported.\n";
                exit;
            }

            next;
        }

        # accepting Perl Regexs from Stopfile
        $entry =~ s/^\s+//;
        $entry =~ s/\s+$//;

        # handling blank lines
        next if $entry =~ /^\s*$/;

        # check if a valid Perl Regex
        if( $entry !~ /^\// )
        {
            print STDERR "Stop token regular expression <$entry> should start with '/'\n";
            exit;
        }

        if( $entry !~ /\/$/ )
        {
            print STDERR "Stop token regular expression <$entry> should end with '/'\n";
            exit;
        }

        # remove the / s from beginning and end
        $entry =~ s/^\///;
        $entry =~ s/\/$//;

        push( @stopPatterns, "(" . $entry . ")" );
    }

    close( $stopHandle );

    if( @stopPatterns == 0 )
    {
        print STDERR "No valid Perl Regular Experssion found in Stop file $stopListFilePath";
        exit;
    }

    # form a single big regex
    return join( "|", @stopPatterns );
}
|
2197
|
|
|
|
|
|
|
|
2198
|
|
|
|
|
|
|
sub ConvertStringLineEndingsToTargetOS
{
    # Normalizes Every Line Ending In "$str" To The Target OS Line Ending, Then Trims
    # Leading/Trailing Whitespace And Collapses Runs Of Whitespace To A Single Character.
    #
    # Returns: undef When "$str" Is Undefined, "" When It Is Empty, Otherwise The Converted String.
    my ( $self, $str ) = @_;

    # Check(s)
    if( !defined( $str ) )
    {
        $self->WriteLog( "ConvertLineEndingToTargetOS - Error: String Parameter Is Undefined" );
        return undef;
    }

    if( $str eq "" )
    {
        $self->WriteLog( "ConvertLineEndingToTargetOS - Warning: Cannot Convert Empty String" );
        return "";
    }

    # Convert String Line Ending Suitable To The Target
    # NOTE(review): The Target OS Is Hard-Coded To "linux" Here - Confirm This Is Intentional
    my $os           = "linux";
    my %lineEndingOf = ( "MSWin32" => "\015\012", "linux" => "\012", "MacOS" => "\015" );
    my $lineEnding   = exists( $lineEndingOf{ $os } ) ? $lineEndingOf{ $os } : "";

    # The Replacement Side Of s/// Is A Plain String: Parentheses There Would Be Inserted Literally
    $str =~ s/\015\012|\012|\015/$lineEnding/g;

    # Removes Spaces At Both Ends Of String And More Than Once Space In-Between Ends
    $str =~ s/^\s+|\s(?=\s)|\s+$//g;

    return $str;
}
|
2224
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
######################################################################################
|
2226
|
|
|
|
|
|
|
# Accessors
|
2227
|
|
|
|
|
|
|
######################################################################################
|
2228
|
|
|
|
|
|
|
|
2229
|
|
|
|
|
|
|
sub GetWord2VecDir
{
    # Returns The Stored word2vec Directory, Defaulting It To "" When Never Set
    my $self = shift;

    unless( defined( $self->{ _word2vecDir } ) )
    {
        $self->{ _word2vecDir } = "";
    }

    return $self->{ _word2vecDir };
}
|
2235
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
sub GetDebugLog
{
    # Returns The Console Logging Flag, Defaulting It To 0 When Never Set
    my $self = shift;

    unless( defined( $self->{ _debugLog } ) )
    {
        $self->{ _debugLog } = 0;
    }

    return $self->{ _debugLog };
}
|
2242
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
sub GetWriteLog
{
    # Returns The File Logging Flag, Defaulting It To 0 When Never Set
    my $self = shift;

    unless( defined( $self->{ _writeLog } ) )
    {
        $self->{ _writeLog } = 0;
    }

    return $self->{ _writeLog };
}
|
2249
|
|
|
|
|
|
|
|
2250
|
|
|
|
|
|
|
sub GetIgnoreCompileErrors
{
    # Returns The "_ignoreCompileErrors" Setting, Defaulting It To 0 When Never Set
    my $self = shift;

    unless( defined( $self->{ _ignoreCompileErrors } ) )
    {
        $self->{ _ignoreCompileErrors } = 0;
    }

    return $self->{ _ignoreCompileErrors };
}
|
2256
|
|
|
|
|
|
|
|
2257
|
|
|
|
|
|
|
sub GetIgnoreFileChecks
{
    # Returns The "_ignoreFileChecks" Setting, Defaulting It To 0 When Never Set
    my $self = shift;

    unless( defined( $self->{ _ignoreFileChecks } ) )
    {
        $self->{ _ignoreFileChecks } = 0;
    }

    return $self->{ _ignoreFileChecks };
}
|
2263
|
|
|
|
|
|
|
|
2264
|
|
|
|
|
|
|
sub GetExitFlag
{
    # Returns The "_exitFlag" Setting, Defaulting It To 0 When Never Set
    my $self = shift;

    unless( defined( $self->{ _exitFlag } ) )
    {
        $self->{ _exitFlag } = 0;
    }

    return $self->{ _exitFlag };
}
|
2270
|
|
|
|
|
|
|
|
2271
|
|
|
|
|
|
|
sub GetFileHandle
{
    # Returns The Log File Handle, Opening "InterfaceLog.txt" (UTF-8, Truncating) On First Use
    # When File Logging Is Enabled. Returns undef When Logging Is Disabled Or The File
    # Could Not Be Opened.
    my ( $self ) = @_;

    # Setup File Handle If Not Already Defined
    if( !defined( $self->{ _fileHandle } ) && $self->{ _writeLog } )
    {
        # autoflush() Is An IO::Handle Method; Load It Explicitly For Perls That Do Not Autoload It
        require IO::Handle;

        my $fileHandle;

        if( open( $fileHandle, '>:utf8', 'InterfaceLog.txt' ) )
        {
            $fileHandle->autoflush( 1 );           # Auto-flushes writes to log file
            $self->{ _fileHandle } = $fileHandle;
        }
        else
        {
            # Keep The Handle Undefined So Callers Testing defined() Skip File Output
            warn( "GetFileHandle - Warning: Unable To Open \"InterfaceLog.txt\" For Writing: $!\n" );
        }
    }

    return $self->{ _fileHandle };
}
|
2284
|
|
|
|
|
|
|
|
2285
|
|
|
|
|
|
|
sub GetWorkingDirectory
{
    # Returns The Stored Working Directory, Defaulting It To The Current Directory When Never Set
    my $self = shift;

    unless( defined( $self->{ _workingDir } ) )
    {
        $self->{ _workingDir } = Cwd::getcwd();
    }

    return $self->{ _workingDir };
}
|
2291
|
|
|
|
|
|
|
|
2292
|
|
|
|
|
|
|
sub GetWord2VecHandler
{
    # Returns The Word2vec::Word2vec Handler, Creating It On First Use With The
    # Interface's Current Debug/Write Log Settings.
    my ( $self ) = @_;
    my $debugLog = $self->GetDebugLog();
    my $writeLog = $self->GetWriteLog();

    # Direct Class->new() Call: Indirect Object Syntax ("new Class(...)") Is Parsed Ambiguously
    $self->{ _word2vec } = Word2vec::Word2vec->new( $debugLog, $writeLog ) if !defined ( $self->{ _word2vec } );
    return $self->{ _word2vec };
}
|
2300
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
sub GetWord2PhraseHandler
{
    # Returns The Word2vec::Word2phrase Handler, Creating It On First Use With The
    # Interface's Current Debug/Write Log Settings.
    my ( $self ) = @_;
    my $debugLog = $self->GetDebugLog();
    my $writeLog = $self->GetWriteLog();

    # Direct Class->new() Call: Indirect Object Syntax ("new Class(...)") Is Parsed Ambiguously
    $self->{ _word2phrase } = Word2vec::Word2phrase->new( $debugLog, $writeLog ) if !defined ( $self->{ _word2phrase } );
    return $self->{ _word2phrase };
}
|
2309
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
sub GetXMLToW2VHandler
{
    # Returns The Word2vec::Xmltow2v Handler, Creating It On First Use With The
    # Interface's Current Debug/Write Log Settings Followed By The Fixed Arguments 1, 1, 1, 1, 2.
    my ( $self ) = @_;
    my $debugLog = $self->GetDebugLog();
    my $writeLog = $self->GetWriteLog();

    # Direct Class->new() Call: Indirect Object Syntax ("new Class(...)") Is Parsed Ambiguously
    $self->{ _xmltow2v } = Word2vec::Xmltow2v->new( $debugLog, $writeLog, 1, 1, 1, 1, 2 ) if !defined ( $self->{ _xmltow2v } );
    return $self->{ _xmltow2v };
}
|
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
sub GetUtilHandler
{
    # Returns The Word2vec::Util Handler, Creating It On First Use With The
    # Interface's Current Debug/Write Log Settings.
    my ( $self ) = @_;
    my $debugLog = $self->GetDebugLog();
    my $writeLog = $self->GetWriteLog();

    # Direct Class->new() Call: Indirect Object Syntax ("new Class(...)") Is Parsed Ambiguously
    $self->{ _util } = Word2vec::Util->new( $debugLog, $writeLog ) if !defined ( $self->{ _util } );
    return $self->{ _util };
}
|
2327
|
|
|
|
|
|
|
|
2328
|
|
|
|
|
|
|
sub GetInstanceAry
{
    # Returns The Elements Of The Instance Array (A List, Not A Reference),
    # Creating An Empty Array First When None Is Stored
    my $self = shift;

    $self->{ _instanceAry } = [] unless defined( $self->{ _instanceAry } );

    return @{ $self->{ _instanceAry } };
}
|
2334
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
sub GetSenseAry
{
    # Returns The Elements Of The Sense Array (A List, Not A Reference),
    # Creating An Empty Array First When None Is Stored
    my $self = shift;

    $self->{ _senseAry } = [] unless defined( $self->{ _senseAry } );

    return @{ $self->{ _senseAry } };
}
|
2341
|
|
|
|
|
|
|
|
2342
|
|
|
|
|
|
|
sub GetInstanceCount
{
    # Returns The Stored Instance Count, Defaulting It To 0 When Never Set
    my $self = shift;

    unless( defined( $self->{ _instanceCount } ) )
    {
        $self->{ _instanceCount } = 0;
    }

    return $self->{ _instanceCount };
}
|
2348
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
sub GetSenseCount
{
    # Returns The Stored Sense Count, Defaulting It To 0 When Never Set
    my $self = shift;

    unless( defined( $self->{ _senseCount } ) )
    {
        $self->{ _senseCount } = 0;
    }

    return $self->{ _senseCount };
}
|
2355
|
|
|
|
|
|
|
|
2356
|
|
|
|
|
|
|
|
2357
|
|
|
|
|
|
|
######################################################################################
|
2358
|
|
|
|
|
|
|
# Mutators
|
2359
|
|
|
|
|
|
|
######################################################################################
|
2360
|
|
|
|
|
|
|
|
2361
|
|
|
|
|
|
|
sub SetWord2VecDir
{
    # Stores A New word2vec Executable Directory And Pushes It To The word2vec And
    # word2phrase Setters. An Undefined Directory Is Ignored.
    my $self = shift;
    my $dir  = shift;

    if( defined( $dir ) )
    {
        $self->WriteLog( "SetWord2VecDir - Changing Word2Vec Executable Directory To $dir" );
        $self->WriteLog( "SetWord2VecDir - Adjusting For \"word2vec\" And \"word2phrase\" Objects" );

        # Set word2vec Directory In Respective Objects
        $self->W2VSetWord2VecExeDir( $dir );
        $self->W2PSetWord2PhraseExeDir( $dir );
    }

    return $self->{ _word2vecDir } = $dir if defined( $dir );
}
|
2374
|
|
|
|
|
|
|
|
2375
|
|
|
|
|
|
|
sub SetDebugLog
{
    # Stores The Console Logging Flag And Returns It; An Undefined Value Is Ignored
    my $self  = shift;
    my $state = shift;
    return $self->{ _debugLog } = $state if defined( $state );
}
|
2380
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
sub SetWriteLog
{
    # Stores The File Logging Flag And Returns It; An Undefined Value Is Ignored
    my $self  = shift;
    my $state = shift;
    return $self->{ _writeLog } = $state if defined( $state );
}
|
2386
|
|
|
|
|
|
|
|
2387
|
|
|
|
|
|
|
# Note: Useless Sub-routines - Remove Me
|
2388
|
|
|
|
|
|
|
sub SetIgnoreCompileErrors
{
    # Stores The "_ignoreCompileErrors" Setting And Returns It; An Undefined Value Is Ignored
    my $self  = shift;
    my $state = shift;
    return $self->{ _ignoreCompileErrors } = $state if defined( $state );
}
|
2393
|
|
|
|
|
|
|
|
2394
|
|
|
|
|
|
|
# Note: Useless Sub-routines - Remove Me
|
2395
|
|
|
|
|
|
|
sub SetIgnoreFileCheckErrors
{
    # Stores The "_ignoreFileChecks" Setting And Returns It; An Undefined Value Is Ignored
    my $self  = shift;
    my $state = shift;
    return $self->{ _ignoreFileChecks } = $state if defined( $state );
}
|
2400
|
|
|
|
|
|
|
|
2401
|
|
|
|
|
|
|
sub SetWorkingDirectory
{
    # Stores A New Working Directory And Returns It; An Undefined Value Is Ignored.
    my ( $self, $temp ) = @_;

    # Log The Change Only When Both The Previous And The New Directory Are Defined
    $self->WriteLog( "SetWorkingDirectory - Directory Changed From: \"" . $self->{ _workingDir } . "\" To \"$temp\"" )
        if defined( $self->{ _workingDir } ) && defined( $temp );

    return $self->{ _workingDir } = $temp if defined( $temp );
}
|
2408
|
|
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
sub SetInstanceAry
{
    # Copies The Elements Of The Given Array Reference Into The Instance Array;
    # An Undefined Reference Is Ignored
    my $self      = shift;
    my $sourceRef = shift;
    return @{ $self->{ _instanceAry } } = @{ $sourceRef } if defined( $sourceRef );
}
|
2414
|
|
|
|
|
|
|
|
2415
|
|
|
|
|
|
|
sub ClearInstanceAry
{
    # Releases The Instance Array's Contents And Leaves It Empty
    my $self = shift;
    undef @{ $self->{ _instanceAry } };
    return @{ $self->{ _instanceAry } } = ();
}
|
2421
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
sub SetSenseAry
{
    # Copies The Elements Of The Given Array Reference Into The Sense Array;
    # An Undefined Reference Is Ignored
    my $self      = shift;
    my $sourceRef = shift;
    return @{ $self->{ _senseAry } } = @{ $sourceRef } if defined( $sourceRef );
}
|
2427
|
|
|
|
|
|
|
|
2428
|
|
|
|
|
|
|
sub ClearSenseAry
{
    # Releases The Sense Array's Contents And Leaves It Empty
    my $self = shift;
    undef @{ $self->{ _senseAry } };
    return @{ $self->{ _senseAry } } = ();
}
|
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
sub SetInstanceCount
{
    # Stores The Instance Count And Returns It; An Undefined Value Is Ignored
    my $self  = shift;
    my $count = shift;
    return $self->{ _instanceCount } = $count if defined( $count );
}
|
2440
|
|
|
|
|
|
|
|
2441
|
|
|
|
|
|
|
sub SetSenseCount
{
    # Stores The Sense Count And Returns It; An Undefined Value Is Ignored
    my $self  = shift;
    my $count = shift;
    return $self->{ _senseCount } = $count if defined( $count );
}
|
2446
|
|
|
|
|
|
|
|
2447
|
|
|
|
|
|
|
|
2448
|
|
|
|
|
|
|
######################################################################################
|
2449
|
|
|
|
|
|
|
# Debug Functions
|
2450
|
|
|
|
|
|
|
######################################################################################
|
2451
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
sub GetTime
{
    # Returns The Current Local Time As "HH:MM:SS", Each Field Zero-Padded To Two Digits.
    # The Invocant Is Not Used, So This Also Works When Called As A Plain Function.
    my ( $seconds, $minutes, $hours ) = ( localtime() )[ 0 .. 2 ];

    return sprintf( "%02d:%02d:%02d", $hours, $minutes, $seconds );
}
|
2474
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
sub GetDate
{
    # Returns The Current Local Date As "M/D/YYYY" (Month And Day Are Not Zero-Padded).
    # The Invocant Is Not Used, So This Also Works When Called As A Plain Function.
    my ( $day, $month, $year ) = ( localtime() )[ 3, 4, 5 ];

    # localtime() Reports Months From 0 And Years As An Offset From 1900
    return join( "/", $month + 1, $day, $year + 1900 );
}
|
2485
|
|
|
|
|
|
|
|
2486
|
|
|
|
|
|
|
sub WriteLog
{
    # Writes "$string" With A Date/Time Prefix To The Console (When Debug Logging Is On)
    # And/Or To The Log File (When File Logging Is On). A Newline Is Appended Unless
    # "$printNewLine" Is Passed As 0. Does Nothing When "$string" Is Undefined.
    my ( $self, $string, $printNewLine ) = @_;

    return if !defined ( $string );
    $printNewLine = 1 if !defined ( $printNewLine );

    # Only Objects Blessed Directly Into This Package May Log
    my $calledFromOutside = ( ref ( $self ) ne "Word2vec::Interface" );

    # Console Output
    if( $self->GetDebugLog() )
    {
        if( $calledFromOutside )
        {
            print( GetDate() . " " . GetTime() . " - interface: Cannot Call WriteLog() From Outside Module!\n" );
            return;
        }

        print( GetDate() . " " . GetTime() . " - interface::$string" );
        print( "\n" ) if( $printNewLine != 0 );
    }

    # Log File Output
    if( $self->GetWriteLog() )
    {
        if( $calledFromOutside )
        {
            print( GetDate() . " " . GetTime() . " - interface: Cannot Call WriteLog() From Outside Module!\n" );
            return;
        }

        my $logHandle = $self->GetFileHandle();

        if( defined( $logHandle ) )
        {
            print { $logHandle } GetDate() . " " . GetTime() . " - interface::$string";
            print { $logHandle } "\n" if( $printNewLine != 0 );
        }
    }
}
|
2526
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
|
2528
|
|
|
|
|
|
|
######################################################################################
|
2529
|
|
|
|
|
|
|
# Utility Module Functions
|
2530
|
|
|
|
|
|
|
######################################################################################
|
2531
|
|
|
|
|
|
|
|
2532
|
|
|
|
|
|
|
sub IsFileOrDirectory
{
    # Forwards To IsFileOrDirectory() On The Util Handler And Returns Its Result
    my $self = shift;
    my $path = shift;
    my $util = $self->GetUtilHandler();
    return $util->IsFileOrDirectory( $path );
}
|
2537
|
|
|
|
|
|
|
|
2538
|
|
|
|
|
|
|
sub GetFilesInDirectory
{
    # Forwards To GetFilesInDirectory() On The Util Handler And Returns Its Result
    my $self = shift;
    my ( $directoryPath, $fileTagStr ) = @_;
    my $util = $self->GetUtilHandler();
    return $util->GetFilesInDirectory( $directoryPath, $fileTagStr );
}
|
2543
|
|
|
|
|
|
|
|
2544
|
|
|
|
|
|
|
|
2545
|
|
|
|
|
|
|
######################################################################################
|
2546
|
|
|
|
|
|
|
# Word2Vec Module Functions
|
2547
|
|
|
|
|
|
|
######################################################################################
|
2548
|
|
|
|
|
|
|
|
2549
|
|
|
|
|
|
|
sub W2VExecuteTraining
{
    # Forwards The Eighteen Positional Training Options, In The Order Received,
    # To ExecuteTraining() On The Word2vec Handler And Returns Its Result
    my $self = shift;

    my ( $trainFilePath, $outputFilePath, $vectorSize, $windowSize, $minCount, $sample,
         $negative, $alpha, $hs, $binary, $numOfThreads, $iterations,
         $useCBOW, $classes, $readVocab, $saveVocab, $debug, $overwrite ) = @_;

    my $word2vec = $self->GetWord2VecHandler();

    return $word2vec->ExecuteTraining(
        $trainFilePath, $outputFilePath, $vectorSize, $windowSize, $minCount, $sample,
        $negative, $alpha, $hs, $binary, $numOfThreads, $iterations,
        $useCBOW, $classes, $readVocab, $saveVocab, $debug, $overwrite
    );
}
|
2560
|
|
|
|
|
|
|
|
2561
|
|
|
|
|
|
|
sub W2VExecuteStringTraining
{
    # Forwards The Training String And Seventeen Positional Options, In The Order Received,
    # To ExecuteStringTraining() On The Word2vec Handler And Returns Its Result
    my $self = shift;

    my ( $trainingStr, $outputFilePath, $vectorSize, $windowSize, $minCount, $sample,
         $negative, $alpha, $hs, $binary, $numOfThreads, $iterations,
         $useCBOW, $classes, $readVocab, $saveVocab, $debug, $overwrite ) = @_;

    my $word2vec = $self->GetWord2VecHandler();

    return $word2vec->ExecuteStringTraining(
        $trainingStr, $outputFilePath, $vectorSize, $windowSize, $minCount, $sample,
        $negative, $alpha, $hs, $binary, $numOfThreads, $iterations,
        $useCBOW, $classes, $readVocab, $saveVocab, $debug, $overwrite
    );
}
|
2572
|
|
|
|
|
|
|
|
2573
|
|
|
|
|
|
|
sub W2VComputeCosineSimilarity
{
    # Forwards To ComputeCosineSimilarity() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstWord, $secondWord ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ComputeCosineSimilarity( $firstWord, $secondWord );
}
|
2578
|
|
|
|
|
|
|
|
2579
|
|
|
|
|
|
|
sub W2VComputeAvgOfWordsCosineSimilarity
{
    # Forwards To ComputeAvgOfWordsCosineSimilarity() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstAvgStr, $secondAvgStr ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ComputeAvgOfWordsCosineSimilarity( $firstAvgStr, $secondAvgStr );
}
|
2584
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
sub W2VComputeMultiWordCosineSimilarity
{
    # Forwards To ComputeMultiWordCosineSimilarity() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstWord, $secondWord ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ComputeMultiWordCosineSimilarity( $firstWord, $secondWord );
}
|
2590
|
|
|
|
|
|
|
|
2591
|
|
|
|
|
|
|
sub W2VComputeCosineSimilarityOfWordVectors
{
    # Forwards To ComputeCosineSimilarityOfWordVectors() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstVectorData, $secondVectorData ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ComputeCosineSimilarityOfWordVectors( $firstVectorData, $secondVectorData );
}
|
2596
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
sub W2VCosSimWithUserInput
{
    # Forwards To CosSimWithUserInput() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->CosSimWithUserInput();
}
|
2602
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
sub W2VMultiWordCosSimWithUserInput
{
    # Forwards To MultiWordCosSimWithUserInput() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->MultiWordCosSimWithUserInput();
}
|
2608
|
|
|
|
|
|
|
|
2609
|
|
|
|
|
|
|
sub W2VComputeAverageOfWords
{
    # Forwards To ComputeAverageOfWords() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $wordsRef = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ComputeAverageOfWords( $wordsRef );
}
|
2614
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
sub W2VAddTwoWords
{
    # Forwards To AddTwoWords() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstWord, $secondWord ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->AddTwoWords( $firstWord, $secondWord );
}
|
2620
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
sub W2VSubtractTwoWords
{
    # Forwards To SubtractTwoWords() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstWord, $secondWord ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SubtractTwoWords( $firstWord, $secondWord );
}
|
2626
|
|
|
|
|
|
|
|
2627
|
|
|
|
|
|
|
sub W2VAddTwoWordVectors
{
    # Forwards To AddTwoWordVectors() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstVector, $secondVector ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->AddTwoWordVectors( $firstVector, $secondVector );
}
|
2632
|
|
|
|
|
|
|
|
2633
|
|
|
|
|
|
|
sub W2VSubtractTwoWordVectors
{
    # Forwards To SubtractTwoWordVectors() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $firstVector, $secondVector ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SubtractTwoWordVectors( $firstVector, $secondVector );
}
|
2638
|
|
|
|
|
|
|
|
2639
|
|
|
|
|
|
|
sub W2VAverageOfTwoWordVectors
{
    # Forwards To AverageOfTwoWordVectors() On The Word2vec Handler And Returns Its Result.
    #
    # The Handler Method Is Called Without The "W2V" Prefix, Matching Every Other
    # W2V* Wrapper In This Module (The Prefix Belongs To The Interface, Not The Handler).
    # NOTE(review): Confirm Word2vec::Word2vec Defines AverageOfTwoWordVectors()
    my ( $self, $wordA, $wordB ) = @_;
    return $self->GetWord2VecHandler()->AverageOfTwoWordVectors( $wordA, $wordB );
}
|
2644
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
sub W2VGetWordVector
{
    # Forwards To GetWordVector() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetWordVector( $word );
}
|
2650
|
|
|
|
|
|
|
|
2651
|
|
|
|
|
|
|
sub W2VIsVectorDataInMemory
{
    # Forwards To IsVectorDataInMemory() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->IsVectorDataInMemory();
}
|
2656
|
|
|
|
|
|
|
|
2657
|
|
|
|
|
|
|
sub W2VIsVectorDataSorted
{
    # Forwards To IsVectorDataSorted() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $dataRef  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->IsVectorDataSorted( $dataRef );
}
|
2662
|
|
|
|
|
|
|
|
2663
|
|
|
|
|
|
|
sub W2VCheckWord2VecDataFileType
{
    # Forwards To CheckWord2VecDataFileType() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $filePath = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->CheckWord2VecDataFileType( $filePath );
}
|
2668
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
sub W2VReadTrainedVectorDataFromFile
{
    # Forwards To ReadTrainedVectorDataFromFile() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $filePath = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ReadTrainedVectorDataFromFile( $filePath );
}
|
2674
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
sub W2VSaveTrainedVectorDataToFile
{
    # Forwards To SaveTrainedVectorDataToFile() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $targetFile, $format ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SaveTrainedVectorDataToFile( $targetFile, $format );
}
|
2680
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
sub W2VStringsAreEqual
{
    # Forwards To StringsAreEqual() On The Word2vec Handler And Returns Its Result
    my $self = shift;
    my ( $left, $right ) = @_;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->StringsAreEqual( $left, $right );
}
|
2686
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
sub W2VConvertRawSparseTextToVectorDataAry
{
    # Forwards To ConvertRawSparseTextToVectorDataAry() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $rawText  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ConvertRawSparseTextToVectorDataAry( $rawText );
}
|
2692
|
|
|
|
|
|
|
|
2693
|
|
|
|
|
|
|
sub W2VConvertRawSparseTextToVectorDataHash
{
    # Forwards To ConvertRawSparseTextToVectorDataHash() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $rawText  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->ConvertRawSparseTextToVectorDataHash( $rawText );
}
|
2698
|
|
|
|
|
|
|
|
2699
|
|
|
|
|
|
|
sub W2VGetDebugLog
{
    # Forwards To GetDebugLog() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetDebugLog();
}
|
2704
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
sub W2VGetWriteLog
{
    # Forwards To GetWriteLog() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetWriteLog();
}
|
2710
|
|
|
|
|
|
|
|
2711
|
|
|
|
|
|
|
sub W2VGetFileHandle
{
    # Forwards To GetFileHandle() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetFileHandle();
}
|
2716
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
sub W2VGetTrainFilePath
{
    # Forwards To GetTrainFilePath() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetTrainFilePath();
}
|
2722
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
sub W2VGetOutputFilePath
{
    # Forwards To GetOutputFilePath() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetOutputFilePath();
}
|
2728
|
|
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
sub W2VGetWordVecSize
{
    # Forwards To GetWordVecSize() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetWordVecSize();
}
|
2734
|
|
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
sub W2VGetWindowSize
{
    # Forwards To GetWindowSize() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetWindowSize();
}
|
2740
|
|
|
|
|
|
|
|
2741
|
|
|
|
|
|
|
sub W2VGetSample
{
    # Forwards To GetSample() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetSample();
}
|
2746
|
|
|
|
|
|
|
|
2747
|
|
|
|
|
|
|
sub W2VGetHSoftMax
{
    # Forwards To GetHSoftMax() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetHSoftMax();
}
|
2752
|
|
|
|
|
|
|
|
2753
|
|
|
|
|
|
|
sub W2VGetNegative
{
    # Forwards To GetNegative() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetNegative();
}
|
2758
|
|
|
|
|
|
|
|
2759
|
|
|
|
|
|
|
sub W2VGetNumOfThreads
{
    # Forwards To GetNumOfThreads() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetNumOfThreads();
}
|
2764
|
|
|
|
|
|
|
|
2765
|
|
|
|
|
|
|
sub W2VGetNumOfIterations
{
    # Forwards To GetNumOfIterations() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetNumOfIterations();
}
|
2770
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
sub W2VGetMinCount
{
    # Forwards To GetMinCount() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetMinCount();
}
|
2776
|
|
|
|
|
|
|
|
2777
|
|
|
|
|
|
|
sub W2VGetAlpha
{
    # Forwards To GetAlpha() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetAlpha();
}
|
2782
|
|
|
|
|
|
|
|
2783
|
|
|
|
|
|
|
sub W2VGetClasses
{
    # Forwards To GetClasses() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetClasses();
}
|
2788
|
|
|
|
|
|
|
|
2789
|
|
|
|
|
|
|
sub W2VGetDebugTraining
{
    # Forwards To GetDebugTraining() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetDebugTraining();
}
|
2794
|
|
|
|
|
|
|
|
2795
|
|
|
|
|
|
|
sub W2VGetBinaryOutput
{
    # Forwards To GetBinaryOutput() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetBinaryOutput();
}
|
2800
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
sub W2VGetSaveVocabFilePath
{
    # Forwards To GetSaveVocabFilePath() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetSaveVocabFilePath();
}
|
2806
|
|
|
|
|
|
|
|
2807
|
|
|
|
|
|
|
sub W2VGetReadVocabFilePath
{
    # Forwards To GetReadVocabFilePath() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetReadVocabFilePath();
}
|
2812
|
|
|
|
|
|
|
|
2813
|
|
|
|
|
|
|
sub W2VGetUseCBOW
{
    # Forwards To GetUseCBOW() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetUseCBOW();
}
|
2818
|
|
|
|
|
|
|
|
2819
|
|
|
|
|
|
|
sub W2VGetWorkingDir
{
    # Forwards To GetWorkingDir() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetWorkingDir();
}
|
2824
|
|
|
|
|
|
|
|
2825
|
|
|
|
|
|
|
sub W2VGetWord2VecExeDir
{
    # Forwards To GetWord2VecExeDir() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetWord2VecExeDir();
}
|
2830
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
sub W2VGetVocabularyHash
{
    # Forwards To GetVocabularyHash() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetVocabularyHash();
}
|
2836
|
|
|
|
|
|
|
|
2837
|
|
|
|
|
|
|
sub W2VGetOverwriteOldFile
{
    # Forwards To GetOverwriteOldFile() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetOverwriteOldFile();
}
|
2842
|
|
|
|
|
|
|
|
2843
|
|
|
|
|
|
|
sub W2VGetSparseVectorMode
{
    # Forwards To GetSparseVectorMode() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetSparseVectorMode();
}
|
2848
|
|
|
|
|
|
|
|
2849
|
|
|
|
|
|
|
sub W2VGetVectorLength
{
    # Forwards To GetVectorLength() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetVectorLength();
}
|
2854
|
|
|
|
|
|
|
|
2855
|
|
|
|
|
|
|
sub W2VGetNumberOfWords
{
    # Forwards To GetNumberOfWords() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetNumberOfWords();
}
|
2860
|
|
|
|
|
|
|
|
2861
|
|
|
|
|
|
|
sub W2VGetMinimizeMemoryUsage
{
    # Forwards To GetMinimizeMemoryUsage() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->GetMinimizeMemoryUsage();
}
|
2866
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
sub W2VSetTrainFilePath
{
    # Forwards The Given Path To SetTrainFilePath() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $path     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetTrainFilePath( $path );
}
|
2872
|
|
|
|
|
|
|
|
2873
|
|
|
|
|
|
|
sub W2VSetOutputFilePath
{
    # Forwards The Given Path To SetOutputFilePath() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $path     = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetOutputFilePath( $path );
}
|
2878
|
|
|
|
|
|
|
|
2879
|
|
|
|
|
|
|
sub W2VSetWordVecSize
{
    # Forwards The Given Value To SetWordVecSize() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $setting  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetWordVecSize( $setting );
}
|
2884
|
|
|
|
|
|
|
|
2885
|
|
|
|
|
|
|
sub W2VSetWindowSize
{
    # Forwards The Given Value To SetWindowSize() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $setting  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetWindowSize( $setting );
}
|
2890
|
|
|
|
|
|
|
|
2891
|
|
|
|
|
|
|
sub W2VSetSample
{
    # Forwards The Given Value To SetSample() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $setting  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetSample( $setting );
}
|
2896
|
|
|
|
|
|
|
|
2897
|
|
|
|
|
|
|
sub W2VSetHSoftMax
{
    # Forwards The Given Value To SetHSoftMax() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $setting  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetHSoftMax( $setting );
}
|
2902
|
|
|
|
|
|
|
|
2903
|
|
|
|
|
|
|
sub W2VSetNegative
{
    # Forwards The Given Value To SetNegative() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $setting  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetNegative( $setting );
}
|
2908
|
|
|
|
|
|
|
|
2909
|
|
|
|
|
|
|
sub W2VSetNumOfThreads
{
    # Forwards The Given Value To SetNumOfThreads() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $setting  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetNumOfThreads( $setting );
}
|
2914
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
sub W2VSetNumOfIterations
{
    # Forwards The Given Value To SetNumOfIterations() On The Word2vec Handler And Returns Its Result
    my $self     = shift;
    my $setting  = shift;
    my $word2vec = $self->GetWord2VecHandler();
    return $word2vec->SetNumOfIterations( $setting );
}
|
2920
|
|
|
|
|
|
|
|
2921
|
|
|
|
|
|
|
# Delegates to SetMinCount( $count ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetMinCount
{
    my $self    = shift;
    my $count   = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetMinCount( $count );
}
|
2926
|
|
|
|
|
|
|
|
2927
|
|
|
|
|
|
|
# Delegates to SetAlpha( $alpha ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetAlpha
{
    my $self    = shift;
    my $alpha   = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetAlpha( $alpha );
}
|
2932
|
|
|
|
|
|
|
|
2933
|
|
|
|
|
|
|
# Delegates to SetClasses( $classes ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetClasses
{
    my $self    = shift;
    my $classes = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetClasses( $classes );
}
|
2938
|
|
|
|
|
|
|
|
2939
|
|
|
|
|
|
|
# Delegates to SetDebugTraining( $level ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetDebugTraining
{
    my $self    = shift;
    my $level   = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetDebugTraining( $level );
}
|
2944
|
|
|
|
|
|
|
|
2945
|
|
|
|
|
|
|
# Delegates to SetBinaryOutput( $flag ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetBinaryOutput
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetBinaryOutput( $flag );
}
|
2950
|
|
|
|
|
|
|
|
2951
|
|
|
|
|
|
|
# Delegates to SetSaveVocabFilePath( $path ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetSaveVocabFilePath
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetSaveVocabFilePath( $path );
}
|
2956
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
# Delegates to SetReadVocabFilePath( $path ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetReadVocabFilePath
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetReadVocabFilePath( $path );
}
|
2962
|
|
|
|
|
|
|
|
2963
|
|
|
|
|
|
|
# Delegates to SetUseCBOW( $flag ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetUseCBOW
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetUseCBOW( $flag );
}
|
2968
|
|
|
|
|
|
|
|
2969
|
|
|
|
|
|
|
# Delegates to SetWorkingDir( $directory ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetWorkingDir
{
    my $self      = shift;
    my $directory = shift;
    my $handler   = $self->GetWord2VecHandler();
    return $handler->SetWorkingDir( $directory );
}
|
2974
|
|
|
|
|
|
|
|
2975
|
|
|
|
|
|
|
# Delegates to SetWord2VecExeDir( $directory ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetWord2VecExeDir
{
    my $self      = shift;
    my $directory = shift;
    my $handler   = $self->GetWord2VecHandler();
    return $handler->SetWord2VecExeDir( $directory );
}
|
2980
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
# Delegates to SetVocabularyHash( $hashRef ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetVocabularyHash
{
    my $self    = shift;
    my $hashRef = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetVocabularyHash( $hashRef );
}
|
2986
|
|
|
|
|
|
|
|
2987
|
|
|
|
|
|
|
# Calls ClearVocabularyHash() with no arguments on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VClearVocabularyHash
{
    my $self    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->ClearVocabularyHash();
}
|
2992
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
# Delegates to AddWordVectorToVocabHash( $vectorStr ) on the object returned
# by GetWord2VecHandler() and returns that call's result unchanged.
sub W2VAddWordVectorToVocabHash
{
    my $self      = shift;
    my $vectorStr = shift;
    my $handler   = $self->GetWord2VecHandler();
    return $handler->AddWordVectorToVocabHash( $vectorStr );
}
|
2998
|
|
|
|
|
|
|
|
2999
|
|
|
|
|
|
|
# Delegates to SetOverwriteOldFile( $flag ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetOverwriteOldFile
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetOverwriteOldFile( $flag );
}
|
3004
|
|
|
|
|
|
|
|
3005
|
|
|
|
|
|
|
# Delegates to SetSparseVectorMode( $flag ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetSparseVectorMode
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetSparseVectorMode( $flag );
}
|
3010
|
|
|
|
|
|
|
|
3011
|
|
|
|
|
|
|
# Delegates to SetVectorLength( $length ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetVectorLength
{
    my $self    = shift;
    my $length  = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetVectorLength( $length );
}
|
3016
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
# Delegates to SetNumberOfWords( $count ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetNumberOfWords
{
    my $self    = shift;
    my $count   = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetNumberOfWords( $count );
}
|
3022
|
|
|
|
|
|
|
|
3023
|
|
|
|
|
|
|
# Delegates to SetMinimizeMemoryUsage( $flag ) on the object returned by
# GetWord2VecHandler() and returns that call's result unchanged.
sub W2VSetMinimizeMemoryUsage
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetWord2VecHandler();
    return $handler->SetMinimizeMemoryUsage( $flag );
}
|
3028
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
######################################################################################
|
3030
|
|
|
|
|
|
|
# Word2Phrase Module Functions
|
3031
|
|
|
|
|
|
|
######################################################################################
|
3032
|
|
|
|
|
|
|
|
3033
|
|
|
|
|
|
|
# Delegates to ExecuteTraining() on the object returned by
# GetWord2PhraseHandler(), always passing six positional arguments
# (missing ones are forwarded as undef), and returns that call's result.
sub W2PExecuteTraining
{
    my $self = shift;
    my ( $trainFile, $outputFile, $minCount, $threshold, $debug, $overwrite ) = @_;

    my $phraseHandler = $self->GetWord2PhraseHandler();
    return $phraseHandler->ExecuteTraining( $trainFile, $outputFile, $minCount,
                                            $threshold, $debug, $overwrite );
}
|
3038
|
|
|
|
|
|
|
|
3039
|
|
|
|
|
|
|
# Delegates to ExecuteStringTraining() on the object returned by
# GetWord2PhraseHandler(), always passing six positional arguments
# (missing ones are forwarded as undef), and returns that call's result.
sub W2PExecuteStringTraining
{
    my $self = shift;
    my ( $trainingText, $outputFile, $minCount, $threshold, $debug, $overwrite ) = @_;

    my $phraseHandler = $self->GetWord2PhraseHandler();
    return $phraseHandler->ExecuteStringTraining( $trainingText, $outputFile, $minCount,
                                                  $threshold, $debug, $overwrite );
}
|
3044
|
|
|
|
|
|
|
|
3045
|
|
|
|
|
|
|
# Returns the result of GetDebugLog() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetDebugLog
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetDebugLog();
}
|
3050
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
# Returns the result of GetWriteLog() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetWriteLog
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetWriteLog();
}
|
3056
|
|
|
|
|
|
|
|
3057
|
|
|
|
|
|
|
# Returns the result of GetFileHandle() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetFileHandle
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetFileHandle();
}
|
3062
|
|
|
|
|
|
|
|
3063
|
|
|
|
|
|
|
# Returns the result of GetTrainFilePath() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetTrainFilePath
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetTrainFilePath();
}
|
3068
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
# Returns the result of GetOutputFilePath() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetOutputFilePath
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetOutputFilePath();
}
|
3074
|
|
|
|
|
|
|
|
3075
|
|
|
|
|
|
|
# Returns the result of GetMinCount() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetMinCount
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetMinCount();
}
|
3080
|
|
|
|
|
|
|
|
3081
|
|
|
|
|
|
|
# Returns the result of GetThreshold() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetThreshold
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetThreshold();
}
|
3086
|
|
|
|
|
|
|
|
3087
|
|
|
|
|
|
|
# Returns the result of GetW2PDebug() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetW2PDebug
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetW2PDebug();
}
|
3092
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
# Returns the result of GetWorkingDir() called on the object returned by
# GetWord2PhraseHandler().
sub W2PGetWorkingDir
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetWorkingDir();
}
|
3098
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
# Returns the result of GetWord2PhraseExeDir() called on the object returned
# by GetWord2PhraseHandler().
sub W2PGetWord2PhraseExeDir
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetWord2PhraseExeDir();
}
|
3104
|
|
|
|
|
|
|
|
3105
|
|
|
|
|
|
|
# Returns the result of GetOverwriteOldFile() called on the object returned
# by GetWord2PhraseHandler().
sub W2PGetOverwriteOldFile
{
    my $self    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->GetOverwriteOldFile();
}
|
3110
|
|
|
|
|
|
|
|
3111
|
|
|
|
|
|
|
# Delegates to SetTrainFilePath( $path ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetTrainFilePath
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->SetTrainFilePath( $path );
}
|
3116
|
|
|
|
|
|
|
|
3117
|
|
|
|
|
|
|
# Delegates to SetOutputFilePath( $path ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetOutputFilePath
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->SetOutputFilePath( $path );
}
|
3122
|
|
|
|
|
|
|
|
3123
|
|
|
|
|
|
|
# Delegates to SetMinCount( $count ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetMinCount
{
    my $self    = shift;
    my $count   = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->SetMinCount( $count );
}
|
3128
|
|
|
|
|
|
|
|
3129
|
|
|
|
|
|
|
# Delegates to SetThreshold( $threshold ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetThreshold
{
    my $self      = shift;
    my $threshold = shift;
    my $handler   = $self->GetWord2PhraseHandler();
    return $handler->SetThreshold( $threshold );
}
|
3134
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
# Delegates to SetW2PDebug( $level ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetW2PDebug
{
    my $self    = shift;
    my $level   = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->SetW2PDebug( $level );
}
|
3140
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
# Delegates to SetWorkingDir( $directory ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetWorkingDir
{
    my $self      = shift;
    my $directory = shift;
    my $handler   = $self->GetWord2PhraseHandler();
    return $handler->SetWorkingDir( $directory );
}
|
3146
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
# Delegates to SetWord2PhraseExeDir( $directory ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetWord2PhraseExeDir
{
    my $self      = shift;
    my $directory = shift;
    my $handler   = $self->GetWord2PhraseHandler();
    return $handler->SetWord2PhraseExeDir( $directory );
}
|
3152
|
|
|
|
|
|
|
|
3153
|
|
|
|
|
|
|
# Delegates to SetOverwriteOldFile( $flag ) on the object returned by
# GetWord2PhraseHandler() and returns that call's result unchanged.
sub W2PSetOverwriteOldFile
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetWord2PhraseHandler();
    return $handler->SetOverwriteOldFile( $flag );
}
|
3158
|
|
|
|
|
|
|
|
3159
|
|
|
|
|
|
|
|
3160
|
|
|
|
|
|
|
######################################################################################
|
3161
|
|
|
|
|
|
|
# XMLToW2V Module Functions
|
3162
|
|
|
|
|
|
|
######################################################################################
|
3163
|
|
|
|
|
|
|
|
3164
|
|
|
|
|
|
|
# Calls ConvertMedlineXMLToW2V() on the object returned by
# GetXMLToW2VHandler() and returns that call's result.
#
# Every argument after $self is forwarded unchanged. The module's SYNOPSIS
# calls this wrapper with a directory path, which was previously discarded
# before reaching the handler. When no extra argument is supplied the
# handler is still invoked with none, exactly as before.
sub XTWConvertMedlineXMLToW2V
{
    my ( $self, @args ) = @_;
    return $self->GetXMLToW2VHandler()->ConvertMedlineXMLToW2V( @args );
}
|
3169
|
|
|
|
|
|
|
|
3170
|
|
|
|
|
|
|
# Calls CreateCompoundWordBST() with no arguments on the object returned by
# GetXMLToW2VHandler(). The call's value is the sub's return value (the
# original relied on Perl's implicit last-expression return).
sub XTWCreateCompoundWordBST
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->CreateCompoundWordBST();
}
|
3175
|
|
|
|
|
|
|
|
3176
|
|
|
|
|
|
|
# Delegates to CompoundifyString( $text ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWCompoundifyString
{
    my $self    = shift;
    my $text    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->CompoundifyString( $text );
}
|
3181
|
|
|
|
|
|
|
|
3182
|
|
|
|
|
|
|
# Delegates to ReadCompoundWordDataFromFile( $filePath, $autoSetMaxLength )
# on the object returned by GetXMLToW2VHandler(); both positions are always
# passed (undef when omitted). Returns that call's result unchanged.
sub XTWReadCompoundWordDataFromFile
{
    my $self = shift;
    my ( $filePath, $autoSetMaxLength ) = @_;

    my $handler = $self->GetXMLToW2VHandler();
    return $handler->ReadCompoundWordDataFromFile( $filePath, $autoSetMaxLength );
}
|
3187
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
# Delegates to SaveCompoundWordListToFile( $path ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSaveCompoundWordListToFile
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SaveCompoundWordListToFile( $path );
}
|
3193
|
|
|
|
|
|
|
|
3194
|
|
|
|
|
|
|
# Delegates to ReadTextFromFile( $filePath ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWReadTextFromFile
{
    my $self     = shift;
    my $filePath = shift;
    my $handler  = $self->GetXMLToW2VHandler();
    return $handler->ReadTextFromFile( $filePath );
}
|
3199
|
|
|
|
|
|
|
|
3200
|
|
|
|
|
|
|
# Delegates to SaveTextToFile( $fileName, $text ) on the object returned by
# GetXMLToW2VHandler(); both positions are always passed (undef when
# omitted). Returns that call's result unchanged.
sub XTWSaveTextToFile
{
    my $self = shift;
    my ( $fileName, $text ) = @_;

    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SaveTextToFile( $fileName, $text );
}
|
3205
|
|
|
|
|
|
|
|
3206
|
|
|
|
|
|
|
# Delegates to the handler's underscore-prefixed _ReadXMLDataFromFile( $filePath )
# on the object returned by GetXMLToW2VHandler() and returns its result.
sub XTWReadXMLDataFromFile
{
    my $self     = shift;
    my $filePath = shift;
    my $handler  = $self->GetXMLToW2VHandler();
    return $handler->_ReadXMLDataFromFile( $filePath );
}
|
3211
|
|
|
|
|
|
|
|
3212
|
|
|
|
|
|
|
# Delegates to the handler's underscore-prefixed
# _SaveTextCorpusToFile( $savePath, $append ) on the object returned by
# GetXMLToW2VHandler(); both positions are always passed (undef when
# omitted). Returns that call's result unchanged.
sub XTWSaveTextCorpusToFile
{
    my $self = shift;
    my ( $savePath, $append ) = @_;

    my $handler = $self->GetXMLToW2VHandler();
    return $handler->_SaveTextCorpusToFile( $savePath, $append );
}
|
3217
|
|
|
|
|
|
|
|
3218
|
|
|
|
|
|
|
# Delegates to IsDateInSpecifiedRange( $date, $rangeStart, $rangeEnd ) on the
# object returned by GetXMLToW2VHandler(); all three positions are always
# passed (undef when omitted). Returns that call's result unchanged.
sub XTWIsDateInSpecifiedRange
{
    my $self = shift;
    my ( $date, $rangeStart, $rangeEnd ) = @_;

    my $handler = $self->GetXMLToW2VHandler();
    return $handler->IsDateInSpecifiedRange( $date, $rangeStart, $rangeEnd );
}
|
3223
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
# Delegates to IsFileOrDirectory( $path ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWIsFileOrDirectory
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->IsFileOrDirectory( $path );
}
|
3229
|
|
|
|
|
|
|
|
3230
|
|
|
|
|
|
|
# Delegates to RemoveSpecialCharactersFromString( $text ) on the object
# returned by GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWRemoveSpecialCharactersFromString
{
    my $self    = shift;
    my $text    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->RemoveSpecialCharactersFromString( $text );
}
|
3235
|
|
|
|
|
|
|
|
3236
|
|
|
|
|
|
|
# Delegates to GetFileType( $path ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWGetFileType
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetFileType( $path );
}
|
3241
|
|
|
|
|
|
|
|
3242
|
|
|
|
|
|
|
# Returns the result of the handler's underscore-prefixed _DateCheck(),
# called with no arguments on the object returned by GetXMLToW2VHandler().
sub XTWDateCheck
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->_DateCheck();
}
|
3247
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
# Returns the result of GetDebugLog() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetDebugLog
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetDebugLog();
}
|
3253
|
|
|
|
|
|
|
|
3254
|
|
|
|
|
|
|
# Returns the result of GetWriteLog() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetWriteLog
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetWriteLog();
}
|
3259
|
|
|
|
|
|
|
|
3260
|
|
|
|
|
|
|
# Returns the result of GetStoreTitle() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetStoreTitle
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetStoreTitle();
}
|
3265
|
|
|
|
|
|
|
|
3266
|
|
|
|
|
|
|
# Returns the result of GetStoreAbstract() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetStoreAbstract
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetStoreAbstract();
}
|
3271
|
|
|
|
|
|
|
|
3272
|
|
|
|
|
|
|
# Returns the result of GetQuickParse() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetQuickParse
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetQuickParse();
}
|
3277
|
|
|
|
|
|
|
|
3278
|
|
|
|
|
|
|
# Returns the result of GetCompoundifyText() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetCompoundifyText
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetCompoundifyText();
}
|
3283
|
|
|
|
|
|
|
|
3284
|
|
|
|
|
|
|
# Returns the result of GetNumOfThreads() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetNumOfThreads
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetNumOfThreads();
}
|
3289
|
|
|
|
|
|
|
|
3290
|
|
|
|
|
|
|
# Returns the result of GetWorkingDir() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetWorkingDir
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetWorkingDir();
}
|
3295
|
|
|
|
|
|
|
|
3296
|
|
|
|
|
|
|
# Returns the result of GetSaveDir() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetSaveDir
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetSaveDir();
}
|
3301
|
|
|
|
|
|
|
|
3302
|
|
|
|
|
|
|
# Returns the result of GetBeginDate() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetBeginDate
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetBeginDate();
}
|
3307
|
|
|
|
|
|
|
|
3308
|
|
|
|
|
|
|
# Returns the result of GetEndDate() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetEndDate
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetEndDate();
}
|
3313
|
|
|
|
|
|
|
|
3314
|
|
|
|
|
|
|
# Returns the result of GetXMLStringToParse() called on the object returned
# by GetXMLToW2VHandler().
sub XTWGetXMLStringToParse
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetXMLStringToParse();
}
|
3319
|
|
|
|
|
|
|
|
3320
|
|
|
|
|
|
|
# Returns the result of GetTextCorpusStr() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetTextCorpusStr
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetTextCorpusStr();
}
|
3325
|
|
|
|
|
|
|
|
3326
|
|
|
|
|
|
|
# Returns the result of GetFileHandle() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetFileHandle
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetFileHandle();
}
|
3331
|
|
|
|
|
|
|
|
3332
|
|
|
|
|
|
|
# Returns the result of GetTwigHandler() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetTwigHandler
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetTwigHandler();
}
|
3337
|
|
|
|
|
|
|
|
3338
|
|
|
|
|
|
|
# Returns the result of GetParsedCount() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetParsedCount
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetParsedCount();
}
|
3343
|
|
|
|
|
|
|
|
3344
|
|
|
|
|
|
|
# Returns the result of GetTempStr() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetTempStr
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetTempStr();
}
|
3349
|
|
|
|
|
|
|
|
3350
|
|
|
|
|
|
|
# Returns the result of GetTempDate() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetTempDate
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetTempDate();
}
|
3355
|
|
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
# Returns the result of GetOutputFilePath() called on the object returned by
# GetXMLToW2VHandler(). Note the wrapper is named "...FileName" while the
# handler method it calls is "...FilePath".
sub XTWGetOutputFileName
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetOutputFilePath();
}
|
3361
|
|
|
|
|
|
|
|
3362
|
|
|
|
|
|
|
# Returns the result of GetCompoundWordAry() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetCompoundWordAry
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetCompoundWordAry();
}
|
3367
|
|
|
|
|
|
|
|
3368
|
|
|
|
|
|
|
# Returns the result of GetCompoundWordBST() called on the object returned by
# GetXMLToW2VHandler().
sub XTWGetCompoundWordBST
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetCompoundWordBST();
}
|
3373
|
|
|
|
|
|
|
|
3374
|
|
|
|
|
|
|
# Returns the result of GetMaxCompoundWordLength() called on the object
# returned by GetXMLToW2VHandler().
sub XTWGetMaxCompoundWordLength
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->GetMaxCompoundWordLength();
}
|
3379
|
|
|
|
|
|
|
|
3380
|
|
|
|
|
|
|
# Delegates to SetStoreTitle( $flag ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetStoreTitle
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetStoreTitle( $flag );
}
|
3385
|
|
|
|
|
|
|
|
3386
|
|
|
|
|
|
|
# Delegates to SetStoreAbstract( $flag ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetStoreAbstract
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetStoreAbstract( $flag );
}
|
3391
|
|
|
|
|
|
|
|
3392
|
|
|
|
|
|
|
# Delegates to SetWorkingDir( $directory ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetWorkingDir
{
    my $self      = shift;
    my $directory = shift;
    my $handler   = $self->GetXMLToW2VHandler();
    return $handler->SetWorkingDir( $directory );
}
|
3397
|
|
|
|
|
|
|
|
3398
|
|
|
|
|
|
|
# Delegates to SetSavePath( $path ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetSavePath
{
    my $self    = shift;
    my $path    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetSavePath( $path );
}
|
3403
|
|
|
|
|
|
|
|
3404
|
|
|
|
|
|
|
# Delegates to SetQuickParse( $flag ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetQuickParse
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetQuickParse( $flag );
}
|
3409
|
|
|
|
|
|
|
|
3410
|
|
|
|
|
|
|
# Delegates to SetCompoundifyText( $flag ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetCompoundifyText
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetCompoundifyText( $flag );
}
|
3415
|
|
|
|
|
|
|
|
3416
|
|
|
|
|
|
|
# Delegates to SetBeginDate( $date ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetBeginDate
{
    my $self    = shift;
    my $date    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetBeginDate( $date );
}
|
3421
|
|
|
|
|
|
|
|
3422
|
|
|
|
|
|
|
# Delegates to SetEndDate( $date ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetEndDate
{
    my $self    = shift;
    my $date    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetEndDate( $date );
}
|
3427
|
|
|
|
|
|
|
|
3428
|
|
|
|
|
|
|
# Delegates to SetXMLStringToParse( $xml ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetXMLStringToParse
{
    my $self    = shift;
    my $xml     = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetXMLStringToParse( $xml );
}
|
3433
|
|
|
|
|
|
|
|
3434
|
|
|
|
|
|
|
# Delegates to SetTextCorpusStr( $text ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetTextCorpusStr
{
    my $self    = shift;
    my $text    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetTextCorpusStr( $text );
}
|
3439
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
# Delegates to AppendStrToTextCorpus( $text ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWAppendStrToTextCorpus
{
    my $self    = shift;
    my $text    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->AppendStrToTextCorpus( $text );
}
|
3445
|
|
|
|
|
|
|
|
3446
|
|
|
|
|
|
|
# Calls ClearTextCorpusStr() with no arguments on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
# Any extra caller arguments are accepted and ignored, as before; the
# previously unpacked but never-read $value local has been dropped.
sub XTWClearTextCorpusStr
{
    my ( $self ) = @_;
    return $self->GetXMLToW2VHandler()->ClearTextCorpusStr();
}
|
3451
|
|
|
|
|
|
|
|
3452
|
|
|
|
|
|
|
# Delegates to SetTempStr( $text ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetTempStr
{
    my $self    = shift;
    my $text    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetTempStr( $text );
}
|
3457
|
|
|
|
|
|
|
|
3458
|
|
|
|
|
|
|
# Delegates to AppendToTempStr( $text ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWAppendToTempStr
{
    my $self    = shift;
    my $text    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->AppendToTempStr( $text );
}
|
3463
|
|
|
|
|
|
|
|
3464
|
|
|
|
|
|
|
# Calls ClearTempStr() with no arguments on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
# Any extra caller arguments are accepted and ignored, as before; the
# previously unpacked but never-read $value local has been dropped.
sub XTWClearTempStr
{
    my ( $self ) = @_;
    return $self->GetXMLToW2VHandler()->ClearTempStr();
}
|
3469
|
|
|
|
|
|
|
|
3470
|
|
|
|
|
|
|
# Delegates to SetTempDate( $date ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetTempDate
{
    my $self    = shift;
    my $date    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetTempDate( $date );
}
|
3475
|
|
|
|
|
|
|
|
3476
|
|
|
|
|
|
|
# Calls ClearTempDate() with no arguments on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWClearTempDate
{
    my $self    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->ClearTempDate();
}
|
3481
|
|
|
|
|
|
|
|
3482
|
|
|
|
|
|
|
# Delegates to SetCompoundWordAry( $arrayRef ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetCompoundWordAry
{
    my $self     = shift;
    my $arrayRef = shift;
    my $handler  = $self->GetXMLToW2VHandler();
    return $handler->SetCompoundWordAry( $arrayRef );
}
|
3487
|
|
|
|
|
|
|
|
3488
|
|
|
|
|
|
|
# Calls ClearCompoundWordAry() with no arguments on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
# Any extra caller arguments are accepted and ignored, as before; the
# previously unpacked but never-read $str local has been dropped.
sub XTWClearCompoundWordAry
{
    my ( $self ) = @_;
    return $self->GetXMLToW2VHandler()->ClearCompoundWordAry();
}
|
3493
|
|
|
|
|
|
|
|
3494
|
|
|
|
|
|
|
# Delegates to SetCompoundWordBST( $tree ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetCompoundWordBST
{
    my $self    = shift;
    my $tree    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetCompoundWordBST( $tree );
}
|
3499
|
|
|
|
|
|
|
|
3500
|
|
|
|
|
|
|
# Calls ClearCompoundWordBST() with no arguments on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
# Any extra caller arguments are accepted and ignored, as before; the
# previously unpacked but never-read $str local has been dropped.
sub XTWClearCompoundWordBST
{
    my ( $self ) = @_;
    return $self->GetXMLToW2VHandler()->ClearCompoundWordBST();
}
|
3505
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
# Delegates to SetMaxCompoundWordLength( $length ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetMaxCompoundWordLength
{
    my $self    = shift;
    my $length  = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetMaxCompoundWordLength( $length );
}
|
3511
|
|
|
|
|
|
|
|
3512
|
|
|
|
|
|
|
# Delegates to SetOverwriteExistingFile( $flag ) on the object returned by
# GetXMLToW2VHandler() and returns that call's result unchanged.
sub XTWSetOverwriteExistingFile
{
    my $self    = shift;
    my $flag    = shift;
    my $handler = $self->GetXMLToW2VHandler();
    return $handler->SetOverwriteExistingFile( $flag );
}
|
3517
|
|
|
|
|
|
|
|
3518
|
|
|
|
|
|
|
#################### All Modules Are To Output "1"(True) at EOF ######################
|
3519
|
|
|
|
|
|
|
1;
|
3520
|
|
|
|
|
|
|
|
3521
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
=head1 NAME
|
3523
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
Word2vec::Interface - Interface module for word2vec.pm, word2phrase.pm, xmltow2v.pm modules and associated utilities.
|
3525
|
|
|
|
|
|
|
|
3526
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
3527
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3529
|
|
|
|
|
|
|
|
3530
|
|
|
|
|
|
|
my $result = 0;
|
3531
|
|
|
|
|
|
|
|
3532
|
|
|
|
|
|
|
# Compile a text corpus, execute word2vec training and compute cosine similarity of two words
|
3533
|
|
|
|
|
|
|
my $w2vinterface = Word2vec::Interface->new();
|
3534
|
|
|
|
|
|
|
|
3535
|
|
|
|
|
|
|
my $xmlconv = $w2vinterface->GetXMLToW2VHandler();
|
3536
|
|
|
|
|
|
|
$xmlconv->SetWorkingDir( "Medline/XML/Directory/Here" );
|
3537
|
|
|
|
|
|
|
$xmlconv->SetSavePath( "textcorpus.txt" );
|
3538
|
|
|
|
|
|
|
$xmlconv->SetStoreTitle( 1 );
|
3539
|
|
|
|
|
|
|
$xmlconv->SetStoreAbstract( 1 );
|
3540
|
|
|
|
|
|
|
$xmlconv->SetBeginDate( "01/01/2004" );
|
3541
|
|
|
|
|
|
|
$xmlconv->SetEndDate( "08/13/2016" );
|
3542
|
|
|
|
|
|
|
$xmlconv->SetOverwriteExistingFile( 1 );
|
3543
|
|
|
|
|
|
|
|
3544
|
|
|
|
|
|
|
# If Compound Word File Exists, Store It In Memory
|
3545
|
|
|
|
|
|
|
# And Create Compound Word Binary Search Tree Using The Compound Word Data
|
3546
|
|
|
|
|
|
|
$xmlconv->ReadCompoundWordDataFromFile( "compoundword.txt" );
|
3547
|
|
|
|
|
|
|
$xmlconv->CreateCompoundWordBST();
|
3548
|
|
|
|
|
|
|
|
3549
|
|
|
|
|
|
|
# Parse XML Files or Directory Of Files
|
3550
|
|
|
|
|
|
|
$result = $xmlconv->ConvertMedlineXMLToW2V( "/xmlDirectory/" );
|
3551
|
|
|
|
|
|
|
|
3552
|
|
|
|
|
|
|
# Check(s)
|
3553
|
|
|
|
|
|
|
print( "Error Parsing Medline XML Files\n" ) if ( $result == -1 );
|
3554
|
|
|
|
|
|
|
exit if ( $result == -1 );
|
3555
|
|
|
|
|
|
|
|
3556
|
|
|
|
|
|
|
# Setup And Execute word2vec Training
|
3557
|
|
|
|
|
|
|
my $word2vec = $w2vinterface->GetWord2VecHandler();
|
3558
|
|
|
|
|
|
|
$word2vec->SetTrainFilePath( "textcorpus.txt" );
|
3559
|
|
|
|
|
|
|
$word2vec->SetOutputFilePath( "vectors.bin" );
|
3560
|
|
|
|
|
|
|
$word2vec->SetWordVecSize( 200 );
|
3561
|
|
|
|
|
|
|
$word2vec->SetWindowSize( 8 );
|
3562
|
|
|
|
|
|
|
$word2vec->SetSample( 0.0001 );
|
3563
|
|
|
|
|
|
|
$word2vec->SetNegative( 25 );
|
3564
|
|
|
|
|
|
|
$word2vec->SetHSoftMax( 0 );
|
3565
|
|
|
|
|
|
|
$word2vec->SetBinaryOutput( 0 );
|
3566
|
|
|
|
|
|
|
$word2vec->SetNumOfThreads( 20 );
|
3567
|
|
|
|
|
|
|
$word2vec->SetNumOfIterations( 12 );
|
3568
|
|
|
|
|
|
|
$word2vec->SetUseCBOW( 1 );
|
3569
|
|
|
|
|
|
|
$word2vec->SetOverwriteOldFile( 0 );
|
3570
|
|
|
|
|
|
|
|
3571
|
|
|
|
|
|
|
# Execute word2vec Training
|
3572
|
|
|
|
|
|
|
$result = $word2vec->ExecuteTraining();
|
3573
|
|
|
|
|
|
|
|
3574
|
|
|
|
|
|
|
# Check(s)
|
3575
|
|
|
|
|
|
|
print( "Error Training Word2vec On File: \"textcorpus.txt\"\n" ) if ( $result == -1 );
|
3576
|
|
|
|
|
|
|
exit if ( $result == -1 );
|
3577
|
|
|
|
|
|
|
|
3578
|
|
|
|
|
|
|
# Read word2vec Training Data Into Memory And Store As A Binary Search Tree
|
3579
|
|
|
|
|
|
|
$result = $word2vec->ReadTrainedVectorDataFromFile( "vectors.bin" );
|
3580
|
|
|
|
|
|
|
|
3581
|
|
|
|
|
|
|
# Check(s)
|
3582
|
|
|
|
|
|
|
print( "Error Unable To Read Word2vec Trained Vector Data From File\n" ) if ( $result == -1 );
|
3583
|
|
|
|
|
|
|
exit if ( $result == -1 );
|
3584
|
|
|
|
|
|
|
|
3585
|
|
|
|
|
|
|
# Compute Cosine Similarity Between "respiratory" and "arrest"
|
3586
|
|
|
|
|
|
|
$result = $word2vec->ComputeCosineSimilarity( "respiratory", "arrest" );
|
3587
|
|
|
|
|
|
|
print( "Cosine Similarity Between \"respiratory\" and \"arrest\": $result\n" ) if defined( $result );
|
3588
|
|
|
|
|
|
|
print( "Error Computing Cosine Similarity\n" ) if !defined( $result );
|
3589
|
|
|
|
|
|
|
|
3590
|
|
|
|
|
|
|
# Compute Cosine Similarity Between "respiratory arrest" and "heart attack"
|
3591
|
|
|
|
|
|
|
$result = $word2vec->ComputeMultiWordCosineSimilarity( "respiratory arrest", "heart attack" );
|
3592
|
|
|
|
|
|
|
print( "Cosine Similarity Between \"respiratory arrest\" and \"heart attack\": $result\n" ) if defined( $result );
|
3593
|
|
|
|
|
|
|
print( "Error Computing Cosine Similarity\n" ) if !defined( $result );
|
3594
|
|
|
|
|
|
|
|
3595
|
|
|
|
|
|
|
undef( $w2vinterface );
|
3596
|
|
|
|
|
|
|
|
3597
|
|
|
|
|
|
|
# or
|
3598
|
|
|
|
|
|
|
|
3599
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3600
|
|
|
|
|
|
|
|
3601
|
|
|
|
|
|
|
my $result = 0;
|
3602
|
|
|
|
|
|
|
my $w2vinterface = Word2vec::Interface->new();
|
3603
|
|
|
|
|
|
|
$w2vinterface->XTWSetWorkingDir( "Medline/XML/Directory/Here" );
|
3604
|
|
|
|
|
|
|
$w2vinterface->XTWSetSavePath( "textcorpus.txt" );
|
3605
|
|
|
|
|
|
|
$w2vinterface->XTWSetStoreTitle( 1 );
|
3606
|
|
|
|
|
|
|
$w2vinterface->XTWSetStoreAbstract( 1 );
|
3607
|
|
|
|
|
|
|
$w2vinterface->XTWSetBeginDate( "01/01/2004" );
|
3608
|
|
|
|
|
|
|
$w2vinterface->XTWSetEndDate( "08/13/2016" );
|
3609
|
|
|
|
|
|
|
$w2vinterface->XTWSetOverwriteExistingFile( 1 );
|
3610
|
|
|
|
|
|
|
|
3611
|
|
|
|
|
|
|
# If Compound Word File Exists, Store It In Memory
|
3612
|
|
|
|
|
|
|
# And Create Compound Word Binary Search Tree Using The Compound Word Data
|
3613
|
|
|
|
|
|
|
$w2vinterface->XTWReadCompoundWordDataFromFile( "compoundword.txt" );
|
3614
|
|
|
|
|
|
|
$w2vinterface->XTWCreateCompoundWordBST();
|
3615
|
|
|
|
|
|
|
|
3616
|
|
|
|
|
|
|
# Parse XML Files or Directory Of Files
|
3617
|
|
|
|
|
|
|
$result = $w2vinterface->XTWConvertMedlineXMLToW2V( "/xmlDirectory/" );
|
3618
|
|
|
|
|
|
|
|
3619
|
|
|
|
|
|
|
$result = $w2vinterface->W2VExecuteTraining( "textcorpus.txt", "vectors.bin", 200, 8, undef, 0.001, 25,
|
3620
|
|
|
|
|
|
|
undef, 0, 0, 20, 15, 1, 0, undef, undef, undef, 1 );
|
3621
|
|
|
|
|
|
|
|
3622
|
|
|
|
|
|
|
# Read word2vec Training Data Into Memory And Store As A Binary Search Tree
|
3623
|
|
|
|
|
|
|
$result = $w2vinterface->W2VReadTrainedVectorDataFromFile( "vectors.bin" );
|
3624
|
|
|
|
|
|
|
|
3625
|
|
|
|
|
|
|
# Check(s)
|
3626
|
|
|
|
|
|
|
print( "Error Unable To Read Word2vec Trained Vector Data From File\n" ) if ( $result == -1 );
|
3627
|
|
|
|
|
|
|
exit if ( $result == -1 );
|
3628
|
|
|
|
|
|
|
|
3629
|
|
|
|
|
|
|
# Compute Cosine Similarity Between "respiratory" and "arrest"
|
3630
|
|
|
|
|
|
|
$result = $w2vinterface->W2VComputeCosineSimilarity( "respiratory", "arrest" );
|
3631
|
|
|
|
|
|
|
print( "Cosine Similarity Between \"respiratory\" and \"arrest\": $result\n" ) if defined( $result );
|
3632
|
|
|
|
|
|
|
print( "Error Computing Cosine Similarity\n" ) if !defined( $result );
|
3633
|
|
|
|
|
|
|
|
3634
|
|
|
|
|
|
|
# Compute Cosine Similarity Between "respiratory arrest" and "heart attack"
|
3635
|
|
|
|
|
|
|
$result = $w2vinterface->W2VComputeMultiWordCosineSimilarity( "respiratory arrest", "heart attack" );
|
3636
|
|
|
|
|
|
|
print( "Cosine Similarity Between \"respiratory arrest\" and \"heart attack\": $result\n" ) if defined( $result );
|
3637
|
|
|
|
|
|
|
print( "Error Computing Cosine Similarity\n" ) if !defined( $result );
|
3638
|
|
|
|
|
|
|
|
3639
|
|
|
|
|
|
|
undef( $w2vinterface );
|
3640
|
|
|
|
|
|
|
|
3641
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
3642
|
|
|
|
|
|
|
|
3643
|
|
|
|
|
|
|
Word2vec::Interface is an interface module for utilization of word2vec, word2phrase, xmltow2v and their associated functions.
|
3644
|
|
|
|
|
|
|
This program houses a set of functions, modules and utilities for use with UMLS Similarity.
|
3645
|
|
|
|
|
|
|
|
3646
|
|
|
|
|
|
|
XmlToW2v Features:
|
3647
|
|
|
|
|
|
|
- Compilation of a text corpus from plain or gzipped Medline XML files.
|
3648
|
|
|
|
|
|
|
- Multi-threaded text corpus compilation support.
|
3649
|
|
|
|
|
|
|
- Include text corpus articles via date range.
|
3650
|
|
|
|
|
|
|
- Include text corpus articles via title, abstract or both.
|
3651
|
|
|
|
|
|
|
- Compoundifying on-the-fly while building text corpus given a compound word file.
|
3652
|
|
|
|
|
|
|
|
3653
|
|
|
|
|
|
|
Word2vec Features:
|
3654
|
|
|
|
|
|
|
- Word2vec training with user specified settings.
|
3655
|
|
|
|
|
|
|
- Manipulation of Word2vec word vectors. (Addition/Subtraction/Average)
|
3656
|
|
|
|
|
|
|
- Word2vec binary format to plain text file conversion.
|
3657
|
|
|
|
|
|
|
- Word2vec plain text to binary format file conversion.
|
3658
|
|
|
|
|
|
|
- Multi-word cosine similarity computation. (Pseudo-compound word cosine similarity).
|
3659
|
|
|
|
|
|
|
|
3660
|
|
|
|
|
|
|
Word2phrase Features:
|
3661
|
|
|
|
|
|
|
- Word2phrase training with user specified settings.
|
3662
|
|
|
|
|
|
|
|
3663
|
|
|
|
|
|
|
Interface Features:
|
3664
|
|
|
|
|
|
|
- Word Sense Disambiguation via trained word2vec data.
|
3665
|
|
|
|
|
|
|
|
3666
|
|
|
|
|
|
|
=head2 Interface Main Functions
|
3667
|
|
|
|
|
|
|
|
3668
|
|
|
|
|
|
|
=head3 new
|
3669
|
|
|
|
|
|
|
|
3670
|
|
|
|
|
|
|
Description:
|
3671
|
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
Returns a new "Word2vec::Interface" module object.
|
3673
|
|
|
|
|
|
|
|
3674
|
|
|
|
|
|
|
Note: Specifying no parameters implies default options.
|
3675
|
|
|
|
|
|
|
|
3676
|
|
|
|
|
|
|
Default Parameters:
|
3677
|
|
|
|
|
|
|
word2vecDir = "../../External/word2vec"
|
3678
|
|
|
|
|
|
|
debugLog = 0
|
3679
|
|
|
|
|
|
|
writeLog = 0
|
3680
|
|
|
|
|
|
|
ignoreCompileErrors = 0
|
3681
|
|
|
|
|
|
|
ignoreFileChecks = 0
|
3682
|
|
|
|
|
|
|
exitFlag = 0
|
3683
|
|
|
|
|
|
|
workingDir = ""
|
3684
|
|
|
|
|
|
|
word2vec = Word2vec::Word2vec->new()
|
3685
|
|
|
|
|
|
|
word2phrase = Word2vec::Word2phrase->new()
|
3686
|
|
|
|
|
|
|
xmltow2v = Word2vec::Xmltow2v->new()
|
3687
|
|
|
|
|
|
|
util = Word2vec::Util->new()
|
3688
|
|
|
|
|
|
|
instanceAry = ()
|
3689
|
|
|
|
|
|
|
senseAry = ()
|
3690
|
|
|
|
|
|
|
instanceCount = 0
|
3691
|
|
|
|
|
|
|
senseCount = 0
|
3692
|
|
|
|
|
|
|
|
3693
|
|
|
|
|
|
|
|
3694
|
|
|
|
|
|
|
Input:
|
3695
|
|
|
|
|
|
|
|
3696
|
|
|
|
|
|
|
$word2vecDir -> Specifies word2vec package source/executable directory.
|
3697
|
|
|
|
|
|
|
$debugLog -> Instructs module to print debug statements to the console. ('1' = True / '0' = False)
|
3698
|
|
|
|
|
|
|
$writeLog -> Instructs module to print debug statements to a log file. ('1' = True / '0' = False)
|
3699
|
|
|
|
|
|
|
$ignoreCompileErrors -> Instructs module to ignore source code compilation errors. ('1' = True / '0' = False)
|
3700
|
|
|
|
|
|
|
$ignoreFileChecks -> Instructs module to ignore file checks. ('1' = True / '0' = False)
|
3701
|
|
|
|
|
|
|
$exitFlag -> In the event of a run-time check error, exitFlag is set to '1' which gracefully terminates the script.
|
3702
|
|
|
|
|
|
|
$workingDir -> Specifies the current working directory.
|
3703
|
|
|
|
|
|
|
$word2vec -> Word2vec::Word2vec object.
|
3704
|
|
|
|
|
|
|
$word2phrase -> Word2vec::Word2phrase object.
|
3705
|
|
|
|
|
|
|
$xmltow2v -> Word2vec::Xmltow2v object.
|
3706
|
|
|
|
|
|
|
$util -> Word2vec::Util object.
|
3707
|
|
|
|
|
|
|
$instanceAry -> Word Sense Disambiguation: Array of instances.
|
3708
|
|
|
|
|
|
|
$senseAry -> Word Sense Disambiguation: Array of senses.
|
3709
|
|
|
|
|
|
|
$instanceCount -> Number of Word Sense Disambiguation instances loaded in memory.
|
3710
|
|
|
|
|
|
|
$senseCount -> Number of Word Sense Disambiguation senses loaded in memory.
|
3711
|
|
|
|
|
|
|
|
3712
|
|
|
|
|
|
|
Note: It is not recommended to specify all new() parameters, as it has not been thoroughly tested. Maximum recommended parameters to be specified include:
|
3713
|
|
|
|
|
|
|
"word2vecDir, debugLog, writeLog, ignoreCompileErrors, ignoreFileChecks"
|
3714
|
|
|
|
|
|
|
|
3715
|
|
|
|
|
|
|
Output:
|
3716
|
|
|
|
|
|
|
|
3717
|
|
|
|
|
|
|
Word2vec::Interface object.
|
3718
|
|
|
|
|
|
|
|
3719
|
|
|
|
|
|
|
Example:
|
3720
|
|
|
|
|
|
|
|
3721
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3722
|
|
|
|
|
|
|
|
3723
|
|
|
|
|
|
|
# Parameters: Word2Vec Directory = undef, DebugLog = True, WriteLog = False, IgnoreCompileErrors = False, IgnoreFileChecks = False
|
3724
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new( undef, 1, 0 );
|
3725
|
|
|
|
|
|
|
|
3726
|
|
|
|
|
|
|
undef( $interface );
|
3727
|
|
|
|
|
|
|
|
3728
|
|
|
|
|
|
|
# Or
|
3729
|
|
|
|
|
|
|
|
3730
|
|
|
|
|
|
|
# Parameters: Word2Vec Directory = undef, DebugLog = False, WriteLog = False, IgnoreCompileErrors = False, IgnoreFileChecks = False
|
3731
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3732
|
|
|
|
|
|
|
|
3733
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3734
|
|
|
|
|
|
|
|
3735
|
|
|
|
|
|
|
undef( $interface );
|
3736
|
|
|
|
|
|
|
|
3737
|
|
|
|
|
|
|
=head3 DESTROY
|
3738
|
|
|
|
|
|
|
|
3739
|
|
|
|
|
|
|
Description:
|
3740
|
|
|
|
|
|
|
|
3741
|
|
|
|
|
|
|
Removes member variables and file handle from memory.
|
3742
|
|
|
|
|
|
|
|
3743
|
|
|
|
|
|
|
Input:
|
3744
|
|
|
|
|
|
|
|
3745
|
|
|
|
|
|
|
None
|
3746
|
|
|
|
|
|
|
|
3747
|
|
|
|
|
|
|
Output:
|
3748
|
|
|
|
|
|
|
|
3749
|
|
|
|
|
|
|
None
|
3750
|
|
|
|
|
|
|
|
3751
|
|
|
|
|
|
|
Example:
|
3752
|
|
|
|
|
|
|
|
3753
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3754
|
|
|
|
|
|
|
|
3755
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3756
|
|
|
|
|
|
|
$interface->DESTROY();
|
3757
|
|
|
|
|
|
|
|
3758
|
|
|
|
|
|
|
undef( $interface );
|
3759
|
|
|
|
|
|
|
|
3760
|
|
|
|
|
|
|
=head3 RunFileChecks
|
3761
|
|
|
|
|
|
|
|
3762
|
|
|
|
|
|
|
Description:
|
3763
|
|
|
|
|
|
|
|
3764
|
|
|
|
|
|
|
Runs word2vec file checks. Looks for word2vec executable files, if not found
|
3765
|
|
|
|
|
|
|
it will then look for the source code and compile automatically placing the
|
3766
|
|
|
|
|
|
|
executable files in the same directory. Errors out gracefully when word2vec
|
3767
|
|
|
|
|
|
|
executable files are not present and source files cannot be located.
|
3768
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
Notes : Word2vec Executable File List: word2vec, word2phrase, word-analogy, distance, compute-accuracy.
|
3770
|
|
|
|
|
|
|
|
3771
|
|
|
|
|
|
|
: This method is called automatically in interface::new() function. It can be disabled by setting
|
3772
|
|
|
|
|
|
|
_ignoreFileChecks new() parameter to 1.
|
3773
|
|
|
|
|
|
|
|
3774
|
|
|
|
|
|
|
Input:
|
3775
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
$string -> Word2vec source/executable directory.
|
3777
|
|
|
|
|
|
|
|
3778
|
|
|
|
|
|
|
Output:
|
3779
|
|
|
|
|
|
|
|
3780
|
|
|
|
|
|
|
$value -> Returns '1' if checks passed and '0' if file checks failed.
|
3781
|
|
|
|
|
|
|
|
3782
|
|
|
|
|
|
|
Example:
|
3783
|
|
|
|
|
|
|
|
3784
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3785
|
|
|
|
|
|
|
|
3786
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new( undef, 1, 0, 1, 1 );
|
3787
|
|
|
|
|
|
|
my $result = $interface->RunFileChecks();
|
3788
|
|
|
|
|
|
|
|
3789
|
|
|
|
|
|
|
print( "Passed Word2Vec File Checks!\n" ) if $result == 1;
|
3790
|
|
|
|
|
|
|
print( "Failed Word2Vec File Checks!\n" ) if $result == 0;
|
3791
|
|
|
|
|
|
|
|
3792
|
|
|
|
|
|
|
undef( $interface );
|
3793
|
|
|
|
|
|
|
|
3794
|
|
|
|
|
|
|
=head3 CheckIfExecutableFileExists
|
3795
|
|
|
|
|
|
|
|
3796
|
|
|
|
|
|
|
Description:
|
3797
|
|
|
|
|
|
|
|
3798
|
|
|
|
|
|
|
Checks specified executable file exists in a given directory.
|
3799
|
|
|
|
|
|
|
|
3800
|
|
|
|
|
|
|
Input:
|
3801
|
|
|
|
|
|
|
|
3802
|
|
|
|
|
|
|
$filePath -> Executable file path
|
3803
|
|
|
|
|
|
|
$fileName -> Executable file name
|
3804
|
|
|
|
|
|
|
|
3805
|
|
|
|
|
|
|
Output:
|
3806
|
|
|
|
|
|
|
|
3807
|
|
|
|
|
|
|
$value -> Returns '1' if file is found and '0' if otherwise.
|
3808
|
|
|
|
|
|
|
|
3809
|
|
|
|
|
|
|
Example:
|
3810
|
|
|
|
|
|
|
|
3811
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3812
|
|
|
|
|
|
|
|
3813
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3814
|
|
|
|
|
|
|
my $result = $interface->CheckIfExecutableFileExists( "../../External/word2vec", "word2vec" );
|
3815
|
|
|
|
|
|
|
|
3816
|
|
|
|
|
|
|
print( "Executable File Exists!\n" ) if $result == 1;
|
3817
|
|
|
|
|
|
|
print( "Executable File Does Not Exist!\n" ) if $result == 0;
|
3818
|
|
|
|
|
|
|
|
3819
|
|
|
|
|
|
|
undef( $interface );
|
3820
|
|
|
|
|
|
|
|
3821
|
|
|
|
|
|
|
=head3 CheckIfSourceFileExists
|
3822
|
|
|
|
|
|
|
|
3823
|
|
|
|
|
|
|
Description:
|
3824
|
|
|
|
|
|
|
|
3825
|
|
|
|
|
|
|
Checks specified directory (string) for the filename (string).
|
3826
|
|
|
|
|
|
|
This ensures the specified files are of file type "text/cpp".
|
3827
|
|
|
|
|
|
|
|
3828
|
|
|
|
|
|
|
Input:
|
3829
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
$filePath -> Source file path
|
3831
|
|
|
|
|
|
|
$fileName -> Source file name
|
3832
|
|
|
|
|
|
|
|
3833
|
|
|
|
|
|
|
Output:
|
3834
|
|
|
|
|
|
|
|
3835
|
|
|
|
|
|
|
$value -> Returns '1' if file is found and of type "text/cpp" and '0' if otherwise.
|
3836
|
|
|
|
|
|
|
|
3837
|
|
|
|
|
|
|
Example:
|
3838
|
|
|
|
|
|
|
|
3839
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3840
|
|
|
|
|
|
|
|
3841
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3842
|
|
|
|
|
|
|
my $result = $interface->CheckIfSourceFileExists( "../../External/word2vec", "word2vec" );
|
3843
|
|
|
|
|
|
|
|
3844
|
|
|
|
|
|
|
print( "Source File Exists!\n" ) if $result == 1;
|
3845
|
|
|
|
|
|
|
print( "Source File Does Not Exist!\n" ) if $result == 0;
|
3846
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
undef( $interface );
|
3848
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
=head3 CompileSourceFile
|
3850
|
|
|
|
|
|
|
|
3851
|
|
|
|
|
|
|
Description:
|
3852
|
|
|
|
|
|
|
|
3853
|
|
|
|
|
|
|
Compiles C++ source filename in a specified directory.
|
3854
|
|
|
|
|
|
|
|
3855
|
|
|
|
|
|
|
Input:
|
3856
|
|
|
|
|
|
|
|
3857
|
|
|
|
|
|
|
$filePath -> Source file path (string)
|
3858
|
|
|
|
|
|
|
$fileName -> Source file name (string)
|
3859
|
|
|
|
|
|
|
|
3860
|
|
|
|
|
|
|
Output:
|
3861
|
|
|
|
|
|
|
|
3862
|
|
|
|
|
|
|
$value -> Returns '1' if successful and '0' if un-successful.
|
3863
|
|
|
|
|
|
|
|
3864
|
|
|
|
|
|
|
Example:
|
3865
|
|
|
|
|
|
|
|
3866
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3867
|
|
|
|
|
|
|
|
3868
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3869
|
|
|
|
|
|
|
my $result = $interface->CompileSourceFile( "../../External/word2vec", "word2vec" );
|
3870
|
|
|
|
|
|
|
|
3871
|
|
|
|
|
|
|
print( "Compiled Source Successfully!\n" ) if $result == 1;
|
3872
|
|
|
|
|
|
|
print( "Source Compilation Attempt Unsuccessful!\n" ) if $result == 0;
|
3873
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
undef( $interface );
|
3875
|
|
|
|
|
|
|
|
3876
|
|
|
|
|
|
|
=head3 GetFileType
|
3877
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
Description:
|
3879
|
|
|
|
|
|
|
|
3880
|
|
|
|
|
|
|
Checks file in given file path and if it exists, returns the file type.
|
3881
|
|
|
|
|
|
|
|
3882
|
|
|
|
|
|
|
Input:
|
3883
|
|
|
|
|
|
|
|
3884
|
|
|
|
|
|
|
$filePath -> File path
|
3885
|
|
|
|
|
|
|
|
3886
|
|
|
|
|
|
|
Output:
|
3887
|
|
|
|
|
|
|
|
3888
|
|
|
|
|
|
|
$string -> Returns file type (string).
|
3889
|
|
|
|
|
|
|
|
3890
|
|
|
|
|
|
|
Example:
|
3891
|
|
|
|
|
|
|
|
3892
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3893
|
|
|
|
|
|
|
|
3894
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3895
|
|
|
|
|
|
|
my $fileType = $interface->GetFileType( "samples/textcorpus.txt" );
|
3896
|
|
|
|
|
|
|
|
3897
|
|
|
|
|
|
|
print( "File Type: $fileType\n" );
|
3898
|
|
|
|
|
|
|
|
3899
|
|
|
|
|
|
|
undef( $interface );
|
3900
|
|
|
|
|
|
|
|
3901
|
|
|
|
|
|
|
=head3 GetOSType
|
3902
|
|
|
|
|
|
|
|
3903
|
|
|
|
|
|
|
Description:
|
3904
|
|
|
|
|
|
|
|
3905
|
|
|
|
|
|
|
Returns current operating system (string).
|
3906
|
|
|
|
|
|
|
|
3907
|
|
|
|
|
|
|
Input:
|
3908
|
|
|
|
|
|
|
|
3909
|
|
|
|
|
|
|
None
|
3910
|
|
|
|
|
|
|
|
3911
|
|
|
|
|
|
|
Output:
|
3912
|
|
|
|
|
|
|
|
3913
|
|
|
|
|
|
|
$string -> Operating System Type. (String)
|
3914
|
|
|
|
|
|
|
|
3915
|
|
|
|
|
|
|
Example:
|
3916
|
|
|
|
|
|
|
|
3917
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3918
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3920
|
|
|
|
|
|
|
my $os = $interface->GetOSType();
|
3921
|
|
|
|
|
|
|
|
3922
|
|
|
|
|
|
|
print( "Operating System: $os\n" );
|
3923
|
|
|
|
|
|
|
|
3924
|
|
|
|
|
|
|
undef( $interface );
|
3925
|
|
|
|
|
|
|
|
3926
|
|
|
|
|
|
|
=head3 _ModifyWord2VecSourceForWindows
|
3927
|
|
|
|
|
|
|
|
3928
|
|
|
|
|
|
|
Description:
|
3929
|
|
|
|
|
|
|
|
3930
|
|
|
|
|
|
|
Modifies "word2vec.c" file for compilation under windows operating system.
|
3931
|
|
|
|
|
|
|
|
3932
|
|
|
|
|
|
|
Input:
|
3933
|
|
|
|
|
|
|
|
3934
|
|
|
|
|
|
|
None
|
3935
|
|
|
|
|
|
|
|
3936
|
|
|
|
|
|
|
Output:
|
3937
|
|
|
|
|
|
|
|
3938
|
|
|
|
|
|
|
$value -> '1' = Successful / '0' = Un-successful
|
3939
|
|
|
|
|
|
|
|
3940
|
|
|
|
|
|
|
Example:
|
3941
|
|
|
|
|
|
|
|
3942
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
3943
|
|
|
|
|
|
|
|
3944
|
|
|
|
|
|
|
=head3 _RemoveWord2VecSourceModification
|
3945
|
|
|
|
|
|
|
|
3946
|
|
|
|
|
|
|
Description:
|
3947
|
|
|
|
|
|
|
|
3948
|
|
|
|
|
|
|
Removes modification of "word2vec.c". Returns source file to its original state.
|
3949
|
|
|
|
|
|
|
|
3950
|
|
|
|
|
|
|
Input:
|
3951
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
None
|
3953
|
|
|
|
|
|
|
|
3954
|
|
|
|
|
|
|
Output:
|
3955
|
|
|
|
|
|
|
|
3956
|
|
|
|
|
|
|
$value -> '1' = Successful / '0' = Un-successful.
|
3957
|
|
|
|
|
|
|
|
3958
|
|
|
|
|
|
|
Example:
|
3959
|
|
|
|
|
|
|
|
3960
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
3961
|
|
|
|
|
|
|
|
3962
|
|
|
|
|
|
|
=head2 Interface Command-Line Functions
|
3963
|
|
|
|
|
|
|
|
3964
|
|
|
|
|
|
|
=head3 CLComputeCosineSimilarity
|
3965
|
|
|
|
|
|
|
|
3966
|
|
|
|
|
|
|
Description:
|
3967
|
|
|
|
|
|
|
|
3968
|
|
|
|
|
|
|
Command-line Method: Computes cosine similarity between 'wordA' and 'wordB' using the specified 'filePath' for
|
3969
|
|
|
|
|
|
|
loading trained word2vec word vector data.
|
3970
|
|
|
|
|
|
|
|
3971
|
|
|
|
|
|
|
Input:
|
3972
|
|
|
|
|
|
|
|
3973
|
|
|
|
|
|
|
$filePath -> Word2Vec trained word vectors binary file path. (String)
|
3974
|
|
|
|
|
|
|
$wordA -> First word for cosine similarity comparison.
|
3975
|
|
|
|
|
|
|
$wordB -> Second word for cosine similarity comparison.
|
3976
|
|
|
|
|
|
|
|
3977
|
|
|
|
|
|
|
Output:
|
3978
|
|
|
|
|
|
|
|
3979
|
|
|
|
|
|
|
$value -> Cosine similarity value (float) or undefined.
|
3980
|
|
|
|
|
|
|
|
3981
|
|
|
|
|
|
|
Example:
|
3982
|
|
|
|
|
|
|
|
3983
|
|
|
|
|
|
|
use Word2vec::Interface;
|
3984
|
|
|
|
|
|
|
|
3985
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
3986
|
|
|
|
|
|
|
my $value = $interface->CLComputeCosineSimilarity( "../../samples/samplevectors.bin", "of", "the" );
|
3987
|
|
|
|
|
|
|
print( "Cosine Similarity Between \"of\" and \"the\": $value\n" ) if defined( $value );
|
3988
|
|
|
|
|
|
|
print( "Error: Cosine Similarity Could Not Be Computed\n" ) if !defined( $value );
|
3989
|
|
|
|
|
|
|
|
3990
|
|
|
|
|
|
|
undef( $interface );
|
3991
|
|
|
|
|
|
|
|
3992
|
|
|
|
|
|
|
=head3 CLComputeMultiWordCosineSimilarity
|
3993
|
|
|
|
|
|
|
|
3994
|
|
|
|
|
|
|
Description:
|
3995
|
|
|
|
|
|
|
|
3996
|
|
|
|
|
|
|
Command-line Method: Computes cosine similarity between 'phraseA' and 'phraseB' using the specified 'filePath'
|
3997
|
|
|
|
|
|
|
for loading trained word2vec word vector data.
|
3998
|
|
|
|
|
|
|
|
3999
|
|
|
|
|
|
|
Note: Supports multiple words concatenated by ':' for each string.
|
4000
|
|
|
|
|
|
|
|
4001
|
|
|
|
|
|
|
Input:
|
4002
|
|
|
|
|
|
|
|
4003
|
|
|
|
|
|
|
$filePath -> Word2Vec trained word vectors binary file path. (String)
|
4004
|
|
|
|
|
|
|
$phraseA -> First phrase for cosine similarity comparison. (String)
|
4005
|
|
|
|
|
|
|
$phraseB -> Second phrase for cosine similarity comparison. (String)
|
4006
|
|
|
|
|
|
|
|
4007
|
|
|
|
|
|
|
Output:
|
4008
|
|
|
|
|
|
|
|
4009
|
|
|
|
|
|
|
$value -> Cosine similarity value (float) or undefined.
|
4010
|
|
|
|
|
|
|
|
4011
|
|
|
|
|
|
|
Example:
|
4012
|
|
|
|
|
|
|
|
4013
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4014
|
|
|
|
|
|
|
|
4015
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4016
|
|
|
|
|
|
|
my $value = $interface->CLComputeMultiWordCosineSimilarity( "../../samples/samplevectors.bin", "heart:attack", "myocardial:infarction" );
|
4017
|
|
|
|
|
|
|
print( "Cosine Similarity Between \"heart attack\" and \"myocardial infarction\": $value\n" ) if defined( $value );
|
4018
|
|
|
|
|
|
|
print( "Error: Cosine Similarity Could Not Be Computed\n" ) if !defined( $value );
|
4019
|
|
|
|
|
|
|
|
4020
|
|
|
|
|
|
|
undef( $interface );
|
4021
|
|
|
|
|
|
|
|
4022
|
|
|
|
|
|
|
=head3 CLComputeAvgOfWordsCosineSimilarity
|
4023
|
|
|
|
|
|
|
|
4024
|
|
|
|
|
|
|
Description:
|
4025
|
|
|
|
|
|
|
|
4026
|
|
|
|
|
|
|
Command-line Method: Computes cosine similarity average of all words in 'phraseA' and 'phraseB',
|
4027
|
|
|
|
|
|
|
then takes cosine similarity between 'phraseA' and 'phraseB' average values using the
|
4028
|
|
|
|
|
|
|
specified 'filePath' for loading trained word2vec word vector data.
|
4029
|
|
|
|
|
|
|
|
4030
|
|
|
|
|
|
|
Note: Supports multiple words concatenated by ':' for each string.
|
4031
|
|
|
|
|
|
|
|
4032
|
|
|
|
|
|
|
Input:
|
4033
|
|
|
|
|
|
|
|
4034
|
|
|
|
|
|
|
$filePath -> Word2Vec trained word vectors binary file path. (String)
|
4035
|
|
|
|
|
|
|
$phraseA -> First phrase for cosine similarity comparison.
|
4036
|
|
|
|
|
|
|
$phraseB -> Second phrase for cosine similarity comparison.
|
4037
|
|
|
|
|
|
|
|
4038
|
|
|
|
|
|
|
Output:
|
4039
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
$value -> Cosine similarity value (float) or undefined.
|
4041
|
|
|
|
|
|
|
|
4042
|
|
|
|
|
|
|
Example:
|
4043
|
|
|
|
|
|
|
|
4044
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4045
|
|
|
|
|
|
|
|
4046
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4047
|
|
|
|
|
|
|
my $value = $interface->CLComputeAvgOfWordsCosineSimilarity( "../../samples/samplevectors.bin", "heart:attack", "myocardial:infarction" );
|
4048
|
|
|
|
|
|
|
print( "Cosine Similarity Between \"heart attack\" and \"myocardial infarction\": $value\n" ) if defined( $value );
|
4049
|
|
|
|
|
|
|
print( "Error: Cosine Similarity Could Not Be Computed\n" ) if !defined( $value );
|
4050
|
|
|
|
|
|
|
|
4051
|
|
|
|
|
|
|
undef( $interface );
|
4052
|
|
|
|
|
|
|
|
4053
|
|
|
|
|
|
|
=head3 CLMultiWordCosSimWithUserInput
|
4054
|
|
|
|
|
|
|
|
4055
|
|
|
|
|
|
|
Description:
|
4056
|
|
|
|
|
|
|
|
4057
|
|
|
|
|
|
|
Command-line Method: Computes cosine similarity depending on user input given a vectorBinaryFile (string).
|
4058
|
|
|
|
|
|
|
|
4059
|
|
|
|
|
|
|
Note: Words can be compounded by the ':' character.
|
4060
|
|
|
|
|
|
|
|
4061
|
|
|
|
|
|
|
Input:
|
4062
|
|
|
|
|
|
|
|
4063
|
|
|
|
|
|
|
$filePath -> Word2Vec trained word vectors binary file path. (String)
|
4064
|
|
|
|
|
|
|
|
4065
|
|
|
|
|
|
|
Output:
|
4066
|
|
|
|
|
|
|
|
4067
|
|
|
|
|
|
|
None
|
4068
|
|
|
|
|
|
|
|
4069
|
|
|
|
|
|
|
Example:
|
4070
|
|
|
|
|
|
|
|
4071
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4072
|
|
|
|
|
|
|
|
4073
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4074
|
|
|
|
|
|
|
$interface->CLMultiWordCosSimWithUserInput( "../../samples/samplevectors.bin" );
|
4075
|
|
|
|
|
|
|
|
4076
|
|
|
|
|
|
|
undef( $interface );
|
4077
|
|
|
|
|
|
|
|
4078
|
|
|
|
|
|
|
=head3 CLAddTwoWordVectors
|
4079
|
|
|
|
|
|
|
|
4080
|
|
|
|
|
|
|
Description:
|
4081
|
|
|
|
|
|
|
|
4082
|
|
|
|
|
|
|
Command-line Method: Loads the specified word2vec trained binary data file, adds word vectors and returns the summed result.
|
4083
|
|
|
|
|
|
|
|
4084
|
|
|
|
|
|
|
Input:
|
4085
|
|
|
|
|
|
|
|
4086
|
|
|
|
|
|
|
$filePath -> Word2Vec trained word vectors binary file path. (String)
|
4087
|
|
|
|
|
|
|
$wordDataA -> Word2Vec word data (String)
|
4088
|
|
|
|
|
|
|
$wordDataB -> Word2Vec word data (String)
|
4089
|
|
|
|
|
|
|
|
4090
|
|
|
|
|
|
|
Output:
|
4091
|
|
|
|
|
|
|
|
4092
|
|
|
|
|
|
|
$vectorData -> Summed '$wordDataA' and '$wordDataB' vectors
|
4093
|
|
|
|
|
|
|
|
4094
|
|
|
|
|
|
|
Example:
|
4095
|
|
|
|
|
|
|
|
4096
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4097
|
|
|
|
|
|
|
|
4098
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4099
|
|
|
|
|
|
|
my $wordVtr = $interface->CLAddTwoWordVectors( "../../samples/samplevectors.bin", "of", "the" );
|
4100
|
|
|
|
|
|
|
|
4101
|
|
|
|
|
|
|
print( "Word Vector for \"of\" + \"the\": $wordVtr\n" ) if defined( $wordVtr );
|
4102
|
|
|
|
|
|
|
print( "Word Vector Cannot Be Computed\n" ) if !defined( $wordVtr );
|
4103
|
|
|
|
|
|
|
|
4104
|
|
|
|
|
|
|
undef( $interface );
|
4105
|
|
|
|
|
|
|
|
4106
|
|
|
|
|
|
|
=head3 CLSubtractTwoWordVectors
|
4107
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
Description:
|
4109
|
|
|
|
|
|
|
|
4110
|
|
|
|
|
|
|
Command-line Method: Loads the specified word2vec trained binary data file, subtracts word vectors and returns the difference result.
|
4111
|
|
|
|
|
|
|
|
4112
|
|
|
|
|
|
|
Input:
|
4113
|
|
|
|
|
|
|
|
4114
|
|
|
|
|
|
|
$filePath -> Word2Vec trained word vectors binary file path. (String)
|
4115
|
|
|
|
|
|
|
$wordDataA -> Word2Vec word data (String)
|
4116
|
|
|
|
|
|
|
$wordDataB -> Word2Vec word data (String)
|
4117
|
|
|
|
|
|
|
|
4118
|
|
|
|
|
|
|
Output:
|
4119
|
|
|
|
|
|
|
|
4120
|
|
|
|
|
|
|
$vectorData -> Difference of '$wordDataA' and '$wordDataB' vectors
|
4121
|
|
|
|
|
|
|
|
4122
|
|
|
|
|
|
|
Example:
|
4123
|
|
|
|
|
|
|
|
4124
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4125
|
|
|
|
|
|
|
|
4126
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4127
|
|
|
|
|
|
|
my $wordVtr = $interface->CLSubtractTwoWordVectors( "../../samples/samplevectors.bin", "of", "the" );
|
4128
|
|
|
|
|
|
|
|
4129
|
|
|
|
|
|
|
print( "Word Vector for \"of\" - \"the\": $wordVtr\n" ) if defined( $wordVtr );
|
4130
|
|
|
|
|
|
|
print( "Word Vector Cannot Be Computed\n" ) if !defined( $wordVtr );
|
4131
|
|
|
|
|
|
|
|
4132
|
|
|
|
|
|
|
undef( $interface );
|
4133
|
|
|
|
|
|
|
|
4134
|
|
|
|
|
|
|
=head3 CLStartWord2VecTraining
|
4135
|
|
|
|
|
|
|
|
4136
|
|
|
|
|
|
|
Description:
|
4137
|
|
|
|
|
|
|
|
4138
|
|
|
|
|
|
|
Command-line Method: Executes word2vec training given the specified options hash.
|
4139
|
|
|
|
|
|
|
|
4140
|
|
|
|
|
|
|
Input:
|
4141
|
|
|
|
|
|
|
|
4142
|
|
|
|
|
|
|
$hashRef -> Hash reference of word2vec options
|
4143
|
|
|
|
|
|
|
|
4144
|
|
|
|
|
|
|
Output:
|
4145
|
|
|
|
|
|
|
|
4146
|
|
|
|
|
|
|
$value -> Returns '0' = Successful / '-1' = Un-successful.
|
4147
|
|
|
|
|
|
|
|
4148
|
|
|
|
|
|
|
Example:
|
4149
|
|
|
|
|
|
|
|
4150
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4151
|
|
|
|
|
|
|
|
4152
|
|
|
|
|
|
|
my %options;
|
4153
|
|
|
|
|
|
|
$options{'-trainfile'} = "../../samples/textcorpus.txt";
|
4154
|
|
|
|
|
|
|
$options{'-outputfile'} = "../../samples/tempvectors.bin";
|
4155
|
|
|
|
|
|
|
|
4156
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4157
|
|
|
|
|
|
|
my $result = $interface->CLStartWord2VecTraining( \%options );
|
4158
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4160
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4161
|
|
|
|
|
|
|
|
4162
|
|
|
|
|
|
|
undef( $interface );
|
4163
|
|
|
|
|
|
|
|
4164
|
|
|
|
|
|
|
=head3 CLStartWord2PhraseTraining
|
4165
|
|
|
|
|
|
|
|
4166
|
|
|
|
|
|
|
Description:
|
4167
|
|
|
|
|
|
|
|
4168
|
|
|
|
|
|
|
Command-line Method: Executes word2phrase training given the specified options hash.
|
4169
|
|
|
|
|
|
|
|
4170
|
|
|
|
|
|
|
Input:
|
4171
|
|
|
|
|
|
|
|
4172
|
|
|
|
|
|
|
$hashRef -> Hash reference of word2phrase options.
|
4173
|
|
|
|
|
|
|
|
4174
|
|
|
|
|
|
|
Output:
|
4175
|
|
|
|
|
|
|
|
4176
|
|
|
|
|
|
|
$value -> Returns '0' = Successful / '-1' = Un-successful.
|
4177
|
|
|
|
|
|
|
|
4178
|
|
|
|
|
|
|
Example:
|
4179
|
|
|
|
|
|
|
|
4180
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4181
|
|
|
|
|
|
|
|
4182
|
|
|
|
|
|
|
my %options;
|
4183
|
|
|
|
|
|
|
$options{'-trainfile'} = "../../samples/textcorpus.txt";
|
4184
|
|
|
|
|
|
|
$options{'-outputfile'} = "../../samples/tempvectors.bin";
|
4185
|
|
|
|
|
|
|
|
4186
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4187
|
|
|
|
|
|
|
my $result = $interface->CLStartWord2PhraseTraining( \%options );
|
4188
|
|
|
|
|
|
|
|
4189
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4190
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4191
|
|
|
|
|
|
|
|
4192
|
|
|
|
|
|
|
undef( $interface );
|
4193
|
|
|
|
|
|
|
|
4194
|
|
|
|
|
|
|
=head3 CLCompileTextCorpus
|
4195
|
|
|
|
|
|
|
|
4196
|
|
|
|
|
|
|
Description:
|
4197
|
|
|
|
|
|
|
|
4198
|
|
|
|
|
|
|
Command-line Method: Compiles a text corpus given the specified options hash.
|
4199
|
|
|
|
|
|
|
|
4200
|
|
|
|
|
|
|
Input:
|
4201
|
|
|
|
|
|
|
|
4202
|
|
|
|
|
|
|
$hashRef -> Hash reference of xmltow2v options.
|
4203
|
|
|
|
|
|
|
|
4204
|
|
|
|
|
|
|
Output:
|
4205
|
|
|
|
|
|
|
|
4206
|
|
|
|
|
|
|
$value -> Returns '0' = Successful / '-1' = Un-successful.
|
4207
|
|
|
|
|
|
|
|
4208
|
|
|
|
|
|
|
Example:
|
4209
|
|
|
|
|
|
|
|
4210
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4211
|
|
|
|
|
|
|
|
4212
|
|
|
|
|
|
|
my %options;
|
4213
|
|
|
|
|
|
|
$options{'-workdir'} = "../../samples";
|
4214
|
|
|
|
|
|
|
$options{'-savedir'} = "../../samples/textcorpus.txt";
|
4215
|
|
|
|
|
|
|
|
4216
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4217
|
|
|
|
|
|
|
my $result = $interface->CLCompileTextCorpus( \%options );
|
4218
|
|
|
|
|
|
|
|
4219
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4220
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4221
|
|
|
|
|
|
|
|
4222
|
|
|
|
|
|
|
undef( $interface );
|
4223
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
=head3 CLConvertWord2VecVectorFileToText
|
4225
|
|
|
|
|
|
|
|
4226
|
|
|
|
|
|
|
Description:
|
4227
|
|
|
|
|
|
|
|
4228
|
|
|
|
|
|
|
Command-line Method: Converts word2vec binary format data to plain text word vector data.
|
4229
|
|
|
|
|
|
|
|
4230
|
|
|
|
|
|
|
Input:
|
4231
|
|
|
|
|
|
|
|
4232
|
|
|
|
|
|
|
$filePath -> Word2Vec binary file path
|
4233
|
|
|
|
|
|
|
$savePath -> Path to save converted file
|
4234
|
|
|
|
|
|
|
|
4235
|
|
|
|
|
|
|
Output:
|
4236
|
|
|
|
|
|
|
|
4237
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
4238
|
|
|
|
|
|
|
|
4239
|
|
|
|
|
|
|
Example:
|
4240
|
|
|
|
|
|
|
|
4241
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4242
|
|
|
|
|
|
|
|
4243
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4244
|
|
|
|
|
|
|
my $result = $interface->CLConvertWord2VecVectorFileToText( "../../samples/samplevectors.bin", "../../samples/convertedvectors.bin" );
|
4245
|
|
|
|
|
|
|
|
4246
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4247
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4248
|
|
|
|
|
|
|
|
4249
|
|
|
|
|
|
|
undef( $interface );
|
4250
|
|
|
|
|
|
|
|
4251
|
|
|
|
|
|
|
=head3 CLConvertWord2VecVectorFileToBinary
|
4252
|
|
|
|
|
|
|
|
4253
|
|
|
|
|
|
|
Description:
|
4254
|
|
|
|
|
|
|
|
4255
|
|
|
|
|
|
|
Command-line Method: Converts plain text word vector data to word2vec binary format.
|
4256
|
|
|
|
|
|
|
|
4257
|
|
|
|
|
|
|
Input:
|
4258
|
|
|
|
|
|
|
|
4259
|
|
|
|
|
|
|
$filePath -> Word2Vec plain text vector file path
|
4260
|
|
|
|
|
|
|
$savePath -> Path to save converted file
|
4261
|
|
|
|
|
|
|
|
4262
|
|
|
|
|
|
|
Output:
|
4263
|
|
|
|
|
|
|
|
4264
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
4265
|
|
|
|
|
|
|
|
4266
|
|
|
|
|
|
|
Example:
|
4267
|
|
|
|
|
|
|
|
4268
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4269
|
|
|
|
|
|
|
|
4270
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4271
|
|
|
|
|
|
|
my $result = $interface->CLConvertWord2VecVectorFileToBinary( "../../samples/samplevectors.bin", "../../samples/convertedvectors.bin" );
|
4272
|
|
|
|
|
|
|
|
4273
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4274
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4275
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
undef( $interface );
|
4277
|
|
|
|
|
|
|
|
4278
|
|
|
|
|
|
|
=head3 CLConvertWord2VecVectorFileToSparse
|
4279
|
|
|
|
|
|
|
|
4280
|
|
|
|
|
|
|
Description:
|
4281
|
|
|
|
|
|
|
|
4282
|
|
|
|
|
|
|
Command-line Method: Converts plain text word vector data to sparse vector data format.
|
4283
|
|
|
|
|
|
|
|
4284
|
|
|
|
|
|
|
Input:
|
4285
|
|
|
|
|
|
|
|
4286
|
|
|
|
|
|
|
$filePath -> Vectors file path
|
4287
|
|
|
|
|
|
|
$savePath -> Path to save converted file
|
4288
|
|
|
|
|
|
|
|
4289
|
|
|
|
|
|
|
Output:
|
4290
|
|
|
|
|
|
|
|
4291
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
4292
|
|
|
|
|
|
|
|
4293
|
|
|
|
|
|
|
Example:
|
4294
|
|
|
|
|
|
|
|
4295
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4296
|
|
|
|
|
|
|
|
4297
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4298
|
|
|
|
|
|
|
my $result = $interface->CLConvertWord2VecVectorFileToSparse( "../../samples/samplevectors.bin", "../../samples/convertedvectors.bin" );
|
4299
|
|
|
|
|
|
|
|
4300
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4301
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4302
|
|
|
|
|
|
|
|
4303
|
|
|
|
|
|
|
undef( $interface );
|
4304
|
|
|
|
|
|
|
|
4305
|
|
|
|
|
|
|
=head3 CLCompoundifyTextInFile
|
4306
|
|
|
|
|
|
|
|
4307
|
|
|
|
|
|
|
Description:
|
4308
|
|
|
|
|
|
|
|
4309
|
|
|
|
|
|
|
Command-line Method: Reads a specified plain text file at 'filePath' and 'compoundWordFile', then compoundifies and saves the file to 'savePath'.
|
4310
|
|
|
|
|
|
|
|
4311
|
|
|
|
|
|
|
Input:
|
4312
|
|
|
|
|
|
|
|
4313
|
|
|
|
|
|
|
$filePath -> Text file to compoundify
|
4314
|
|
|
|
|
|
|
$savePath -> Path to save compoundified file
|
4315
|
|
|
|
|
|
|
$compoundWordFile -> Compound word file path
|
4316
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
Output:
|
4318
|
|
|
|
|
|
|
|
4319
|
|
|
|
|
|
|
$value -> Result '0' = Successful / '-1' = Un-successful
|
4320
|
|
|
|
|
|
|
|
4321
|
|
|
|
|
|
|
Example:
|
4322
|
|
|
|
|
|
|
|
4323
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4324
|
|
|
|
|
|
|
|
4325
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4326
|
|
|
|
|
|
|
my $result = $interface->CLCompoundifyTextInFile( "../../samples/textcorpus.txt", "../../samples/compoundcorpus.txt", "../../samples/compoundword.txt" );
|
4327
|
|
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4329
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4330
|
|
|
|
|
|
|
|
4331
|
|
|
|
|
|
|
undef( $interface );
|
4332
|
|
|
|
|
|
|
|
4333
|
|
|
|
|
|
|
=head3 CLSortVectorFile
|
4334
|
|
|
|
|
|
|
|
4335
|
|
|
|
|
|
|
Description:
|
4336
|
|
|
|
|
|
|
|
4337
|
|
|
|
|
|
|
Reads a specified vector file into memory, sorts it alphanumerically and saves it to a file.
|
4338
|
|
|
|
|
|
|
|
4339
|
|
|
|
|
|
|
Input:
|
4340
|
|
|
|
|
|
|
|
4341
|
|
|
|
|
|
|
$hashRef -> Hash reference of parameters. (File path and overwrite parameters)
|
4342
|
|
|
|
|
|
|
|
4343
|
|
|
|
|
|
|
Output:
|
4344
|
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
$value -> Result '0' = Successful / '-1' = Un-successful
|
4346
|
|
|
|
|
|
|
|
4347
|
|
|
|
|
|
|
Example:
|
4348
|
|
|
|
|
|
|
|
4349
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4350
|
|
|
|
|
|
|
|
4351
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4352
|
|
|
|
|
|
|
|
4353
|
|
|
|
|
|
|
my %options;
|
4354
|
|
|
|
|
|
|
$options{ "-filepath" } = "vectors.bin";
|
4355
|
|
|
|
|
|
|
$options{ "-overwrite" } = 1;
|
4356
|
|
|
|
|
|
|
|
4357
|
|
|
|
|
|
|
my $result = $interface->CLSortVectorFile( \%options );
|
4358
|
|
|
|
|
|
|
|
4359
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4360
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4361
|
|
|
|
|
|
|
|
4362
|
|
|
|
|
|
|
undef( $interface );
|
4363
|
|
|
|
|
|
|
|
4364
|
|
|
|
|
|
|
=head3 CleanWord2VecDirectory
|
4365
|
|
|
|
|
|
|
|
4366
|
|
|
|
|
|
|
Description:
|
4367
|
|
|
|
|
|
|
|
4368
|
|
|
|
|
|
|
Cleans up C object and executable files in word2vec directory.
|
4369
|
|
|
|
|
|
|
|
4370
|
|
|
|
|
|
|
Input:
|
4371
|
|
|
|
|
|
|
|
4372
|
|
|
|
|
|
|
None
|
4373
|
|
|
|
|
|
|
|
4374
|
|
|
|
|
|
|
Output:
|
4375
|
|
|
|
|
|
|
|
4376
|
|
|
|
|
|
|
$value -> Result '0' = Successful / '-1' = Un-successful
|
4377
|
|
|
|
|
|
|
|
4378
|
|
|
|
|
|
|
Example:
|
4379
|
|
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4381
|
|
|
|
|
|
|
|
4382
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4383
|
|
|
|
|
|
|
my $result = $interface->CleanWord2VecDirectory();
|
4384
|
|
|
|
|
|
|
|
4385
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4386
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4387
|
|
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
undef( $interface );
|
4389
|
|
|
|
|
|
|
|
4390
|
|
|
|
|
|
|
=head3 CLSimilarityAvg
|
4391
|
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
Description:
|
4393
|
|
|
|
|
|
|
|
4394
|
|
|
|
|
|
|
Computes cosine similarity of average values for a list of specified word comparisons given a file.
|
4395
|
|
|
|
|
|
|
|
4396
|
|
|
|
|
|
|
Note: Trained vector data must be loaded in memory previously before calling this method.
|
4397
|
|
|
|
|
|
|
|
4398
|
|
|
|
|
|
|
Input:
|
4399
|
|
|
|
|
|
|
|
4400
|
|
|
|
|
|
|
$filePath -> Text file with list of word comparisons by line.
|
4401
|
|
|
|
|
|
|
|
4402
|
|
|
|
|
|
|
Output:
|
4403
|
|
|
|
|
|
|
|
4404
|
|
|
|
|
|
|
$value -> Result '0' = Successful / '-1' = Un-successful
|
4405
|
|
|
|
|
|
|
|
4406
|
|
|
|
|
|
|
Example:
|
4407
|
|
|
|
|
|
|
|
4408
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4409
|
|
|
|
|
|
|
|
4410
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4411
|
|
|
|
|
|
|
my $result = $interface->W2VReadTrainedVectorDataFromFile( "vectors.bin" );
|
4412
|
|
|
|
|
|
|
$result = $interface->CLSimilarityAvg( "MiniMayoSRS.terms" ) if $result == 0;
|
4413
|
|
|
|
|
|
|
|
4414
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4415
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4416
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
undef( $interface );
|
4418
|
|
|
|
|
|
|
|
4419
|
|
|
|
|
|
|
=head3 CLSimilarityComp
|
4420
|
|
|
|
|
|
|
|
4421
|
|
|
|
|
|
|
Description:
|
4422
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
Computes cosine similarity values for a list of specified compound word comparisons given a file.
|
4424
|
|
|
|
|
|
|
|
4425
|
|
|
|
|
|
|
Note: Trained vector data must be loaded in memory previously before calling this method.
|
4426
|
|
|
|
|
|
|
|
4427
|
|
|
|
|
|
|
Input:
|
4428
|
|
|
|
|
|
|
|
4429
|
|
|
|
|
|
|
$filePath -> Text file with list of word comparisons by line.
|
4430
|
|
|
|
|
|
|
|
4431
|
|
|
|
|
|
|
Output:
|
4432
|
|
|
|
|
|
|
|
4433
|
|
|
|
|
|
|
$value -> Result '0' = Successful / '-1' = Un-successful
|
4434
|
|
|
|
|
|
|
|
4435
|
|
|
|
|
|
|
Example:
|
4436
|
|
|
|
|
|
|
|
4437
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4438
|
|
|
|
|
|
|
|
4439
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4440
|
|
|
|
|
|
|
my $result = $interface->W2VReadTrainedVectorDataFromFile( "vectors.bin" );
|
4441
|
|
|
|
|
|
|
$result = $interface->CLSimilarityComp( "MiniMayoSRS.terms" ) if $result == 0;
|
4442
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4444
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4445
|
|
|
|
|
|
|
|
4446
|
|
|
|
|
|
|
undef( $interface );
|
4447
|
|
|
|
|
|
|
|
4448
|
|
|
|
|
|
|
=head3 CLSimilaritySum
|
4449
|
|
|
|
|
|
|
|
4450
|
|
|
|
|
|
|
Description:
|
4451
|
|
|
|
|
|
|
|
4452
|
|
|
|
|
|
|
Computes cosine similarity of summed values for a list of specified word comparisons given a file.
|
4453
|
|
|
|
|
|
|
|
4454
|
|
|
|
|
|
|
Note: Trained vector data must be loaded in memory previously before calling this method.
|
4455
|
|
|
|
|
|
|
|
4456
|
|
|
|
|
|
|
Input:
|
4457
|
|
|
|
|
|
|
|
4458
|
|
|
|
|
|
|
$filePath -> Text file with list of word comparisons by line.
|
4459
|
|
|
|
|
|
|
|
4460
|
|
|
|
|
|
|
Output:
|
4461
|
|
|
|
|
|
|
|
4462
|
|
|
|
|
|
|
$value -> Result '0' = Successful / '-1' = Un-successful
|
4463
|
|
|
|
|
|
|
|
4464
|
|
|
|
|
|
|
Example:
|
4465
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4467
|
|
|
|
|
|
|
|
4468
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4469
|
|
|
|
|
|
|
my $result = $interface->W2VReadTrainedVectorDataFromFile( "vectors.bin" );
|
4470
|
|
|
|
|
|
|
$result = $interface->CLSimilaritySum( "MiniMayoSRS.terms" ) if $result == 0;
|
4471
|
|
|
|
|
|
|
|
4472
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4473
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4474
|
|
|
|
|
|
|
|
4475
|
|
|
|
|
|
|
undef( $interface );
|
4476
|
|
|
|
|
|
|
|
4477
|
|
|
|
|
|
|
=head3 CLWordSenseDisambiguation
|
4478
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
Description:
|
4480
|
|
|
|
|
|
|
|
4481
|
|
|
|
|
|
|
Command-line Method: Assigns a particular sense to each instance using word2vec trained word vector data.
|
4482
|
|
|
|
|
|
|
Stop words are removed if a stoplist is specified before computing cosine similarity average of each instance
|
4483
|
|
|
|
|
|
|
and sense context.
|
4484
|
|
|
|
|
|
|
|
4485
|
|
|
|
|
|
|
Input:
|
4486
|
|
|
|
|
|
|
|
4487
|
|
|
|
|
|
|
$instanceFilePath -> WSD instance file path
|
4488
|
|
|
|
|
|
|
$senseFilePath -> WSD sense file path
|
4489
|
|
|
|
|
|
|
$stopListfilePath -> Stop list file path
|
4490
|
|
|
|
|
|
|
|
4491
|
|
|
|
|
|
|
Output:
|
4492
|
|
|
|
|
|
|
|
4493
|
|
|
|
|
|
|
$value -> Returns '0' = Successful / '-1' = Un-successful
|
4494
|
|
|
|
|
|
|
|
4495
|
|
|
|
|
|
|
Example:
|
4496
|
|
|
|
|
|
|
|
4497
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4498
|
|
|
|
|
|
|
|
4499
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4500
|
|
|
|
|
|
|
my $result = $interface->CLWordSenseDisambiguation( "ACE.instances.sval", "ACE.senses.sval", "vectors.bin", "stoplist" );
|
4501
|
|
|
|
|
|
|
|
4502
|
|
|
|
|
|
|
print( "Success!\n" ) if $result == 0;
|
4503
|
|
|
|
|
|
|
print( "Failed!\n" ) if $result == -1;
|
4504
|
|
|
|
|
|
|
|
4505
|
|
|
|
|
|
|
undef( $interface );
|
4506
|
|
|
|
|
|
|
|
4507
|
|
|
|
|
|
|
=head3 WSDAnalyzeSenseData
|
4508
|
|
|
|
|
|
|
|
4509
|
|
|
|
|
|
|
Description:
|
4510
|
|
|
|
|
|
|
|
4511
|
|
|
|
|
|
|
Analyzes sense sval files for identification number mismatch and adjusts accordingly in memory.
|
4512
|
|
|
|
|
|
|
|
4513
|
|
|
|
|
|
|
Input:
|
4514
|
|
|
|
|
|
|
|
4515
|
|
|
|
|
|
|
None
|
4516
|
|
|
|
|
|
|
|
4517
|
|
|
|
|
|
|
Output:
|
4518
|
|
|
|
|
|
|
|
4519
|
|
|
|
|
|
|
None
|
4520
|
|
|
|
|
|
|
|
4521
|
|
|
|
|
|
|
Example:
|
4522
|
|
|
|
|
|
|
|
4523
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4524
|
|
|
|
|
|
|
|
4525
|
|
|
|
|
|
|
=head3 WSDReadList
|
4526
|
|
|
|
|
|
|
|
4527
|
|
|
|
|
|
|
Description:
|
4528
|
|
|
|
|
|
|
|
4529
|
|
|
|
|
|
|
Reads a WSD list when the '-list' parameter is specified.
|
4530
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
Input:
|
4532
|
|
|
|
|
|
|
|
4533
|
|
|
|
|
|
|
$listPath -> WSD list file path
|
4534
|
|
|
|
|
|
|
|
4535
|
|
|
|
|
|
|
Output:
|
4536
|
|
|
|
|
|
|
|
4537
|
|
|
|
|
|
|
\%listOfFile -> List of files hash reference
|
4538
|
|
|
|
|
|
|
|
4539
|
|
|
|
|
|
|
Example:
|
4540
|
|
|
|
|
|
|
|
4541
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4542
|
|
|
|
|
|
|
|
4543
|
|
|
|
|
|
|
=head3 WSDParseList
|
4544
|
|
|
|
|
|
|
|
4545
|
|
|
|
|
|
|
Description:
|
4546
|
|
|
|
|
|
|
|
4547
|
|
|
|
|
|
|
Parses the specified list of files for Word Sense Disambiguation computation.
|
4548
|
|
|
|
|
|
|
|
4549
|
|
|
|
|
|
|
Input:
|
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
$listOfFilesHashRef -> Hash reference to a hash of file paths
|
4552
|
|
|
|
|
|
|
$vectorBinaryFile -> Word2vec trained word vector data file
|
4553
|
|
|
|
|
|
|
$stopListFilePath -> Stop list file path
|
4554
|
|
|
|
|
|
|
|
4555
|
|
|
|
|
|
|
Output:
|
4556
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
4558
|
|
|
|
|
|
|
|
4559
|
|
|
|
|
|
|
Example:
|
4560
|
|
|
|
|
|
|
|
4561
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4562
|
|
|
|
|
|
|
|
4563
|
|
|
|
|
|
|
=head3 WSDParseFile
|
4564
|
|
|
|
|
|
|
|
4565
|
|
|
|
|
|
|
Description:
|
4566
|
|
|
|
|
|
|
|
4567
|
|
|
|
|
|
|
Parses a specified file in SVL format and stores all context in memory. Utilized for
|
4568
|
|
|
|
|
|
|
Word Sense Disambiguation cosine similarity computation.
|
4569
|
|
|
|
|
|
|
|
4570
|
|
|
|
|
|
|
Input:
|
4571
|
|
|
|
|
|
|
|
4572
|
|
|
|
|
|
|
$filePath -> WSD instance or sense file path
|
4573
|
|
|
|
|
|
|
$stopListRegex -> Stop list regex ( Automatically generated with stop list file )
|
4574
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
Output:
|
4576
|
|
|
|
|
|
|
|
4577
|
|
|
|
|
|
|
$arrayReference -> Array reference of WSD instances or WSD senses in memory.
|
4578
|
|
|
|
|
|
|
|
4579
|
|
|
|
|
|
|
Example:
|
4580
|
|
|
|
|
|
|
|
4581
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4582
|
|
|
|
|
|
|
|
4583
|
|
|
|
|
|
|
=head3 WSDCalculateCosineAvgSimiliarity
|
4584
|
|
|
|
|
|
|
|
4585
|
|
|
|
|
|
|
Description:
|
4586
|
|
|
|
|
|
|
|
4587
|
|
|
|
|
|
|
For each instance stored in memory, this method computes an average cosine similarity for the context
|
4588
|
|
|
|
|
|
|
of each instance and sense with stop words removed via stop list regex. After average cosine similarity
|
4589
|
|
|
|
|
|
|
values are calculated for each instance and sense, the cosine similarity of each instance and sense is
|
4590
|
|
|
|
|
|
|
computed. The highest cosine similarity value of a given instance to a particular sense is assigned and
|
4591
|
|
|
|
|
|
|
stored.
|
4592
|
|
|
|
|
|
|
|
4593
|
|
|
|
|
|
|
Input:
|
4594
|
|
|
|
|
|
|
|
4595
|
|
|
|
|
|
|
None
|
4596
|
|
|
|
|
|
|
|
4597
|
|
|
|
|
|
|
Output:
|
4598
|
|
|
|
|
|
|
|
4599
|
|
|
|
|
|
|
$value -> Returns '0' = Successful / '-1' = Un-successful
|
4600
|
|
|
|
|
|
|
|
4601
|
|
|
|
|
|
|
Example:
|
4602
|
|
|
|
|
|
|
|
4603
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4604
|
|
|
|
|
|
|
|
4605
|
|
|
|
|
|
|
=head3 WSDCalculateAccuracy
|
4606
|
|
|
|
|
|
|
|
4607
|
|
|
|
|
|
|
Description:
|
4608
|
|
|
|
|
|
|
|
4609
|
|
|
|
|
|
|
Computes accuracy of assigned sense identification for each instance in memory.
|
4610
|
|
|
|
|
|
|
|
4611
|
|
|
|
|
|
|
Input:
|
4612
|
|
|
|
|
|
|
|
4613
|
|
|
|
|
|
|
None
|
4614
|
|
|
|
|
|
|
|
4615
|
|
|
|
|
|
|
Output:
|
4616
|
|
|
|
|
|
|
|
4617
|
|
|
|
|
|
|
$value -> Returns accuracy percentage (float) or '-1' if un-successful.
|
4618
|
|
|
|
|
|
|
|
4619
|
|
|
|
|
|
|
Example:
|
4620
|
|
|
|
|
|
|
|
4621
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4622
|
|
|
|
|
|
|
|
4623
|
|
|
|
|
|
|
=head3 WSDPrintResults
|
4624
|
|
|
|
|
|
|
|
4625
|
|
|
|
|
|
|
Description:
|
4626
|
|
|
|
|
|
|
|
4627
|
|
|
|
|
|
|
For each instance, this method prints standard information to the console window consisting of:
|
4628
|
|
|
|
|
|
|
|
4629
|
|
|
|
|
|
|
=over 4
|
4630
|
|
|
|
|
|
|
|
4631
|
|
|
|
|
|
|
=item I
|
4632
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
=item I
|
4634
|
|
|
|
|
|
|
|
4635
|
|
|
|
|
|
|
=item I
|
4636
|
|
|
|
|
|
|
|
4637
|
|
|
|
|
|
|
=item I
|
4638
|
|
|
|
|
|
|
|
4639
|
|
|
|
|
|
|
=back
|
4640
|
|
|
|
|
|
|
|
4641
|
|
|
|
|
|
|
Note: Only prints to console if the '--debuglog' or '--writelog' option is passed.
|
4642
|
|
|
|
|
|
|
|
4643
|
|
|
|
|
|
|
Input:
|
4644
|
|
|
|
|
|
|
|
4645
|
|
|
|
|
|
|
None
|
4646
|
|
|
|
|
|
|
|
4647
|
|
|
|
|
|
|
Output:
|
4648
|
|
|
|
|
|
|
|
4649
|
|
|
|
|
|
|
None
|
4650
|
|
|
|
|
|
|
|
4651
|
|
|
|
|
|
|
Example:
|
4652
|
|
|
|
|
|
|
|
4653
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4654
|
|
|
|
|
|
|
|
4655
|
|
|
|
|
|
|
=head3 WSDSaveResults
|
4656
|
|
|
|
|
|
|
|
4657
|
|
|
|
|
|
|
Description:
|
4658
|
|
|
|
|
|
|
|
4659
|
|
|
|
|
|
|
Saves WSD results post sense identification assignment in the 'instanceFilePath' (string) location. Saved data consists of:
|
4660
|
|
|
|
|
|
|
|
4661
|
|
|
|
|
|
|
=over 4
|
4662
|
|
|
|
|
|
|
|
4663
|
|
|
|
|
|
|
=item I
|
4664
|
|
|
|
|
|
|
|
4665
|
|
|
|
|
|
|
=item I
|
4666
|
|
|
|
|
|
|
|
4667
|
|
|
|
|
|
|
=item I
|
4668
|
|
|
|
|
|
|
|
4669
|
|
|
|
|
|
|
=item I
|
4670
|
|
|
|
|
|
|
|
4671
|
|
|
|
|
|
|
=back
|
4672
|
|
|
|
|
|
|
|
4673
|
|
|
|
|
|
|
Input:
|
4674
|
|
|
|
|
|
|
|
4675
|
|
|
|
|
|
|
$instanceFilePath -> WSD instance file path
|
4676
|
|
|
|
|
|
|
|
4677
|
|
|
|
|
|
|
Output:
|
4678
|
|
|
|
|
|
|
|
4679
|
|
|
|
|
|
|
None
|
4680
|
|
|
|
|
|
|
|
4681
|
|
|
|
|
|
|
Example:
|
4682
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4684
|
|
|
|
|
|
|
|
4685
|
|
|
|
|
|
|
=head3 WSDGenerateAccuracyReport
|
4686
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
Description:
|
4688
|
|
|
|
|
|
|
|
4689
|
|
|
|
|
|
|
Fetches saved results for all instance files and stores accuracies for each in a text file.
|
4690
|
|
|
|
|
|
|
|
4691
|
|
|
|
|
|
|
Input:
|
4692
|
|
|
|
|
|
|
|
4693
|
|
|
|
|
|
|
$workingDirectory -> Directory of "*.results.txt" files
|
4694
|
|
|
|
|
|
|
|
4695
|
|
|
|
|
|
|
Output:
|
4696
|
|
|
|
|
|
|
|
4697
|
|
|
|
|
|
|
None
|
4698
|
|
|
|
|
|
|
|
4699
|
|
|
|
|
|
|
Example:
|
4700
|
|
|
|
|
|
|
|
4701
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4702
|
|
|
|
|
|
|
|
4703
|
|
|
|
|
|
|
=head3 _WSDStop
|
4704
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
Description:
|
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
Generates and returns a stop list regex given a 'stopListFilePath' (string). Returns undefined in the event of an error.
|
4708
|
|
|
|
|
|
|
|
4709
|
|
|
|
|
|
|
Input:
|
4710
|
|
|
|
|
|
|
|
4711
|
|
|
|
|
|
|
$stopListFilePath -> WSD Stop list file path
|
4712
|
|
|
|
|
|
|
|
4713
|
|
|
|
|
|
|
Output:
|
4714
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
$stopListRegex -> Returns stop list regex of the WSD stop list file.
|
4716
|
|
|
|
|
|
|
|
4717
|
|
|
|
|
|
|
Example:
|
4718
|
|
|
|
|
|
|
|
4719
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4720
|
|
|
|
|
|
|
|
4721
|
|
|
|
|
|
|
=head3 ConvertStringLineEndingsToTargetOS
|
4722
|
|
|
|
|
|
|
|
4723
|
|
|
|
|
|
|
Description:
|
4724
|
|
|
|
|
|
|
|
4725
|
|
|
|
|
|
|
Converts passed string parameter to current OS line ending format.
|
4726
|
|
|
|
|
|
|
|
4727
|
|
|
|
|
|
|
ie. DOS/Windows to Unix/Linux or Unix/Linux to DOS/Windows.
|
4728
|
|
|
|
|
|
|
|
4729
|
|
|
|
|
|
|
Warning: This is incompatible with the legacy MacOS format, errors may occur as it is not supported.
|
4730
|
|
|
|
|
|
|
|
4731
|
|
|
|
|
|
|
Input:
|
4732
|
|
|
|
|
|
|
|
4733
|
|
|
|
|
|
|
$string -> String to convert
|
4734
|
|
|
|
|
|
|
|
4735
|
|
|
|
|
|
|
Output:
|
4736
|
|
|
|
|
|
|
|
4737
|
|
|
|
|
|
|
$string -> Output data with target OS line endings.
|
4738
|
|
|
|
|
|
|
|
4739
|
|
|
|
|
|
|
Example:
|
4740
|
|
|
|
|
|
|
|
4741
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4742
|
|
|
|
|
|
|
|
4743
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4744
|
|
|
|
|
|
|
|
4745
|
|
|
|
|
|
|
my $tempStr = "samples text\r\n";
|
4746
|
|
|
|
|
|
|
$tempStr = $interface->ConvertStringLineEndingsToTargetOS( $tempStr );
|
4747
|
|
|
|
|
|
|
|
4748
|
|
|
|
|
|
|
undef( $interface );
|
4749
|
|
|
|
|
|
|
|
4750
|
|
|
|
|
|
|
=head2 Interface Accessor Functions
|
4751
|
|
|
|
|
|
|
|
4752
|
|
|
|
|
|
|
=head3 GetWord2VecDir
|
4753
|
|
|
|
|
|
|
|
4754
|
|
|
|
|
|
|
Description:
|
4755
|
|
|
|
|
|
|
|
4756
|
|
|
|
|
|
|
Returns word2vec executable/source directory.
|
4757
|
|
|
|
|
|
|
|
4758
|
|
|
|
|
|
|
Input:
|
4759
|
|
|
|
|
|
|
|
4760
|
|
|
|
|
|
|
None
|
4761
|
|
|
|
|
|
|
|
4762
|
|
|
|
|
|
|
Output:
|
4763
|
|
|
|
|
|
|
|
4764
|
|
|
|
|
|
|
$string -> Word2vec file path
|
4765
|
|
|
|
|
|
|
|
4766
|
|
|
|
|
|
|
Example:
|
4767
|
|
|
|
|
|
|
|
4768
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4769
|
|
|
|
|
|
|
|
4770
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4771
|
|
|
|
|
|
|
my $filePath = $interface->GetWord2VecDir();
|
4772
|
|
|
|
|
|
|
|
4773
|
|
|
|
|
|
|
print( "FilePath: $filePath\n" );
|
4774
|
|
|
|
|
|
|
|
4775
|
|
|
|
|
|
|
undef( $interface );
|
4776
|
|
|
|
|
|
|
|
4777
|
|
|
|
|
|
|
=head3 GetDebugLog
|
4778
|
|
|
|
|
|
|
|
4779
|
|
|
|
|
|
|
Description:
|
4780
|
|
|
|
|
|
|
|
4781
|
|
|
|
|
|
|
Returns the _debugLog member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4782
|
|
|
|
|
|
|
|
4783
|
|
|
|
|
|
|
Input:
|
4784
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
None
|
4786
|
|
|
|
|
|
|
|
4787
|
|
|
|
|
|
|
Output:
|
4788
|
|
|
|
|
|
|
|
4789
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True
|
4790
|
|
|
|
|
|
|
|
4791
|
|
|
|
|
|
|
Example:
|
4792
|
|
|
|
|
|
|
|
4793
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4794
|
|
|
|
|
|
|
|
4795
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4796
|
|
|
|
|
|
|
my $debugLog = $interface->GetDebugLog();
|
4797
|
|
|
|
|
|
|
|
4798
|
|
|
|
|
|
|
print( "Debug Logging Enabled\n" ) if $debugLog == 1;
|
4799
|
|
|
|
|
|
|
print( "Debug Logging Disabled\n" ) if $debugLog == 0;
|
4800
|
|
|
|
|
|
|
|
4801
|
|
|
|
|
|
|
undef( $interface );
|
4802
|
|
|
|
|
|
|
|
4803
|
|
|
|
|
|
|
=head3 GetWriteLog
|
4804
|
|
|
|
|
|
|
|
4805
|
|
|
|
|
|
|
Description:
|
4806
|
|
|
|
|
|
|
|
4807
|
|
|
|
|
|
|
Returns the _writeLog member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4808
|
|
|
|
|
|
|
|
4809
|
|
|
|
|
|
|
Input:
|
4810
|
|
|
|
|
|
|
|
4811
|
|
|
|
|
|
|
None
|
4812
|
|
|
|
|
|
|
|
4813
|
|
|
|
|
|
|
Output:
|
4814
|
|
|
|
|
|
|
|
4815
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True
|
4816
|
|
|
|
|
|
|
|
4817
|
|
|
|
|
|
|
Example:
|
4818
|
|
|
|
|
|
|
|
4819
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4820
|
|
|
|
|
|
|
|
4821
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4822
|
|
|
|
|
|
|
my $writeLog = $interface->GetWriteLog();
|
4823
|
|
|
|
|
|
|
|
4824
|
|
|
|
|
|
|
print( "Write Logging Enabled\n" ) if $writeLog == 1;
|
4825
|
|
|
|
|
|
|
print( "Write Logging Disabled\n" ) if $writeLog == 0;
|
4826
|
|
|
|
|
|
|
|
4827
|
|
|
|
|
|
|
undef( $interface );
|
4828
|
|
|
|
|
|
|
|
4829
|
|
|
|
|
|
|
=head3 GetIgnoreCompileErrors
|
4830
|
|
|
|
|
|
|
|
4831
|
|
|
|
|
|
|
Description:
|
4832
|
|
|
|
|
|
|
|
4833
|
|
|
|
|
|
|
Returns the _ignoreCompileErrors member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4834
|
|
|
|
|
|
|
|
4835
|
|
|
|
|
|
|
Input:
|
4836
|
|
|
|
|
|
|
|
4837
|
|
|
|
|
|
|
None
|
4838
|
|
|
|
|
|
|
|
4839
|
|
|
|
|
|
|
Output:
|
4840
|
|
|
|
|
|
|
|
4841
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True
|
4842
|
|
|
|
|
|
|
|
4843
|
|
|
|
|
|
|
Example:
|
4844
|
|
|
|
|
|
|
|
4845
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4846
|
|
|
|
|
|
|
|
4847
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4848
|
|
|
|
|
|
|
my $ignoreCompileErrors = $interface->GetIgnoreCompileErrors();
|
4849
|
|
|
|
|
|
|
|
4850
|
|
|
|
|
|
|
print( "Ignore Compile Errors Enabled\n" ) if $ignoreCompileErrors == 1;
|
4851
|
|
|
|
|
|
|
print( "Ignore Compile Errors Disabled\n" ) if $ignoreCompileErrors == 0;
|
4852
|
|
|
|
|
|
|
|
4853
|
|
|
|
|
|
|
undef( $interface );
|
4854
|
|
|
|
|
|
|
|
4855
|
|
|
|
|
|
|
=head3 GetIgnoreFileChecks
|
4856
|
|
|
|
|
|
|
|
4857
|
|
|
|
|
|
|
Description:
|
4858
|
|
|
|
|
|
|
|
4859
|
|
|
|
|
|
|
Returns the _ignoreFileChecks member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4860
|
|
|
|
|
|
|
|
4861
|
|
|
|
|
|
|
Input:
|
4862
|
|
|
|
|
|
|
|
4863
|
|
|
|
|
|
|
None
|
4864
|
|
|
|
|
|
|
|
4865
|
|
|
|
|
|
|
Output:
|
4866
|
|
|
|
|
|
|
|
4867
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True
|
4868
|
|
|
|
|
|
|
|
4869
|
|
|
|
|
|
|
Example:
|
4870
|
|
|
|
|
|
|
|
4871
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4872
|
|
|
|
|
|
|
|
4873
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4874
|
|
|
|
|
|
|
my $ignoreFileChecks = $interface->GetIgnoreFileChecks();
|
4875
|
|
|
|
|
|
|
|
4876
|
|
|
|
|
|
|
print( "Ignore File Checks Enabled\n" ) if $ignoreFileChecks == 1;
|
4877
|
|
|
|
|
|
|
print( "Ignore File Checks Disabled\n" ) if $ignoreFileChecks == 0;
|
4878
|
|
|
|
|
|
|
|
4879
|
|
|
|
|
|
|
undef( $interface );
|
4880
|
|
|
|
|
|
|
|
4881
|
|
|
|
|
|
|
=head3 GetExitFlag
|
4882
|
|
|
|
|
|
|
|
4883
|
|
|
|
|
|
|
Description:
|
4884
|
|
|
|
|
|
|
|
4885
|
|
|
|
|
|
|
Returns the _exitFlag member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4886
|
|
|
|
|
|
|
|
4887
|
|
|
|
|
|
|
Input:
|
4888
|
|
|
|
|
|
|
|
4889
|
|
|
|
|
|
|
None
|
4890
|
|
|
|
|
|
|
|
4891
|
|
|
|
|
|
|
Output:
|
4892
|
|
|
|
|
|
|
|
4893
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True
|
4894
|
|
|
|
|
|
|
|
4895
|
|
|
|
|
|
|
Example:
|
4896
|
|
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4898
|
|
|
|
|
|
|
|
4899
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4900
|
|
|
|
|
|
|
my $exitFlag = $interface->GetExitFlag();
|
4901
|
|
|
|
|
|
|
|
4902
|
|
|
|
|
|
|
print( "Exit Flag Set\n" ) if $exitFlag == 1;
|
4903
|
|
|
|
|
|
|
print( "Exit Flag Not Set\n" ) if $exitFlag == 0;
|
4904
|
|
|
|
|
|
|
|
4905
|
|
|
|
|
|
|
undef( $interface );
|
4906
|
|
|
|
|
|
|
|
4907
|
|
|
|
|
|
|
=head3 GetFileHandle
|
4908
|
|
|
|
|
|
|
|
4909
|
|
|
|
|
|
|
Description:
|
4910
|
|
|
|
|
|
|
|
4911
|
|
|
|
|
|
|
Returns file handle used by WriteLog() method.
|
4912
|
|
|
|
|
|
|
|
4913
|
|
|
|
|
|
|
Input:
|
4914
|
|
|
|
|
|
|
|
4915
|
|
|
|
|
|
|
None
|
4916
|
|
|
|
|
|
|
|
4917
|
|
|
|
|
|
|
Output:
|
4918
|
|
|
|
|
|
|
|
4919
|
|
|
|
|
|
|
$fileHandle -> Returns file handle blob used by 'WriteLog()' function or undefined.
|
4920
|
|
|
|
|
|
|
|
4921
|
|
|
|
|
|
|
Example:
|
4922
|
|
|
|
|
|
|
|
4923
|
|
|
|
|
|
|
This is a private function and should not be utilized.
|
4924
|
|
|
|
|
|
|
|
4925
|
|
|
|
|
|
|
=head3 GetWorkingDirectory
|
4926
|
|
|
|
|
|
|
|
4927
|
|
|
|
|
|
|
Description:
|
4928
|
|
|
|
|
|
|
|
4929
|
|
|
|
|
|
|
Returns the _workingDir member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4930
|
|
|
|
|
|
|
|
4931
|
|
|
|
|
|
|
Input:
|
4932
|
|
|
|
|
|
|
|
4933
|
|
|
|
|
|
|
None
|
4934
|
|
|
|
|
|
|
|
4935
|
|
|
|
|
|
|
Output:
|
4936
|
|
|
|
|
|
|
|
4937
|
|
|
|
|
|
|
$string -> Returns working directory
|
4938
|
|
|
|
|
|
|
|
4939
|
|
|
|
|
|
|
Example:
|
4940
|
|
|
|
|
|
|
|
4941
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4942
|
|
|
|
|
|
|
|
4943
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4944
|
|
|
|
|
|
|
my $dir = $interface->GetWorkingDirectory();
|
4945
|
|
|
|
|
|
|
|
4946
|
|
|
|
|
|
|
print( "Working Directory: $dir\n" );
|
4947
|
|
|
|
|
|
|
|
4948
|
|
|
|
|
|
|
undef( $interface );
|
4949
|
|
|
|
|
|
|
|
4950
|
|
|
|
|
|
|
=head3 GetWord2VecHandler
|
4951
|
|
|
|
|
|
|
|
4952
|
|
|
|
|
|
|
Description:
|
4953
|
|
|
|
|
|
|
|
4954
|
|
|
|
|
|
|
Returns the _word2vec member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4955
|
|
|
|
|
|
|
|
4956
|
|
|
|
|
|
|
Note: This returns a new object if not defined with word2vec::_debugLog and word2vec::_writeLog parameters mirroring interface::_debugLog and interface::_writeLog.
|
4957
|
|
|
|
|
|
|
|
4958
|
|
|
|
|
|
|
Input:
|
4959
|
|
|
|
|
|
|
|
4960
|
|
|
|
|
|
|
None
|
4961
|
|
|
|
|
|
|
|
4962
|
|
|
|
|
|
|
Output:
|
4963
|
|
|
|
|
|
|
|
4964
|
|
|
|
|
|
|
Word2vec::Word2vec -> Returns 'Word2vec::Word2vec' object.
|
4965
|
|
|
|
|
|
|
|
4966
|
|
|
|
|
|
|
Example:
|
4967
|
|
|
|
|
|
|
|
4968
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4969
|
|
|
|
|
|
|
|
4970
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4971
|
|
|
|
|
|
|
my $word2vec = $interface->GetWord2VecHandler();
|
4972
|
|
|
|
|
|
|
|
4973
|
|
|
|
|
|
|
undef( $word2vec );
|
4974
|
|
|
|
|
|
|
undef( $interface );
|
4975
|
|
|
|
|
|
|
|
4976
|
|
|
|
|
|
|
=head3 GetWord2PhraseHandler
|
4977
|
|
|
|
|
|
|
|
4978
|
|
|
|
|
|
|
Description:
|
4979
|
|
|
|
|
|
|
|
4980
|
|
|
|
|
|
|
Returns the _word2phrase member variable set during Word2vec::Interface object initialization in the 'new' function.
|
4981
|
|
|
|
|
|
|
|
4982
|
|
|
|
|
|
|
Note: This returns a new object if not defined with word2vec::_debugLog and word2vec::_writeLog parameters mirroring interface::_debugLog and interface::_writeLog.
|
4983
|
|
|
|
|
|
|
|
4984
|
|
|
|
|
|
|
Input:
|
4985
|
|
|
|
|
|
|
|
4986
|
|
|
|
|
|
|
None
|
4987
|
|
|
|
|
|
|
|
4988
|
|
|
|
|
|
|
Output:
|
4989
|
|
|
|
|
|
|
|
4990
|
|
|
|
|
|
|
Word2vec::Word2phrase -> Returns 'Word2vec::Word2phrase' object
|
4991
|
|
|
|
|
|
|
|
4992
|
|
|
|
|
|
|
Example:
|
4993
|
|
|
|
|
|
|
|
4994
|
|
|
|
|
|
|
use Word2vec::Interface;
|
4995
|
|
|
|
|
|
|
|
4996
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
4997
|
|
|
|
|
|
|
my $word2phrase = $interface->GetWord2PhraseHandler();
|
4998
|
|
|
|
|
|
|
|
4999
|
|
|
|
|
|
|
undef( $word2phrase );
|
5000
|
|
|
|
|
|
|
undef( $interface );
|
5001
|
|
|
|
|
|
|
|
5002
|
|
|
|
|
|
|
=head3 GetXMLToW2VHandler
|
5003
|
|
|
|
|
|
|
|
5004
|
|
|
|
|
|
|
Description:
|
5005
|
|
|
|
|
|
|
|
5006
|
|
|
|
|
|
|
Returns the _xmltow2v member variable set during Word2vec::Interface object initialization in the 'new' function.
|
5007
|
|
|
|
|
|
|
|
5008
|
|
|
|
|
|
|
Note: This returns a new object if not defined with word2vec::_debugLog and word2vec::_writeLog parameters mirroring interface::_debugLog and interface::_writeLog.
|
5009
|
|
|
|
|
|
|
|
5010
|
|
|
|
|
|
|
Input:
|
5011
|
|
|
|
|
|
|
|
5012
|
|
|
|
|
|
|
None
|
5013
|
|
|
|
|
|
|
|
5014
|
|
|
|
|
|
|
Output:
|
5015
|
|
|
|
|
|
|
|
5016
|
|
|
|
|
|
|
Word2vec::Xmltow2v -> Returns 'Word2vec::Xmltow2v' object
|
5017
|
|
|
|
|
|
|
|
5018
|
|
|
|
|
|
|
Example:
|
5019
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5021
|
|
|
|
|
|
|
|
5022
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5023
|
|
|
|
|
|
|
my $xmltow2v = $interface->GetXMLToW2VHandler();
|
5024
|
|
|
|
|
|
|
|
5025
|
|
|
|
|
|
|
undef( $xmltow2v );
|
5026
|
|
|
|
|
|
|
undef( $interface );
|
5027
|
|
|
|
|
|
|
|
5028
|
|
|
|
|
|
|
=head3 GetInstanceAry
|
5029
|
|
|
|
|
|
|
|
5030
|
|
|
|
|
|
|
Description:
|
5031
|
|
|
|
|
|
|
|
5032
|
|
|
|
|
|
|
Returns the _instanceAry member variable set during Word2vec::Interface object initialization in the 'new' function.
|
5033
|
|
|
|
|
|
|
|
5034
|
|
|
|
|
|
|
Input:
|
5035
|
|
|
|
|
|
|
|
5036
|
|
|
|
|
|
|
None
|
5037
|
|
|
|
|
|
|
|
5038
|
|
|
|
|
|
|
Output:
|
5039
|
|
|
|
|
|
|
|
5040
|
|
|
|
|
|
|
$instance -> Returns array reference of WSD instances.
|
5041
|
|
|
|
|
|
|
|
5042
|
|
|
|
|
|
|
Example:
|
5043
|
|
|
|
|
|
|
|
5044
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5045
|
|
|
|
|
|
|
|
5046
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5047
|
|
|
|
|
|
|
my $aryRef = $interface->GetInstanceAry();
|
5048
|
|
|
|
|
|
|
|
5049
|
|
|
|
|
|
|
my @instanceAry = @{ $aryRef };
|
5050
|
|
|
|
|
|
|
undef( $interface );
|
5051
|
|
|
|
|
|
|
|
5052
|
|
|
|
|
|
|
=head3 GetSensesAry
|
5053
|
|
|
|
|
|
|
|
5054
|
|
|
|
|
|
|
Description:
|
5055
|
|
|
|
|
|
|
|
5056
|
|
|
|
|
|
|
Returns the _senseAry member variable set during Word2vec::Interface object initialization in the 'new' function.
|
5057
|
|
|
|
|
|
|
|
5058
|
|
|
|
|
|
|
Input:
|
5059
|
|
|
|
|
|
|
|
5060
|
|
|
|
|
|
|
None
|
5061
|
|
|
|
|
|
|
|
5062
|
|
|
|
|
|
|
Output:
|
5063
|
|
|
|
|
|
|
|
5064
|
|
|
|
|
|
|
$senses -> Returns array reference of WSD senses.
|
5065
|
|
|
|
|
|
|
|
5066
|
|
|
|
|
|
|
Example:
|
5067
|
|
|
|
|
|
|
|
5068
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5069
|
|
|
|
|
|
|
|
5070
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5071
|
|
|
|
|
|
|
my $aryRef = $interface->GetSensesAry();
|
5072
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
my @sensesAry = @{ $aryRef };
|
5074
|
|
|
|
|
|
|
undef( $interface );
|
5075
|
|
|
|
|
|
|
|
5076
|
|
|
|
|
|
|
=head3 GetInstanceCount
|
5077
|
|
|
|
|
|
|
|
5078
|
|
|
|
|
|
|
Description:
|
5079
|
|
|
|
|
|
|
|
5080
|
|
|
|
|
|
|
Returns the _instanceCount member variable set during Word2vec::Interface object initialization in the 'new' function.
|
5081
|
|
|
|
|
|
|
|
5082
|
|
|
|
|
|
|
Input:
|
5083
|
|
|
|
|
|
|
|
5084
|
|
|
|
|
|
|
None
|
5085
|
|
|
|
|
|
|
|
5086
|
|
|
|
|
|
|
Output:
|
5087
|
|
|
|
|
|
|
|
5088
|
|
|
|
|
|
|
$value -> Returns number of stored WSD instances.
|
5089
|
|
|
|
|
|
|
|
5090
|
|
|
|
|
|
|
Example:
|
5091
|
|
|
|
|
|
|
|
5092
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5093
|
|
|
|
|
|
|
|
5094
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5095
|
|
|
|
|
|
|
my $count = $interface->GetInstanceCount();
|
5096
|
|
|
|
|
|
|
|
5097
|
|
|
|
|
|
|
print( "Stored WSD instances in memory: $count\n" );
|
5098
|
|
|
|
|
|
|
|
5099
|
|
|
|
|
|
|
undef( $interface );
|
5100
|
|
|
|
|
|
|
|
5101
|
|
|
|
|
|
|
=head3 GetSenseCount
|
5102
|
|
|
|
|
|
|
|
5103
|
|
|
|
|
|
|
Description:
|
5104
|
|
|
|
|
|
|
|
5105
|
|
|
|
|
|
|
Returns the _sensesCount member variable set during Word2vec::Interface object initialization of new function.
|
5106
|
|
|
|
|
|
|
|
5107
|
|
|
|
|
|
|
Input:
|
5108
|
|
|
|
|
|
|
|
5109
|
|
|
|
|
|
|
None
|
5110
|
|
|
|
|
|
|
|
5111
|
|
|
|
|
|
|
Output:
|
5112
|
|
|
|
|
|
|
|
5113
|
|
|
|
|
|
|
$value -> Returns number of stored WSD senses.
|
5114
|
|
|
|
|
|
|
|
5115
|
|
|
|
|
|
|
Example:
|
5116
|
|
|
|
|
|
|
|
5117
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5118
|
|
|
|
|
|
|
|
5119
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5120
|
|
|
|
|
|
|
my $count = $interface->GetSenseCount();
|
5121
|
|
|
|
|
|
|
|
5122
|
|
|
|
|
|
|
print( "Stored WSD senses in memory: $count\n" );
|
5123
|
|
|
|
|
|
|
|
5124
|
|
|
|
|
|
|
undef( $interface );
|
5125
|
|
|
|
|
|
|
|
5126
|
|
|
|
|
|
|
=head2 Interface Mutator Functions
|
5127
|
|
|
|
|
|
|
|
5128
|
|
|
|
|
|
|
=head3 SetWord2VecDir
|
5129
|
|
|
|
|
|
|
|
5130
|
|
|
|
|
|
|
Description:
|
5131
|
|
|
|
|
|
|
|
5132
|
|
|
|
|
|
|
Sets word2vec executable/source file directory.
|
5133
|
|
|
|
|
|
|
|
5134
|
|
|
|
|
|
|
Input:
|
5135
|
|
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
$string -> Word2Vec Directory
|
5137
|
|
|
|
|
|
|
|
5138
|
|
|
|
|
|
|
Output:
|
5139
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
None
|
5141
|
|
|
|
|
|
|
|
5142
|
|
|
|
|
|
|
Example:
|
5143
|
|
|
|
|
|
|
|
5144
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5145
|
|
|
|
|
|
|
|
5146
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5147
|
|
|
|
|
|
|
$interface->SetWord2VecDir( "/word2vec" );
|
5148
|
|
|
|
|
|
|
|
5149
|
|
|
|
|
|
|
undef( $interface );
|
5150
|
|
|
|
|
|
|
|
5151
|
|
|
|
|
|
|
=head3 SetDebugLog
|
5152
|
|
|
|
|
|
|
|
5153
|
|
|
|
|
|
|
Description:
|
5154
|
|
|
|
|
|
|
|
5155
|
|
|
|
|
|
|
Instructs module to print debug statements to the console.
|
5156
|
|
|
|
|
|
|
|
5157
|
|
|
|
|
|
|
Input:
|
5158
|
|
|
|
|
|
|
|
5159
|
|
|
|
|
|
|
$value -> '1' = Print Debug Statements / '0' = Do Not Print Statements
|
5160
|
|
|
|
|
|
|
|
5161
|
|
|
|
|
|
|
Output:
|
5162
|
|
|
|
|
|
|
|
5163
|
|
|
|
|
|
|
None
|
5164
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
Example:
|
5166
|
|
|
|
|
|
|
|
5167
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5168
|
|
|
|
|
|
|
|
5169
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5170
|
|
|
|
|
|
|
$interface->SetDebugLog( 1 );
|
5171
|
|
|
|
|
|
|
|
5172
|
|
|
|
|
|
|
undef( $interface );
|
5173
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
=head3 SetWriteLog
|
5175
|
|
|
|
|
|
|
|
5176
|
|
|
|
|
|
|
Description:
|
5177
|
|
|
|
|
|
|
|
5178
|
|
|
|
|
|
|
Instructs module to print a log file.
|
5179
|
|
|
|
|
|
|
|
5180
|
|
|
|
|
|
|
Input:
|
5181
|
|
|
|
|
|
|
|
5182
|
|
|
|
|
|
|
$value -> '1' = Write Debug Statements To Log File / '0' = Do Not Write Statements To Log File
|
5183
|
|
|
|
|
|
|
|
5184
|
|
|
|
|
|
|
Output:
|
5185
|
|
|
|
|
|
|
|
5186
|
|
|
|
|
|
|
None
|
5187
|
|
|
|
|
|
|
|
5188
|
|
|
|
|
|
|
Example:
|
5189
|
|
|
|
|
|
|
|
5190
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5191
|
|
|
|
|
|
|
|
5192
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5193
|
|
|
|
|
|
|
$interface->SetWriteLog( 1 );
|
5194
|
|
|
|
|
|
|
|
5195
|
|
|
|
|
|
|
undef( $interface );
|
5196
|
|
|
|
|
|
|
|
5197
|
|
|
|
|
|
|
=head3 SetIgnoreCompileErrors
|
5198
|
|
|
|
|
|
|
|
5199
|
|
|
|
|
|
|
Description:
|
5200
|
|
|
|
|
|
|
|
5201
|
|
|
|
|
|
|
Instructs module to ignore compile errors when compiling source files.
|
5202
|
|
|
|
|
|
|
|
5203
|
|
|
|
|
|
|
Input:
|
5204
|
|
|
|
|
|
|
|
5205
|
|
|
|
|
|
|
$value -> '1' = Ignore warnings/errors, '0' = Display and process warnings/errors.
|
5206
|
|
|
|
|
|
|
|
5207
|
|
|
|
|
|
|
Output:
|
5208
|
|
|
|
|
|
|
|
5209
|
|
|
|
|
|
|
None
|
5210
|
|
|
|
|
|
|
|
5211
|
|
|
|
|
|
|
Example:
|
5212
|
|
|
|
|
|
|
|
5213
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5214
|
|
|
|
|
|
|
|
5215
|
|
|
|
|
|
|
my $instance = Word2vec::Interface->new();
|
5216
|
|
|
|
|
|
|
$instance->SetIgnoreCompileErrors( 1 );
|
5217
|
|
|
|
|
|
|
|
5218
|
|
|
|
|
|
|
undef( $instance );
|
5219
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
=head3 SetIgnoreFileCheckErrors
|
5221
|
|
|
|
|
|
|
|
5222
|
|
|
|
|
|
|
Description:
|
5223
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
Instructs module to ignore file checking errors.
|
5225
|
|
|
|
|
|
|
|
5226
|
|
|
|
|
|
|
Input:
|
5227
|
|
|
|
|
|
|
|
5228
|
|
|
|
|
|
|
$value -> '1' = Ignore warnings/errors, '0' = Display and process warnings/errors.
|
5229
|
|
|
|
|
|
|
|
5230
|
|
|
|
|
|
|
Output:
|
5231
|
|
|
|
|
|
|
|
5232
|
|
|
|
|
|
|
None
|
5233
|
|
|
|
|
|
|
|
5234
|
|
|
|
|
|
|
Example:
|
5235
|
|
|
|
|
|
|
|
5236
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5237
|
|
|
|
|
|
|
|
5238
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5239
|
|
|
|
|
|
|
$interface->SetIgnoreFileCheckErrors( 1 );
|
5240
|
|
|
|
|
|
|
|
5241
|
|
|
|
|
|
|
undef( $interface );
|
5242
|
|
|
|
|
|
|
|
5243
|
|
|
|
|
|
|
=head3 SetWorkingDirectory
|
5244
|
|
|
|
|
|
|
|
5245
|
|
|
|
|
|
|
Description:
|
5246
|
|
|
|
|
|
|
|
5247
|
|
|
|
|
|
|
Sets current working directory.
|
5248
|
|
|
|
|
|
|
|
5249
|
|
|
|
|
|
|
Input:
|
5250
|
|
|
|
|
|
|
|
5251
|
|
|
|
|
|
|
$path -> Working directory path (String)
|
5252
|
|
|
|
|
|
|
|
5253
|
|
|
|
|
|
|
Output:
|
5254
|
|
|
|
|
|
|
|
5255
|
|
|
|
|
|
|
None
|
5256
|
|
|
|
|
|
|
|
5257
|
|
|
|
|
|
|
Example:
|
5258
|
|
|
|
|
|
|
|
5259
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5260
|
|
|
|
|
|
|
|
5261
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5262
|
|
|
|
|
|
|
$interface->SetWorkingDirectory( "my/new/working/directory" );
|
5263
|
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
undef( $interface );
|
5265
|
|
|
|
|
|
|
|
5266
|
|
|
|
|
|
|
=head3 SetInstanceAry
|
5267
|
|
|
|
|
|
|
|
5268
|
|
|
|
|
|
|
Description:
|
5269
|
|
|
|
|
|
|
|
5270
|
|
|
|
|
|
|
Sets member instance array variable to de-referenced passed array reference parameter.
|
5271
|
|
|
|
|
|
|
|
5272
|
|
|
|
|
|
|
Input:
|
5273
|
|
|
|
|
|
|
|
5274
|
|
|
|
|
|
|
$arrayReference -> Array reference for Word Sense Disambiguation - Array of instances (Word2vec::Wsddata objects).
|
5275
|
|
|
|
|
|
|
|
5276
|
|
|
|
|
|
|
Output:
|
5277
|
|
|
|
|
|
|
|
5278
|
|
|
|
|
|
|
None
|
5279
|
|
|
|
|
|
|
|
5280
|
|
|
|
|
|
|
Example:
|
5281
|
|
|
|
|
|
|
|
5282
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5283
|
|
|
|
|
|
|
|
5284
|
|
|
|
|
|
|
# This array would theoretically contain 'Word2vec::Wsddata' objects.
|
5285
|
|
|
|
|
|
|
my @instanceAry = ();
|
5286
|
|
|
|
|
|
|
|
5287
|
|
|
|
|
|
|
my $instance = Word2vec::Interface->new();
|
5288
|
|
|
|
|
|
|
$instance->SetInstanceAry( \@instanceAry );
|
5289
|
|
|
|
|
|
|
|
5290
|
|
|
|
|
|
|
undef( $instance );
|
5291
|
|
|
|
|
|
|
|
5292
|
|
|
|
|
|
|
=head3 ClearInstanceAry
|
5293
|
|
|
|
|
|
|
|
5294
|
|
|
|
|
|
|
Description:
|
5295
|
|
|
|
|
|
|
|
5296
|
|
|
|
|
|
|
Clears member instance array.
|
5297
|
|
|
|
|
|
|
|
5298
|
|
|
|
|
|
|
Input:
|
5299
|
|
|
|
|
|
|
|
5300
|
|
|
|
|
|
|
None
|
5301
|
|
|
|
|
|
|
|
5302
|
|
|
|
|
|
|
Output:
|
5303
|
|
|
|
|
|
|
|
5304
|
|
|
|
|
|
|
None
|
5305
|
|
|
|
|
|
|
|
5306
|
|
|
|
|
|
|
Example:
|
5307
|
|
|
|
|
|
|
|
5308
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5309
|
|
|
|
|
|
|
|
5310
|
|
|
|
|
|
|
my $instance = Word2vec::Interface->new();
|
5311
|
|
|
|
|
|
|
$instance->ClearInstanceAry();
|
5312
|
|
|
|
|
|
|
|
5313
|
|
|
|
|
|
|
undef( $instance );
|
5314
|
|
|
|
|
|
|
|
5315
|
|
|
|
|
|
|
=head3 SetSenseAry
|
5316
|
|
|
|
|
|
|
|
5317
|
|
|
|
|
|
|
Description:
|
5318
|
|
|
|
|
|
|
|
5319
|
|
|
|
|
|
|
Sets member sense array variable to de-referenced passed array reference parameter.
|
5320
|
|
|
|
|
|
|
|
5321
|
|
|
|
|
|
|
Input:
|
5322
|
|
|
|
|
|
|
|
5323
|
|
|
|
|
|
|
$arrayReference -> Array reference for Word Sense Disambiguation - Array of senses (Word2vec::Wsddata objects).
|
5324
|
|
|
|
|
|
|
|
5325
|
|
|
|
|
|
|
Output:
|
5326
|
|
|
|
|
|
|
|
5327
|
|
|
|
|
|
|
None
|
5328
|
|
|
|
|
|
|
|
5329
|
|
|
|
|
|
|
Example:
|
5330
|
|
|
|
|
|
|
|
5331
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5332
|
|
|
|
|
|
|
|
5333
|
|
|
|
|
|
|
# This array would theoretically contain 'Word2vec::Wsddata' objects.
|
5334
|
|
|
|
|
|
|
my @senseAry = ();
|
5335
|
|
|
|
|
|
|
|
5336
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5337
|
|
|
|
|
|
|
$interface->SetSenseAry( \@senseAry );
|
5338
|
|
|
|
|
|
|
|
5339
|
|
|
|
|
|
|
undef( $interface );
|
5340
|
|
|
|
|
|
|
|
5341
|
|
|
|
|
|
|
=head3 ClearSenseAry
|
5342
|
|
|
|
|
|
|
|
5343
|
|
|
|
|
|
|
Description:
|
5344
|
|
|
|
|
|
|
|
5345
|
|
|
|
|
|
|
Clears member sense array.
|
5346
|
|
|
|
|
|
|
|
5347
|
|
|
|
|
|
|
Input:
|
5348
|
|
|
|
|
|
|
|
5349
|
|
|
|
|
|
|
None
|
5350
|
|
|
|
|
|
|
|
5351
|
|
|
|
|
|
|
Output:
|
5352
|
|
|
|
|
|
|
|
5353
|
|
|
|
|
|
|
None
|
5354
|
|
|
|
|
|
|
|
5355
|
|
|
|
|
|
|
Example:
|
5356
|
|
|
|
|
|
|
|
5357
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5358
|
|
|
|
|
|
|
|
5359
|
|
|
|
|
|
|
my $instance = Word2vec::Interface->new();
|
5360
|
|
|
|
|
|
|
$instance->ClearSenseAry();
|
5361
|
|
|
|
|
|
|
|
5362
|
|
|
|
|
|
|
undef( $instance );
|
5363
|
|
|
|
|
|
|
|
5364
|
|
|
|
|
|
|
=head3 SetInstanceCount
|
5365
|
|
|
|
|
|
|
|
5366
|
|
|
|
|
|
|
Description:
|
5367
|
|
|
|
|
|
|
|
5368
|
|
|
|
|
|
|
Sets member instance count variable to passed value (integer).
|
5369
|
|
|
|
|
|
|
|
5370
|
|
|
|
|
|
|
Input:
|
5371
|
|
|
|
|
|
|
|
5372
|
|
|
|
|
|
|
$value -> Integer (Positive)
|
5373
|
|
|
|
|
|
|
|
5374
|
|
|
|
|
|
|
Output:
|
5375
|
|
|
|
|
|
|
|
5376
|
|
|
|
|
|
|
None
|
5377
|
|
|
|
|
|
|
|
5378
|
|
|
|
|
|
|
Example:
|
5379
|
|
|
|
|
|
|
|
5380
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5381
|
|
|
|
|
|
|
|
5382
|
|
|
|
|
|
|
my $instance = Word2vec::Interface->new();
|
5383
|
|
|
|
|
|
|
$instance->SetInstanceCount( 12 );
|
5384
|
|
|
|
|
|
|
|
5385
|
|
|
|
|
|
|
undef( $instance );
|
5386
|
|
|
|
|
|
|
|
5387
|
|
|
|
|
|
|
=head3 SetSenseCount
|
5388
|
|
|
|
|
|
|
|
5389
|
|
|
|
|
|
|
Description:
|
5390
|
|
|
|
|
|
|
|
5391
|
|
|
|
|
|
|
Sets member sense count variable to passed value (integer).
|
5392
|
|
|
|
|
|
|
|
5393
|
|
|
|
|
|
|
Input:
|
5394
|
|
|
|
|
|
|
|
5395
|
|
|
|
|
|
|
$value -> Integer (Positive)
|
5396
|
|
|
|
|
|
|
|
5397
|
|
|
|
|
|
|
Output:
|
5398
|
|
|
|
|
|
|
|
5399
|
|
|
|
|
|
|
None
|
5400
|
|
|
|
|
|
|
|
5401
|
|
|
|
|
|
|
Example:
|
5402
|
|
|
|
|
|
|
|
5403
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5404
|
|
|
|
|
|
|
|
5405
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5406
|
|
|
|
|
|
|
$interface->SetSenseCount( 12 );
|
5407
|
|
|
|
|
|
|
|
5408
|
|
|
|
|
|
|
undef( $interface );
|
5409
|
|
|
|
|
|
|
|
5410
|
|
|
|
|
|
|
=head2 Debug Functions
|
5411
|
|
|
|
|
|
|
|
5412
|
|
|
|
|
|
|
=head3 GetTime
|
5413
|
|
|
|
|
|
|
|
5414
|
|
|
|
|
|
|
Description:
|
5415
|
|
|
|
|
|
|
|
5416
|
|
|
|
|
|
|
Returns current time string in "Hour:Minute:Second" format.
|
5417
|
|
|
|
|
|
|
|
5418
|
|
|
|
|
|
|
Input:
|
5419
|
|
|
|
|
|
|
|
5420
|
|
|
|
|
|
|
None
|
5421
|
|
|
|
|
|
|
|
5422
|
|
|
|
|
|
|
Output:
|
5423
|
|
|
|
|
|
|
|
5424
|
|
|
|
|
|
|
$string -> XX:XX:XX ("Hour:Minute:Second")
|
5425
|
|
|
|
|
|
|
|
5426
|
|
|
|
|
|
|
Example:
|
5427
|
|
|
|
|
|
|
|
5428
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5429
|
|
|
|
|
|
|
|
5430
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5431
|
|
|
|
|
|
|
my $time = $interface->GetTime();
|
5432
|
|
|
|
|
|
|
|
5433
|
|
|
|
|
|
|
print( "Current Time: $time\n" ) if defined( $time );
|
5434
|
|
|
|
|
|
|
|
5435
|
|
|
|
|
|
|
undef( $interface );
|
5436
|
|
|
|
|
|
|
|
5437
|
|
|
|
|
|
|
=head3 GetDate
|
5438
|
|
|
|
|
|
|
|
5439
|
|
|
|
|
|
|
Description:
|
5440
|
|
|
|
|
|
|
|
5441
|
|
|
|
|
|
|
Returns current month, day and year string in "Month/Day/Year" format.
|
5442
|
|
|
|
|
|
|
|
5443
|
|
|
|
|
|
|
Input:
|
5444
|
|
|
|
|
|
|
|
5445
|
|
|
|
|
|
|
None
|
5446
|
|
|
|
|
|
|
|
5447
|
|
|
|
|
|
|
Output:
|
5448
|
|
|
|
|
|
|
|
5449
|
|
|
|
|
|
|
$string -> XX/XX/XXXX ("Month/Day/Year")
|
5450
|
|
|
|
|
|
|
|
5451
|
|
|
|
|
|
|
Example:
|
5452
|
|
|
|
|
|
|
|
5453
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5454
|
|
|
|
|
|
|
|
5455
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5456
|
|
|
|
|
|
|
my $date = $interface->GetDate();
|
5457
|
|
|
|
|
|
|
|
5458
|
|
|
|
|
|
|
print( "Current Date: $date\n" ) if defined( $date );
|
5459
|
|
|
|
|
|
|
|
5460
|
|
|
|
|
|
|
undef( $interface );
|
5461
|
|
|
|
|
|
|
|
5462
|
|
|
|
|
|
|
=head3 WriteLog
|
5463
|
|
|
|
|
|
|
|
5464
|
|
|
|
|
|
|
Description:
|
5465
|
|
|
|
|
|
|
|
5466
|
|
|
|
|
|
|
Prints passed string parameter to the console, log file or both depending on user options.
|
5467
|
|
|
|
|
|
|
|
5468
|
|
|
|
|
|
|
Note: printNewLine parameter prints a new line character following the string if the parameter
|
5469
|
|
|
|
|
|
|
is undefined and does not if parameter is 0.
|
5470
|
|
|
|
|
|
|
|
5471
|
|
|
|
|
|
|
Input:
|
5472
|
|
|
|
|
|
|
|
5473
|
|
|
|
|
|
|
$string -> String to print to the console/log file.
|
5474
|
|
|
|
|
|
|
$value -> 0 = Do not print newline character after string, all else prints new line character including 'undef'.
|
5475
|
|
|
|
|
|
|
|
5476
|
|
|
|
|
|
|
Output:
|
5477
|
|
|
|
|
|
|
|
5478
|
|
|
|
|
|
|
None
|
5479
|
|
|
|
|
|
|
|
5480
|
|
|
|
|
|
|
Example:
|
5481
|
|
|
|
|
|
|
|
5482
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5483
|
|
|
|
|
|
|
|
5484
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5485
|
|
|
|
|
|
|
$interface->WriteLog( "Hello World" );
|
5486
|
|
|
|
|
|
|
|
5487
|
|
|
|
|
|
|
undef( $interface );
|
5488
|
|
|
|
|
|
|
|
5489
|
|
|
|
|
|
|
=head2 Util Main Functions
|
5490
|
|
|
|
|
|
|
|
5491
|
|
|
|
|
|
|
=head3 IsFileOrDirectory
|
5492
|
|
|
|
|
|
|
|
5493
|
|
|
|
|
|
|
Description:
|
5494
|
|
|
|
|
|
|
|
5495
|
|
|
|
|
|
|
Given a path, returns a string specifying whether this path represents a file or directory.
|
5496
|
|
|
|
|
|
|
|
5497
|
|
|
|
|
|
|
Input:
|
5498
|
|
|
|
|
|
|
|
5499
|
|
|
|
|
|
|
$path -> String representing path to check
|
5500
|
|
|
|
|
|
|
|
5501
|
|
|
|
|
|
|
Output:
|
5502
|
|
|
|
|
|
|
|
5503
|
|
|
|
|
|
|
$string -> Returns "file", "dir" or "unknown".
|
5504
|
|
|
|
|
|
|
|
5505
|
|
|
|
|
|
|
Example:
|
5506
|
|
|
|
|
|
|
|
5507
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5508
|
|
|
|
|
|
|
|
5509
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5510
|
|
|
|
|
|
|
|
5511
|
|
|
|
|
|
|
my $result = $interface->IsFileOrDirectory( "../samples/stoplist" );
|
5512
|
|
|
|
|
|
|
|
5513
|
|
|
|
|
|
|
print( "Path Type Is A File\n" ) if $result eq "file";
|
5514
|
|
|
|
|
|
|
print( "Path Type Is A Directory\n" ) if $result eq "dir";
|
5515
|
|
|
|
|
|
|
print( "Path Type Is Unknown\n" ) if $result eq "unknown";
|
5516
|
|
|
|
|
|
|
|
5517
|
|
|
|
|
|
|
undef( $interface );
|
5518
|
|
|
|
|
|
|
|
5519
|
|
|
|
|
|
|
=head3 GetFilesInDirectory
|
5520
|
|
|
|
|
|
|
|
5521
|
|
|
|
|
|
|
Description:
|
5522
|
|
|
|
|
|
|
|
5523
|
|
|
|
|
|
|
Given a path and file tag string, returns a string of the file names in the specified path that contain the file tag string.
|
5524
|
|
|
|
|
|
|
|
5525
|
|
|
|
|
|
|
Input:
|
5526
|
|
|
|
|
|
|
|
5527
|
|
|
|
|
|
|
$path -> String representing path
|
5528
|
|
|
|
|
|
|
$fileTag -> String consisting of file tag to fetch.
|
5529
|
|
|
|
|
|
|
|
5530
|
|
|
|
|
|
|
Output:
|
5531
|
|
|
|
|
|
|
|
5532
|
|
|
|
|
|
|
$string -> Returns string of file names consisting of $fileTag.
|
5533
|
|
|
|
|
|
|
|
5534
|
|
|
|
|
|
|
Example:
|
5535
|
|
|
|
|
|
|
|
5536
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5537
|
|
|
|
|
|
|
|
5538
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5539
|
|
|
|
|
|
|
|
5540
|
|
|
|
|
|
|
# Looks in specified path for files including ".sval" in their file name.
|
5541
|
|
|
|
|
|
|
my $result = $interface->GetFilesInDirectory( "../samples/", ".sval" );
|
5542
|
|
|
|
|
|
|
|
5543
|
|
|
|
|
|
|
print( "Found File Name(s): $result\n" ) if defined( $result );
|
5544
|
|
|
|
|
|
|
|
5545
|
|
|
|
|
|
|
undef( $interface );
|
5546
|
|
|
|
|
|
|
|
5547
|
|
|
|
|
|
|
=head2 Word2Vec Main Functions
|
5548
|
|
|
|
|
|
|
|
5549
|
|
|
|
|
|
|
=head3 W2VExecuteTraining
|
5550
|
|
|
|
|
|
|
|
5551
|
|
|
|
|
|
|
Executes word2vec training based on parameters. Parameter variables have higher precedence
|
5552
|
|
|
|
|
|
|
than member variables. Any parameter specified will override its respective member variable.
|
5553
|
|
|
|
|
|
|
|
5554
|
|
|
|
|
|
|
Note: If no parameters are specified, this module executes word2vec training based on preset
|
5555
|
|
|
|
|
|
|
member variables. Returns a status value indicating whether training succeeded (see Output).
|
5556
|
|
|
|
|
|
|
|
5557
|
|
|
|
|
|
|
Input:
|
5558
|
|
|
|
|
|
|
|
5559
|
|
|
|
|
|
|
$trainFilePath -> Specifies word2vec text corpus training file in a given path. (String)
|
5560
|
|
|
|
|
|
|
$outputFilePath -> Specifies word2vec trained output data file name and save path. (String)
|
5561
|
|
|
|
|
|
|
$vectorSize -> Size of word2vec word vectors. (Integer)
|
5562
|
|
|
|
|
|
|
$windowSize -> Maximum skip length between words. (Integer)
|
5563
|
|
|
|
|
|
|
$minCount -> Disregard words that appear less than $minCount times. (Integer)
|
5564
|
|
|
|
|
|
|
$sample -> Threshold for occurrence of words. Those that appear with higher frequency in the training data will be randomly down-sampled. (Float)
|
5565
|
|
|
|
|
|
|
$negative -> Number of negative examples. (Integer)
|
5566
|
|
|
|
|
|
|
$alpha -> Sets the starting learning rate. (Float)
|
5567
|
|
|
|
|
|
|
$hs -> Hierarchical Soft-max (Integer)
|
5568
|
|
|
|
|
|
|
$binary -> Save trained data as binary mode. (Integer)
|
5569
|
|
|
|
|
|
|
$numOfThreads -> Number of word2vec training threads. (Integer)
|
5570
|
|
|
|
|
|
|
$iterations -> Number of training iterations to run prior to completion of training. (Integer)
|
5571
|
|
|
|
|
|
|
$useCBOW -> Enable Continuous Bag Of Words model or Skip-Gram model. (Integer)
|
5572
|
|
|
|
|
|
|
$classes -> Output word classes rather than word vectors. (Integer)
|
5573
|
|
|
|
|
|
|
$readVocab -> Read vocabulary from file path without constructing from training data. (String)
|
5574
|
|
|
|
|
|
|
$saveVocab -> Save vocabulary to file path. (String)
|
5575
|
|
|
|
|
|
|
$debug -> Set word2vec debug mode. (Integer)
|
5576
|
|
|
|
|
|
|
$overwrite -> Instructs the module to either overwrite any existing text corpus files or append to the existing file. ( '1' = True / '0' = False )
|
5577
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
Note: It is not recommended to specify all new() parameters, as it has not been thoroughly tested.
|
5579
|
|
|
|
|
|
|
|
5580
|
|
|
|
|
|
|
Output:
|
5581
|
|
|
|
|
|
|
|
5582
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
5583
|
|
|
|
|
|
|
|
5584
|
|
|
|
|
|
|
Example:
|
5585
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5587
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5589
|
|
|
|
|
|
|
$interface->W2VSetTrainFilePath( "textcorpus.txt" );
|
5590
|
|
|
|
|
|
|
$interface->W2VSetOutputFilePath( "vectors.bin" );
|
5591
|
|
|
|
|
|
|
$interface->W2VSetWordVecSize( 200 );
|
5592
|
|
|
|
|
|
|
$interface->W2VSetWindowSize( 8 );
|
5593
|
|
|
|
|
|
|
$interface->W2VSetSample( 0.0001 );
|
5594
|
|
|
|
|
|
|
$interface->W2VSetNegative( 25 );
|
5595
|
|
|
|
|
|
|
$interface->W2VSetHSoftMax( 0 );
|
5596
|
|
|
|
|
|
|
$interface->W2VSetBinaryOutput( 0 );
|
5597
|
|
|
|
|
|
|
$interface->W2VSetNumOfThreads( 20 );
|
5598
|
|
|
|
|
|
|
$interface->W2VSetNumOfIterations( 15 );
|
5599
|
|
|
|
|
|
|
$interface->W2VSetUseCBOW( 1 );
|
5600
|
|
|
|
|
|
|
$interface->W2VSetOverwriteOldFile( 0 );
|
5601
|
|
|
|
|
|
|
$interface->W2VExecuteTraining();
|
5602
|
|
|
|
|
|
|
|
5603
|
|
|
|
|
|
|
undef( $interface );
|
5604
|
|
|
|
|
|
|
|
5605
|
|
|
|
|
|
|
# or
|
5606
|
|
|
|
|
|
|
|
5607
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5608
|
|
|
|
|
|
|
|
5609
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5610
|
|
|
|
|
|
|
$interface->W2VExecuteTraining( "textcorpus.txt", "vectors.bin", 200, 8, 5, 0.001, 25, 0.05, 0, 0, 20, 15, 1, 0, "", "", 2, 0 );
|
5611
|
|
|
|
|
|
|
|
5612
|
|
|
|
|
|
|
undef( $interface );
|
5613
|
|
|
|
|
|
|
|
5614
|
|
|
|
|
|
|
=head3 W2VExecuteStringTraining
|
5615
|
|
|
|
|
|
|
|
5616
|
|
|
|
|
|
|
Executes word2vec training based on parameters. Parameter variables have higher precedence
|
5617
|
|
|
|
|
|
|
than member variables. Any parameter specified will override its respective member variable.
|
5618
|
|
|
|
|
|
|
|
5619
|
|
|
|
|
|
|
Note: If no parameters are specified, this module executes word2vec training based on preset
|
5620
|
|
|
|
|
|
|
member variables. Returns a status value indicating whether training succeeded (see Output).
|
5621
|
|
|
|
|
|
|
|
5622
|
|
|
|
|
|
|
Input:
|
5623
|
|
|
|
|
|
|
|
5624
|
|
|
|
|
|
|
$trainingStr -> String to train with word2vec.
|
5625
|
|
|
|
|
|
|
$outputFilePath -> Specifies word2vec trained output data file name and save path. (String)
|
5626
|
|
|
|
|
|
|
$vectorSize -> Size of word2vec word vectors. (Integer)
|
5627
|
|
|
|
|
|
|
$windowSize -> Maximum skip length between words. (Integer)
|
5628
|
|
|
|
|
|
|
$minCount -> Disregard words that appear less than $minCount times. (Integer)
|
5629
|
|
|
|
|
|
|
$sample -> Threshold for occurrence of words. Those that appear with higher frequency in the training data will be randomly down-sampled. (Float)
|
5630
|
|
|
|
|
|
|
$negative -> Number of negative examples. (Integer)
|
5631
|
|
|
|
|
|
|
$alpha -> Sets the starting learning rate. (Float)
|
5632
|
|
|
|
|
|
|
$hs -> Hierarchical Soft-max (Integer)
|
5633
|
|
|
|
|
|
|
$binary -> Save trained data as binary mode. (Integer)
|
5634
|
|
|
|
|
|
|
$numOfThreads -> Number of word2vec training threads. (Integer)
|
5635
|
|
|
|
|
|
|
$iterations -> Number of training iterations to run prior to completion of training. (Integer)
|
5636
|
|
|
|
|
|
|
$useCBOW -> Enable Continuous Bag Of Words model or Skip-Gram model. (Integer)
|
5637
|
|
|
|
|
|
|
$classes -> Output word classes rather than word vectors. (Integer)
|
5638
|
|
|
|
|
|
|
$readVocab -> Read vocabulary from file path without constructing from training data. (String)
|
5639
|
|
|
|
|
|
|
$saveVocab -> Save vocabulary to file path. (String)
|
5640
|
|
|
|
|
|
|
$debug -> Set word2vec debug mode. (Integer)
|
5641
|
|
|
|
|
|
|
$overwrite -> Instructs the module to either overwrite any existing text corpus files or append to the existing file. ( '1' = True / '0' = False )
|
5642
|
|
|
|
|
|
|
|
5643
|
|
|
|
|
|
|
Note: It is not recommended to specify all new() parameters, as it has not been thoroughly tested.
|
5644
|
|
|
|
|
|
|
|
5645
|
|
|
|
|
|
|
Output:
|
5646
|
|
|
|
|
|
|
|
5647
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
5648
|
|
|
|
|
|
|
|
5649
|
|
|
|
|
|
|
Example:
|
5650
|
|
|
|
|
|
|
|
5651
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5652
|
|
|
|
|
|
|
|
5653
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5654
|
|
|
|
|
|
|
$interface->W2VSetOutputFilePath( "vectors.bin" );
|
5655
|
|
|
|
|
|
|
$interface->W2VSetWordVecSize( 200 );
|
5656
|
|
|
|
|
|
|
$interface->W2VSetWindowSize( 8 );
|
5657
|
|
|
|
|
|
|
$interface->W2VSetSample( 0.0001 );
|
5658
|
|
|
|
|
|
|
$interface->W2VSetNegative( 25 );
|
5659
|
|
|
|
|
|
|
$interface->W2VSetHSoftMax( 0 );
|
5660
|
|
|
|
|
|
|
$interface->W2VSetBinaryOutput( 0 );
|
5661
|
|
|
|
|
|
|
$interface->W2VSetNumOfThreads( 20 );
|
5662
|
|
|
|
|
|
|
$interface->W2VSetNumOfIterations( 15 );
|
5663
|
|
|
|
|
|
|
$interface->W2VSetUseCBOW( 1 );
|
5664
|
|
|
|
|
|
|
$interface->W2VSetOverwriteOldFile( 0 );
|
5665
|
|
|
|
|
|
|
$interface->W2VExecuteStringTraining( "string to train here" );
|
5666
|
|
|
|
|
|
|
|
5667
|
|
|
|
|
|
|
undef( $interface );
|
5668
|
|
|
|
|
|
|
|
5669
|
|
|
|
|
|
|
# or
|
5670
|
|
|
|
|
|
|
|
5671
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5672
|
|
|
|
|
|
|
|
5673
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5674
|
|
|
|
|
|
|
$interface->W2VExecuteStringTraining( "string to train here", "vectors.bin", 200, 8, 5, 0.001, 25, 0.05, 0, 0, 20, 15, 1, 0, "", "", 2, 0 );
|
5675
|
|
|
|
|
|
|
|
5676
|
|
|
|
|
|
|
undef( $interface );
|
5677
|
|
|
|
|
|
|
|
5678
|
|
|
|
|
|
|
=head3 W2VComputeCosineSimilarity
|
5679
|
|
|
|
|
|
|
|
5680
|
|
|
|
|
|
|
Description:
|
5681
|
|
|
|
|
|
|
|
5682
|
|
|
|
|
|
|
Computes cosine similarity between two words using trained word2vec vector data. Returns
|
5683
|
|
|
|
|
|
|
float value or undefined if one or more words are not in the dictionary.
|
5684
|
|
|
|
|
|
|
|
5685
|
|
|
|
|
|
|
Note: Supports single words only and requires vector data to be in memory with W2VReadTrainedVectorDataFromFile() prior to function execution.
|
5686
|
|
|
|
|
|
|
|
5687
|
|
|
|
|
|
|
Input:
|
5688
|
|
|
|
|
|
|
|
5689
|
|
|
|
|
|
|
$string -> Single string word
|
5690
|
|
|
|
|
|
|
$string -> Single string word
|
5691
|
|
|
|
|
|
|
|
5692
|
|
|
|
|
|
|
Output:
|
5693
|
|
|
|
|
|
|
|
5694
|
|
|
|
|
|
|
$value -> Float or Undefined
|
5695
|
|
|
|
|
|
|
|
5696
|
|
|
|
|
|
|
Example:
|
5697
|
|
|
|
|
|
|
|
5698
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5699
|
|
|
|
|
|
|
|
5700
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5701
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5702
|
|
|
|
|
|
|
print "Cosine similarity between words: \"of\" and \"the\": " . $interface->W2VComputeCosineSimilarity( "of", "the" ) . "\n";
|
5703
|
|
|
|
|
|
|
|
5704
|
|
|
|
|
|
|
undef( $interface );
|
5705
|
|
|
|
|
|
|
|
5706
|
|
|
|
|
|
|
=head3 W2VComputeAvgOfWordsCosineSimilarity
|
5707
|
|
|
|
|
|
|
|
5708
|
|
|
|
|
|
|
Description:
|
5709
|
|
|
|
|
|
|
|
5710
|
|
|
|
|
|
|
Computes cosine similarity between two words or compound words using trained word2vec vector data.
|
5711
|
|
|
|
|
|
|
Returns float value or undefined.
|
5712
|
|
|
|
|
|
|
|
5713
|
|
|
|
|
|
|
Note: Supports multiple words concatenated by ' ' and requires vector data to be in memory prior
|
5714
|
|
|
|
|
|
|
to method execution. This method will not error out when a word is not located within the dictionary.
|
5715
|
|
|
|
|
|
|
It will take the average of all found words for each parameter then cosine similarity of both word vectors.
|
5716
|
|
|
|
|
|
|
|
5717
|
|
|
|
|
|
|
Input:
|
5718
|
|
|
|
|
|
|
|
5719
|
|
|
|
|
|
|
$string -> string of single or multiple words separated by ' ' (space).
|
5720
|
|
|
|
|
|
|
$string -> string of single or multiple words separated by ' ' (space).
|
5721
|
|
|
|
|
|
|
|
5722
|
|
|
|
|
|
|
Output:
|
5723
|
|
|
|
|
|
|
|
5724
|
|
|
|
|
|
|
$value -> Float or Undefined
|
5725
|
|
|
|
|
|
|
|
5726
|
|
|
|
|
|
|
Example:
|
5727
|
|
|
|
|
|
|
|
5728
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5729
|
|
|
|
|
|
|
|
5730
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5731
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5732
|
|
|
|
|
|
|
print "Cosine similarity between words: \"heart attack\" and \"acute myocardial infarction\": " .
|
5733
|
|
|
|
|
|
|
$interface->W2VComputeAvgOfWordsCosineSimilarity( "heart attack", "acute myocardial infarction" ) . "\n";
|
5734
|
|
|
|
|
|
|
|
5735
|
|
|
|
|
|
|
undef( $interface );
|
5736
|
|
|
|
|
|
|
|
5737
|
|
|
|
|
|
|
=head3 W2VComputeMultiWordCosineSimilarity
|
5738
|
|
|
|
|
|
|
|
5739
|
|
|
|
|
|
|
Description:
|
5740
|
|
|
|
|
|
|
|
5741
|
|
|
|
|
|
|
Computes cosine similarity between two words or compound words using trained word2vec vector data.
|
5742
|
|
|
|
|
|
|
Returns float value or undefined if one or more words are not in the dictionary.
|
5743
|
|
|
|
|
|
|
|
5744
|
|
|
|
|
|
|
Note: Supports multiple words concatenated by ' ' and requires vector data to be in memory prior to method execution.
|
5745
|
|
|
|
|
|
|
This function will error out when a specified word is not found and return undefined.
|
5746
|
|
|
|
|
|
|
|
5747
|
|
|
|
|
|
|
Input:
|
5748
|
|
|
|
|
|
|
|
5749
|
|
|
|
|
|
|
$string -> string of single or multiple words separated by ' ' (space).
|
5750
|
|
|
|
|
|
|
$string -> string of single or multiple words separated by ' ' (space).
|
5751
|
|
|
|
|
|
|
|
5752
|
|
|
|
|
|
|
Output:
|
5753
|
|
|
|
|
|
|
|
5754
|
|
|
|
|
|
|
$value -> Float or Undefined
|
5755
|
|
|
|
|
|
|
|
5756
|
|
|
|
|
|
|
Example:
|
5757
|
|
|
|
|
|
|
|
5758
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5759
|
|
|
|
|
|
|
|
5760
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5761
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5762
|
|
|
|
|
|
|
print "Cosine similarity between words: \"heart attack\" and \"acute myocardial infarction\": " .
|
5763
|
|
|
|
|
|
|
$interface->W2VComputeMultiWordCosineSimilarity( "heart attack", "acute myocardial infarction" ) . "\n";
|
5764
|
|
|
|
|
|
|
|
5765
|
|
|
|
|
|
|
undef( $interface );
|
5766
|
|
|
|
|
|
|
|
5767
|
|
|
|
|
|
|
=head3 W2VComputeCosineSimilarityOfWordVectors
|
5768
|
|
|
|
|
|
|
|
5769
|
|
|
|
|
|
|
Description:
|
5770
|
|
|
|
|
|
|
|
5771
|
|
|
|
|
|
|
Computes cosine similarity between two word vectors.
|
5772
|
|
|
|
|
|
|
Returns float value or undefined if one or more words are not in the dictionary.
|
5773
|
|
|
|
|
|
|
|
5774
|
|
|
|
|
|
|
Note: Function parameters require actual word vector data with words removed.
|
5775
|
|
|
|
|
|
|
|
5776
|
|
|
|
|
|
|
Input:
|
5777
|
|
|
|
|
|
|
|
5778
|
|
|
|
|
|
|
$string -> string of word vector representation data separated by ' ' (space).
|
5779
|
|
|
|
|
|
|
$string -> string of word vector representation data separated by ' ' (space).
|
5780
|
|
|
|
|
|
|
|
5781
|
|
|
|
|
|
|
Output:
|
5782
|
|
|
|
|
|
|
|
5783
|
|
|
|
|
|
|
$value -> Float or Undefined
|
5784
|
|
|
|
|
|
|
|
5785
|
|
|
|
|
|
|
Example:
|
5786
|
|
|
|
|
|
|
|
5787
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5788
|
|
|
|
|
|
|
|
5789
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5790
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5791
|
|
|
|
|
|
|
my $vectorAData = $interface->W2VGetWordVector( "heart" );
|
5792
|
|
|
|
|
|
|
my $vectorBData = $interface->W2VGetWordVector( "attack" );
|
5793
|
|
|
|
|
|
|
|
5794
|
|
|
|
|
|
|
# Remove Words From Data
|
5795
|
|
|
|
|
|
|
$vectorAData = RemoveWordFromWordVectorString( $vectorAData );
|
5796
|
|
|
|
|
|
|
$vectorBData = RemoveWordFromWordVectorString( $vectorBData );
|
5797
|
|
|
|
|
|
|
|
5798
|
|
|
|
|
|
|
undef( @tempAry );
|
5799
|
|
|
|
|
|
|
|
5800
|
|
|
|
|
|
|
print "Cosine similarity between words: \"heart\" and \"attack\": " .
|
5801
|
|
|
|
|
|
|
$interface->W2VComputeCosineSimilarityOfWordVectors( $vectorAData, $vectorBData ) . "\n";
|
5802
|
|
|
|
|
|
|
|
5803
|
|
|
|
|
|
|
undef( $interface );
|
5804
|
|
|
|
|
|
|
|
5805
|
|
|
|
|
|
|
=head3 W2VCosSimWithUserInput
|
5806
|
|
|
|
|
|
|
|
5807
|
|
|
|
|
|
|
Description:
|
5808
|
|
|
|
|
|
|
|
5809
|
|
|
|
|
|
|
Computes cosine similarity between two words using trained word2vec vector data based on user input.
|
5810
|
|
|
|
|
|
|
|
5811
|
|
|
|
|
|
|
Note: No compound word support.
|
5812
|
|
|
|
|
|
|
|
5813
|
|
|
|
|
|
|
Warning: Requires vector data to be in memory prior to method execution.
|
5814
|
|
|
|
|
|
|
|
5815
|
|
|
|
|
|
|
Input:
|
5816
|
|
|
|
|
|
|
|
5817
|
|
|
|
|
|
|
None
|
5818
|
|
|
|
|
|
|
|
5819
|
|
|
|
|
|
|
Output:
|
5820
|
|
|
|
|
|
|
|
5821
|
|
|
|
|
|
|
None
|
5822
|
|
|
|
|
|
|
|
5823
|
|
|
|
|
|
|
Example:
|
5824
|
|
|
|
|
|
|
|
5825
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5826
|
|
|
|
|
|
|
|
5827
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5828
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5829
|
|
|
|
|
|
|
$interface->W2VCosSimWithUserInput();
|
5830
|
|
|
|
|
|
|
|
5831
|
|
|
|
|
|
|
undef( $interface );
|
5832
|
|
|
|
|
|
|
|
5833
|
|
|
|
|
|
|
=head3 W2VMultiWordCosSimWithUserInput
|
5834
|
|
|
|
|
|
|
|
5835
|
|
|
|
|
|
|
Description:
|
5836
|
|
|
|
|
|
|
|
5837
|
|
|
|
|
|
|
Computes cosine similarity between two words or compound words using trained word2vec vector data based on user input.
|
5838
|
|
|
|
|
|
|
|
5839
|
|
|
|
|
|
|
Note: Supports multiple words concatenated by ':'.
|
5840
|
|
|
|
|
|
|
|
5841
|
|
|
|
|
|
|
Warning: Requires vector data to be in memory prior to method execution.
|
5842
|
|
|
|
|
|
|
|
5843
|
|
|
|
|
|
|
Input:
|
5844
|
|
|
|
|
|
|
|
5845
|
|
|
|
|
|
|
None
|
5846
|
|
|
|
|
|
|
|
5847
|
|
|
|
|
|
|
Output:
|
5848
|
|
|
|
|
|
|
|
5849
|
|
|
|
|
|
|
None
|
5850
|
|
|
|
|
|
|
|
5851
|
|
|
|
|
|
|
Example:
|
5852
|
|
|
|
|
|
|
|
5853
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5854
|
|
|
|
|
|
|
|
5855
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5856
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5857
|
|
|
|
|
|
|
$interface->W2VMultiWordCosSimWithUserInput();
|
5858
|
|
|
|
|
|
|
|
5859
|
|
|
|
|
|
|
undef( $interface );
|
5860
|
|
|
|
|
|
|
|
5861
|
|
|
|
|
|
|
|
5862
|
|
|
|
|
|
|
=head3 W2VComputeAverageOfWords
|
5863
|
|
|
|
|
|
|
|
5864
|
|
|
|
|
|
|
Description:
|
5865
|
|
|
|
|
|
|
|
5866
|
|
|
|
|
|
|
Computes the average of the word vectors of all found words, given an array reference parameter of
|
5867
|
|
|
|
|
|
|
plain text words. Returns average values (string) or undefined.
|
5868
|
|
|
|
|
|
|
|
5869
|
|
|
|
|
|
|
Warning: Requires vector data to be in memory prior to method execution.
|
5870
|
|
|
|
|
|
|
|
5871
|
|
|
|
|
|
|
Input:
|
5872
|
|
|
|
|
|
|
|
5873
|
|
|
|
|
|
|
$arrayReference -> Array reference of words
|
5874
|
|
|
|
|
|
|
|
5875
|
|
|
|
|
|
|
Output:
|
5876
|
|
|
|
|
|
|
|
5877
|
|
|
|
|
|
|
$string -> String of word2vec word average values
|
5878
|
|
|
|
|
|
|
|
5879
|
|
|
|
|
|
|
Example:
|
5880
|
|
|
|
|
|
|
|
5881
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5882
|
|
|
|
|
|
|
|
5883
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5884
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5885
|
|
|
|
|
|
|
my @wordAry = qw( of the and );
|
5886
|
|
|
|
|
|
|
my $data = $interface->W2VComputeAverageOfWords( \@wordAry );
|
5887
|
|
|
|
|
|
|
print( "Computed Average Of Words: $data" ) if defined( $data );
|
5888
|
|
|
|
|
|
|
|
5889
|
|
|
|
|
|
|
undef( $interface );
|
5890
|
|
|
|
|
|
|
|
5891
|
|
|
|
|
|
|
=head3 W2VAddTwoWords
|
5892
|
|
|
|
|
|
|
|
5893
|
|
|
|
|
|
|
Description:
|
5894
|
|
|
|
|
|
|
|
5895
|
|
|
|
|
|
|
Adds two word vectors and returns the result.
|
5896
|
|
|
|
|
|
|
|
5897
|
|
|
|
|
|
|
Warning: This method also requires vector data to be in memory prior to method execution.
|
5898
|
|
|
|
|
|
|
|
5899
|
|
|
|
|
|
|
Input:
|
5900
|
|
|
|
|
|
|
|
5901
|
|
|
|
|
|
|
$string -> Word to add
|
5902
|
|
|
|
|
|
|
$string -> Word to add
|
5903
|
|
|
|
|
|
|
|
5904
|
|
|
|
|
|
|
Output:
|
5905
|
|
|
|
|
|
|
|
5906
|
|
|
|
|
|
|
$string -> String of word2vec summed word values
|
5907
|
|
|
|
|
|
|
|
5908
|
|
|
|
|
|
|
Example:
|
5909
|
|
|
|
|
|
|
|
5910
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5911
|
|
|
|
|
|
|
|
5912
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5913
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5914
|
|
|
|
|
|
|
|
5915
|
|
|
|
|
|
|
my $data = $interface->W2VAddTwoWords( "heart", "attack" );
|
5916
|
|
|
|
|
|
|
print( "Computed Sum Of Words: $data" ) if defined( $data );
|
5917
|
|
|
|
|
|
|
|
5918
|
|
|
|
|
|
|
undef( $interface );
|
5919
|
|
|
|
|
|
|
|
5920
|
|
|
|
|
|
|
=head3 W2VSubtractTwoWords
|
5921
|
|
|
|
|
|
|
|
5922
|
|
|
|
|
|
|
Description:
|
5923
|
|
|
|
|
|
|
|
5924
|
|
|
|
|
|
|
Subtracts two word vectors and returns the result.
|
5925
|
|
|
|
|
|
|
|
5926
|
|
|
|
|
|
|
Warning: This method also requires vector data to be in memory prior to method execution.
|
5927
|
|
|
|
|
|
|
|
5928
|
|
|
|
|
|
|
Input:
|
5929
|
|
|
|
|
|
|
|
5930
|
|
|
|
|
|
|
$string -> Word to subtract
|
5931
|
|
|
|
|
|
|
$string -> Word to subtract
|
5932
|
|
|
|
|
|
|
|
5933
|
|
|
|
|
|
|
Output:
|
5934
|
|
|
|
|
|
|
|
5935
|
|
|
|
|
|
|
$string -> String of word2vec difference between word values
|
5936
|
|
|
|
|
|
|
|
5937
|
|
|
|
|
|
|
Example:
|
5938
|
|
|
|
|
|
|
|
5939
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5940
|
|
|
|
|
|
|
|
5941
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5942
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5943
|
|
|
|
|
|
|
|
5944
|
|
|
|
|
|
|
my $data = $interface->W2VSubtractTwoWords( "king", "man" );
|
5945
|
|
|
|
|
|
|
print( "Computed Difference Of Words: $data" ) if defined( $data );
|
5946
|
|
|
|
|
|
|
|
5947
|
|
|
|
|
|
|
undef( $interface );
|
5948
|
|
|
|
|
|
|
|
5949
|
|
|
|
|
|
|
|
5950
|
|
|
|
|
|
|
=head3 W2VAddTwoWordVectors
|
5951
|
|
|
|
|
|
|
|
5952
|
|
|
|
|
|
|
Description:
|
5953
|
|
|
|
|
|
|
|
5954
|
|
|
|
|
|
|
Adds two vector data strings and returns the result.
|
5955
|
|
|
|
|
|
|
|
5956
|
|
|
|
|
|
|
Warning: Text word must be removed from vector data prior to calling this method. This method
|
5957
|
|
|
|
|
|
|
also requires vector data to be in memory prior to method execution.
|
5958
|
|
|
|
|
|
|
|
5959
|
|
|
|
|
|
|
Input:
|
5960
|
|
|
|
|
|
|
|
5961
|
|
|
|
|
|
|
$string -> Word2vec word vector data (with string word removed)
|
5962
|
|
|
|
|
|
|
$string -> Word2vec word vector data (with string word removed)
|
5963
|
|
|
|
|
|
|
|
5964
|
|
|
|
|
|
|
Output:
|
5965
|
|
|
|
|
|
|
|
5966
|
|
|
|
|
|
|
$string -> String of word2vec summed word values
|
5967
|
|
|
|
|
|
|
|
5968
|
|
|
|
|
|
|
Example:
|
5969
|
|
|
|
|
|
|
|
5970
|
|
|
|
|
|
|
use Word2vec::Interface;
|
5971
|
|
|
|
|
|
|
|
5972
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
5973
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
5974
|
|
|
|
|
|
|
my $wordAData = $interface->W2VGetWordVector( "of" );
|
5975
|
|
|
|
|
|
|
my $wordBData = $interface->W2VGetWordVector( "the" );
|
5976
|
|
|
|
|
|
|
|
5977
|
|
|
|
|
|
|
# Removing Words From Vector Data
|
5978
|
|
|
|
|
|
|
$wordAData = RemoveWordFromWordVectorString( $wordAData );
|
5979
|
|
|
|
|
|
|
$wordBData = RemoveWordFromWordVectorString( $wordBData );
|
5980
|
|
|
|
|
|
|
|
5981
|
|
|
|
|
|
|
my $data = $interface->W2VAddTwoWordVectors( $wordAData, $wordBData );
|
5982
|
|
|
|
|
|
|
print( "Computed Sum Of Words: $data" ) if defined( $data );
|
5983
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
undef( $interface );
|
5985
|
|
|
|
|
|
|
|
5986
|
|
|
|
|
|
|
=head3 W2VSubtractTwoWordVectors
|
5987
|
|
|
|
|
|
|
|
5988
|
|
|
|
|
|
|
Description:
|
5989
|
|
|
|
|
|
|
|
5990
|
|
|
|
|
|
|
Subtracts two vector data strings and returns the result.
|
5991
|
|
|
|
|
|
|
|
5992
|
|
|
|
|
|
|
Warning: Text word must be removed from vector data prior to calling this method. This method
|
5993
|
|
|
|
|
|
|
also requires vector data to be in memory prior to method execution.
|
5994
|
|
|
|
|
|
|
|
5995
|
|
|
|
|
|
|
Input:
|
5996
|
|
|
|
|
|
|
|
5997
|
|
|
|
|
|
|
$string -> Word2vec word vector data (with string word removed)
|
5998
|
|
|
|
|
|
|
$string -> Word2vec word vector data (with string word removed)
|
5999
|
|
|
|
|
|
|
|
6000
|
|
|
|
|
|
|
Output:
|
6001
|
|
|
|
|
|
|
|
6002
|
|
|
|
|
|
|
$string -> String of word2vec difference between word values
|
6003
|
|
|
|
|
|
|
|
6004
|
|
|
|
|
|
|
Example:
|
6005
|
|
|
|
|
|
|
|
6006
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6007
|
|
|
|
|
|
|
|
6008
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6009
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
6010
|
|
|
|
|
|
|
my $wordAData = $interface->W2VGetWordVector( "of" );
|
6011
|
|
|
|
|
|
|
my $wordBData = $interface->W2VGetWordVector( "the" );
|
6012
|
|
|
|
|
|
|
|
6013
|
|
|
|
|
|
|
# Removing Words From Vector Data
|
6014
|
|
|
|
|
|
|
$wordAData = RemoveWordFromWordVectorString( $wordAData );
|
6015
|
|
|
|
|
|
|
$wordBData = RemoveWordFromWordVectorString( $wordBData );
|
6016
|
|
|
|
|
|
|
|
6017
|
|
|
|
|
|
|
my $data = $interface->W2VSubtractTwoWordVectors( $wordAData, $wordBData );
|
6018
|
|
|
|
|
|
|
print( "Computed Difference Of Words: $data" ) if defined( $data );
|
6019
|
|
|
|
|
|
|
|
6020
|
|
|
|
|
|
|
undef( $interface );
|
6021
|
|
|
|
|
|
|
|
6022
|
|
|
|
|
|
|
=head3 W2VAverageOfTwoWordVectors
|
6023
|
|
|
|
|
|
|
|
6024
|
|
|
|
|
|
|
Description:
|
6025
|
|
|
|
|
|
|
|
6026
|
|
|
|
|
|
|
Computes the average of two vector data strings and returns the result.
|
6027
|
|
|
|
|
|
|
|
6028
|
|
|
|
|
|
|
Warning: Text word must be removed from vector data prior to calling this method. This method
|
6029
|
|
|
|
|
|
|
also requires vector data to be in memory prior to method execution.
|
6030
|
|
|
|
|
|
|
|
6031
|
|
|
|
|
|
|
Input:
|
6032
|
|
|
|
|
|
|
|
6033
|
|
|
|
|
|
|
$string -> Word2vec word vector data (with string word removed)
|
6034
|
|
|
|
|
|
|
$string -> Word2vec word vector data (with string word removed)
|
6035
|
|
|
|
|
|
|
|
6036
|
|
|
|
|
|
|
Output:
|
6037
|
|
|
|
|
|
|
|
6038
|
|
|
|
|
|
|
$string -> String of word2vec average between word values
|
6039
|
|
|
|
|
|
|
|
6040
|
|
|
|
|
|
|
Example:
|
6041
|
|
|
|
|
|
|
|
6042
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6043
|
|
|
|
|
|
|
|
6044
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6045
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
6046
|
|
|
|
|
|
|
my $wordAData = $interface->W2VGetWordVector( "of" );
|
6047
|
|
|
|
|
|
|
my $wordBData = $interface->W2VGetWordVector( "the" );
|
6048
|
|
|
|
|
|
|
|
6049
|
|
|
|
|
|
|
# Removing Words From Vector Data
|
6050
|
|
|
|
|
|
|
$wordAData = RemoveWordFromWordVectorString( $wordAData );
|
6051
|
|
|
|
|
|
|
$wordBData = RemoveWordFromWordVectorString( $wordBData );
|
6052
|
|
|
|
|
|
|
|
6053
|
|
|
|
|
|
|
my $data = $interface->W2VAverageOfTwoWordVectors( $wordAData, $wordBData );
|
6054
|
|
|
|
|
|
|
print( "Computed Average Of Words: $data" ) if defined( $data );
|
6055
|
|
|
|
|
|
|
|
6056
|
|
|
|
|
|
|
undef( $interface );
|
6057
|
|
|
|
|
|
|
|
6058
|
|
|
|
|
|
|
=head3 W2VGetWordVector
|
6059
|
|
|
|
|
|
|
|
6060
|
|
|
|
|
|
|
Description:
|
6061
|
|
|
|
|
|
|
|
6062
|
|
|
|
|
|
|
Searches dictionary in memory for the specified string argument and returns the vector data.
|
6063
|
|
|
|
|
|
|
Returns undefined if not found.
|
6064
|
|
|
|
|
|
|
|
6065
|
|
|
|
|
|
|
Warning: Requires vector data to be in memory prior to method execution.
|
6066
|
|
|
|
|
|
|
|
6067
|
|
|
|
|
|
|
Input:
|
6068
|
|
|
|
|
|
|
|
6069
|
|
|
|
|
|
|
$string -> Word to locate in word2vec vocabulary/dictionary
|
6070
|
|
|
|
|
|
|
|
6071
|
|
|
|
|
|
|
Output:
|
6072
|
|
|
|
|
|
|
|
6073
|
|
|
|
|
|
|
$string -> Found word2vec word + word vector data or undefined.
|
6074
|
|
|
|
|
|
|
|
6075
|
|
|
|
|
|
|
Example:
|
6076
|
|
|
|
|
|
|
|
6077
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6078
|
|
|
|
|
|
|
|
6079
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6080
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
6081
|
|
|
|
|
|
|
my $wordData = $interface->W2VGetWordVector( "of" );
|
6082
|
|
|
|
|
|
|
print( "Word2vec Word Data: $wordData\n" ) if defined( $wordData );
|
6083
|
|
|
|
|
|
|
|
6084
|
|
|
|
|
|
|
undef( $interface );
|
6085
|
|
|
|
|
|
|
|
6086
|
|
|
|
|
|
|
=head3 W2VIsVectorDataInMemory
|
6087
|
|
|
|
|
|
|
|
6088
|
|
|
|
|
|
|
Description:
|
6089
|
|
|
|
|
|
|
|
6090
|
|
|
|
|
|
|
Checks to see if vector data has been loaded in memory.
|
6091
|
|
|
|
|
|
|
|
6092
|
|
|
|
|
|
|
Input:
|
6093
|
|
|
|
|
|
|
|
6094
|
|
|
|
|
|
|
None
|
6095
|
|
|
|
|
|
|
|
6096
|
|
|
|
|
|
|
Output:
|
6097
|
|
|
|
|
|
|
|
6098
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False
|
6099
|
|
|
|
|
|
|
|
6100
|
|
|
|
|
|
|
Example:
|
6101
|
|
|
|
|
|
|
|
6102
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6103
|
|
|
|
|
|
|
|
6104
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6105
|
|
|
|
|
|
|
my $result = $interface->W2VIsVectorDataInMemory();
|
6106
|
|
|
|
|
|
|
|
6107
|
|
|
|
|
|
|
print( "No vector data in memory\n" ) if $result == 0;
|
6108
|
|
|
|
|
|
|
print( "Yes vector data in memory\n" ) if $result == 1;
|
6109
|
|
|
|
|
|
|
|
6110
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
6111
|
|
|
|
|
|
|
$result = $interface->W2VIsVectorDataInMemory();
|
6112
|
|
|
|
|
|
|
print( "No vector data in memory\n" ) if $result == 0;
|
6113
|
|
|
|
|
|
|
print( "Yes vector data in memory\n" ) if $result == 1;
|
6114
|
|
|
|
|
|
|
|
6115
|
|
|
|
|
|
|
undef( $interface );
|
6116
|
|
|
|
|
|
|
|
6117
|
|
|
|
|
|
|
=head3 W2VIsVectorDataSorted
|
6118
|
|
|
|
|
|
|
|
6119
|
|
|
|
|
|
|
Description:
|
6120
|
|
|
|
|
|
|
|
6121
|
|
|
|
|
|
|
Checks to see if vector data header is signed as sorted in memory.
|
6122
|
|
|
|
|
|
|
|
6123
|
|
|
|
|
|
|
Input:
|
6124
|
|
|
|
|
|
|
|
6125
|
|
|
|
|
|
|
None
|
6126
|
|
|
|
|
|
|
|
6127
|
|
|
|
|
|
|
Output:
|
6128
|
|
|
|
|
|
|
|
6129
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False
|
6130
|
|
|
|
|
|
|
|
6131
|
|
|
|
|
|
|
Example:
|
6132
|
|
|
|
|
|
|
|
6133
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6134
|
|
|
|
|
|
|
|
6135
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6136
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
6137
|
|
|
|
|
|
|
|
6138
|
|
|
|
|
|
|
my $result = $interface->W2VIsVectorDataSorted();
|
6139
|
|
|
|
|
|
|
|
6140
|
|
|
|
|
|
|
print( "No vector data is not sorted\n" ) if $result == 0;
|
6141
|
|
|
|
|
|
|
print( "Yes vector data is sorted\n" ) if $result == 1;
|
6142
|
|
|
|
|
|
|
|
6143
|
|
|
|
|
|
|
undef( $interface );
|
6144
|
|
|
|
|
|
|
|
6145
|
|
|
|
|
|
|
=head3 W2VCheckWord2VecDataFileType
|
6146
|
|
|
|
|
|
|
|
6147
|
|
|
|
|
|
|
Description:
|
6148
|
|
|
|
|
|
|
|
6149
|
|
|
|
|
|
|
Checks specified file to see if vector data is in binary or plain text format. Returns 'text'
|
6150
|
|
|
|
|
|
|
for plain text and 'binary' for binary data.
|
6151
|
|
|
|
|
|
|
|
6152
|
|
|
|
|
|
|
Input:
|
6153
|
|
|
|
|
|
|
|
6154
|
|
|
|
|
|
|
$string -> File path
|
6155
|
|
|
|
|
|
|
|
6156
|
|
|
|
|
|
|
Output:
|
6157
|
|
|
|
|
|
|
|
6158
|
|
|
|
|
|
|
$string -> File Type ( "text" = Plain text file / "binary" = Binary data file )
|
6159
|
|
|
|
|
|
|
|
6160
|
|
|
|
|
|
|
Example:
|
6161
|
|
|
|
|
|
|
|
6162
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6163
|
|
|
|
|
|
|
|
6164
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6165
|
|
|
|
|
|
|
my $fileType = $interface->W2VCheckWord2VecDataFileType( "samples/samplevectors.bin" );
|
6166
|
|
|
|
|
|
|
|
6167
|
|
|
|
|
|
|
print( "FileType: $fileType\n" ) if defined( $fileType );
|
6168
|
|
|
|
|
|
|
|
6169
|
|
|
|
|
|
|
undef( $fileType );
|
6170
|
|
|
|
|
|
|
|
6171
|
|
|
|
|
|
|
=head3 W2VReadTrainedVectorDataFromFile
|
6172
|
|
|
|
|
|
|
|
6173
|
|
|
|
|
|
|
Description:
|
6174
|
|
|
|
|
|
|
|
6175
|
|
|
|
|
|
|
Reads trained vector data from the specified file path into memory. This function supports and
|
6176
|
|
|
|
|
|
|
automatically detects word2vec binary, plain text and sparse vector data formats.
|
6177
|
|
|
|
|
|
|
|
6178
|
|
|
|
|
|
|
Input:
|
6179
|
|
|
|
|
|
|
|
6180
|
|
|
|
|
|
|
$string -> Word2vec trained vector data file path
|
6181
|
|
|
|
|
|
|
|
6182
|
|
|
|
|
|
|
Output:
|
6183
|
|
|
|
|
|
|
|
6184
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
6185
|
|
|
|
|
|
|
|
6186
|
|
|
|
|
|
|
Example:
|
6187
|
|
|
|
|
|
|
|
6188
|
|
|
|
|
|
|
# Loading data in a Binary Search Tree
|
6189
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6190
|
|
|
|
|
|
|
|
6191
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6192
|
|
|
|
|
|
|
my $result = $interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
6193
|
|
|
|
|
|
|
|
6194
|
|
|
|
|
|
|
print( "Success Loading Data\n" ) if $result == 0;
|
6195
|
|
|
|
|
|
|
print( "Un-successful, Data Not Loaded\n" ) if $result == -1;
|
6196
|
|
|
|
|
|
|
|
6197
|
|
|
|
|
|
|
undef( $interface );
|
6198
|
|
|
|
|
|
|
|
6199
|
|
|
|
|
|
|
=head3 W2VSaveTrainedVectorDataToFile
|
6200
|
|
|
|
|
|
|
|
6201
|
|
|
|
|
|
|
Description:
|
6202
|
|
|
|
|
|
|
|
6203
|
|
|
|
|
|
|
Saves trained vector data to the specified location in the specified format.
|
6204
|
|
|
|
|
|
|
|
6205
|
|
|
|
|
|
|
Note: Leaving 'saveFormat' undefined will automatically save as plain text format.
|
6206
|
|
|
|
|
|
|
|
6207
|
|
|
|
|
|
|
Input:
|
6208
|
|
|
|
|
|
|
|
6209
|
|
|
|
|
|
|
$string -> Save Path
|
6210
|
|
|
|
|
|
|
$saveFormat -> Integer ( '0' = Save as plain text / '1' = Save data in word2vec binary format / '2' = Sparse vector data format )
|
6211
|
|
|
|
|
|
|
|
6212
|
|
|
|
|
|
|
Note: Leaving $saveFormat as undefined will save the file in plain text format.
|
6213
|
|
|
|
|
|
|
|
6214
|
|
|
|
|
|
|
Warning: If the vector data is stored as a binary search tree, this method will error out gracefully.
|
6215
|
|
|
|
|
|
|
|
6216
|
|
|
|
|
|
|
Output:
|
6217
|
|
|
|
|
|
|
|
6218
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
6219
|
|
|
|
|
|
|
|
6220
|
|
|
|
|
|
|
Example:
|
6221
|
|
|
|
|
|
|
|
6222
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6223
|
|
|
|
|
|
|
|
6224
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6225
|
|
|
|
|
|
|
|
6226
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
6227
|
|
|
|
|
|
|
$interface->W2VSaveTrainedVectorDataToFile( "samples/newvectors.bin" );
|
6228
|
|
|
|
|
|
|
|
6229
|
|
|
|
|
|
|
undef( $interface );
|
6230
|
|
|
|
|
|
|
|
6231
|
|
|
|
|
|
|
=head3 W2VStringsAreEqual
|
6232
|
|
|
|
|
|
|
|
6233
|
|
|
|
|
|
|
Description:
|
6234
|
|
|
|
|
|
|
|
6235
|
|
|
|
|
|
|
Compares two strings to check for equality, ignoring case-sensitivity.
|
6236
|
|
|
|
|
|
|
|
6237
|
|
|
|
|
|
|
Note: This method is not case-sensitive, i.e. "string" equals "StRiNg".
|
6238
|
|
|
|
|
|
|
|
6239
|
|
|
|
|
|
|
Input:
|
6240
|
|
|
|
|
|
|
|
6241
|
|
|
|
|
|
|
$string -> String to compare
|
6242
|
|
|
|
|
|
|
$string -> String to compare
|
6243
|
|
|
|
|
|
|
|
6244
|
|
|
|
|
|
|
Output:
|
6245
|
|
|
|
|
|
|
|
6246
|
|
|
|
|
|
|
$value -> '1' = Strings are equal / '0' = Strings are not equal
|
6247
|
|
|
|
|
|
|
|
6248
|
|
|
|
|
|
|
Example:
|
6249
|
|
|
|
|
|
|
|
6250
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6251
|
|
|
|
|
|
|
|
6252
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6253
|
|
|
|
|
|
|
my $result = $interface->W2VStringsAreEqual( "hello world", "HeLlO wOrLd" );
|
6254
|
|
|
|
|
|
|
|
6255
|
|
|
|
|
|
|
print( "Strings are equal!\n" ) if $result == 1;
|
6256
|
|
|
|
|
|
|
print( "Strings are not equal!\n" ) if $result == 0;
|
6257
|
|
|
|
|
|
|
|
6258
|
|
|
|
|
|
|
undef( $interface );
|
6259
|
|
|
|
|
|
|
|
6260
|
|
|
|
|
|
|
=head3 W2VConvertRawSparseTextToVectorDataAry
|
6261
|
|
|
|
|
|
|
|
6262
|
|
|
|
|
|
|
Description:
|
6263
|
|
|
|
|
|
|
|
6264
|
|
|
|
|
|
|
Converts sparse vector string to a dense vector format data array.
|
6265
|
|
|
|
|
|
|
|
6266
|
|
|
|
|
|
|
Input:
|
6267
|
|
|
|
|
|
|
|
6268
|
|
|
|
|
|
|
$string -> Vector data string.
|
6269
|
|
|
|
|
|
|
|
6270
|
|
|
|
|
|
|
Output:
|
6271
|
|
|
|
|
|
|
|
6272
|
|
|
|
|
|
|
$arrayReference -> Reference to array of vector data.
|
6273
|
|
|
|
|
|
|
|
6274
|
|
|
|
|
|
|
Example:
|
6275
|
|
|
|
|
|
|
|
6276
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6277
|
|
|
|
|
|
|
|
6278
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6279
|
|
|
|
|
|
|
my $str = "cookie 1 0.234 9 0.0002 13 0.234 17 -0.0023 19 1.0000";
|
6280
|
|
|
|
|
|
|
|
6281
|
|
|
|
|
|
|
my @vectorData = @{ $interface->W2VConvertRawSparseTextToVectorDataAry( $str ) };
|
6282
|
|
|
|
|
|
|
|
6283
|
|
|
|
|
|
|
print( "Data conversion successful!\n" ) if @vectorData > 0;
|
6284
|
|
|
|
|
|
|
print( "Data conversion un-successful!\n" ) if @vectorData == 0;
|
6285
|
|
|
|
|
|
|
|
6286
|
|
|
|
|
|
|
undef( $interface );
|
6287
|
|
|
|
|
|
|
|
6288
|
|
|
|
|
|
|
=head3 W2VConvertRawSparseTextToVectorDataHash
|
6289
|
|
|
|
|
|
|
|
6290
|
|
|
|
|
|
|
Description:
|
6291
|
|
|
|
|
|
|
|
6292
|
|
|
|
|
|
|
Converts sparse vector string to a dense vector format data hash.
|
6293
|
|
|
|
|
|
|
|
6294
|
|
|
|
|
|
|
Input:
|
6295
|
|
|
|
|
|
|
|
6296
|
|
|
|
|
|
|
$string -> Vector data string.
|
6297
|
|
|
|
|
|
|
|
6298
|
|
|
|
|
|
|
Output:
|
6299
|
|
|
|
|
|
|
|
6300
|
|
|
|
|
|
|
$hashReference -> Reference to hash of vector data.
|
6301
|
|
|
|
|
|
|
|
6302
|
|
|
|
|
|
|
Example:
|
6303
|
|
|
|
|
|
|
|
6304
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6305
|
|
|
|
|
|
|
|
6306
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6307
|
|
|
|
|
|
|
my $str = "cookie 1 0.234 9 0.0002 13 0.234 17 -0.0023 19 1.0000";
|
6308
|
|
|
|
|
|
|
|
6309
|
|
|
|
|
|
|
my %vectorData = %{ $interface->W2VConvertRawSparseTextToVectorDataHash( $str ) };
|
6310
|
|
|
|
|
|
|
|
6311
|
|
|
|
|
|
|
print( "Data conversion successful!\n" ) if ( keys %vectorData ) > 0;
|
6312
|
|
|
|
|
|
|
print( "Data conversion un-successful!\n" ) if ( keys %vectorData ) == 0;
|
6313
|
|
|
|
|
|
|
|
6314
|
|
|
|
|
|
|
undef( $interface );
|
6315
|
|
|
|
|
|
|
|
6316
|
|
|
|
|
|
|
=head2 Word2Vec Accessor Functions
|
6317
|
|
|
|
|
|
|
|
6318
|
|
|
|
|
|
|
=head3 W2VGetDebugLog
|
6319
|
|
|
|
|
|
|
|
6320
|
|
|
|
|
|
|
Description:
|
6321
|
|
|
|
|
|
|
|
6322
|
|
|
|
|
|
|
Returns the _debugLog member variable set during Word2vec::Word2vec object initialization of new function.
|
6323
|
|
|
|
|
|
|
|
6324
|
|
|
|
|
|
|
Input:
|
6325
|
|
|
|
|
|
|
|
6326
|
|
|
|
|
|
|
None
|
6327
|
|
|
|
|
|
|
|
6328
|
|
|
|
|
|
|
Output:
|
6329
|
|
|
|
|
|
|
|
6330
|
|
|
|
|
|
|
$value -> '0' = False, '1' = True
|
6331
|
|
|
|
|
|
|
|
6332
|
|
|
|
|
|
|
Example:
|
6333
|
|
|
|
|
|
|
|
6334
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6335
|
|
|
|
|
|
|
|
6336
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6337
|
|
|
|
|
|
|
my $debugLog = $interface->W2VGetDebugLog();
|
6338
|
|
|
|
|
|
|
|
6339
|
|
|
|
|
|
|
print( "Debug Logging Enabled\n" ) if $debugLog == 1;
|
6340
|
|
|
|
|
|
|
print( "Debug Logging Disabled\n" ) if $debugLog == 0;
|
6341
|
|
|
|
|
|
|
|
6342
|
|
|
|
|
|
|
|
6343
|
|
|
|
|
|
|
undef( $interface );
|
6344
|
|
|
|
|
|
|
|
6345
|
|
|
|
|
|
|
=head3 W2VGetWriteLog
|
6346
|
|
|
|
|
|
|
|
6347
|
|
|
|
|
|
|
Description:
|
6348
|
|
|
|
|
|
|
|
6349
|
|
|
|
|
|
|
Returns the _writeLog member variable set during Word2vec::Word2vec object initialization of new function.
|
6350
|
|
|
|
|
|
|
|
6351
|
|
|
|
|
|
|
Input:
|
6352
|
|
|
|
|
|
|
|
6353
|
|
|
|
|
|
|
None
|
6354
|
|
|
|
|
|
|
|
6355
|
|
|
|
|
|
|
Output:
|
6356
|
|
|
|
|
|
|
|
6357
|
|
|
|
|
|
|
$value -> '0' = False, '1' = True
|
6358
|
|
|
|
|
|
|
|
6359
|
|
|
|
|
|
|
Example:
|
6360
|
|
|
|
|
|
|
|
6361
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6362
|
|
|
|
|
|
|
|
6363
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6364
|
|
|
|
|
|
|
my $writeLog = $interface->W2VGetWriteLog();
|
6365
|
|
|
|
|
|
|
|
6366
|
|
|
|
|
|
|
print( "Write Logging Enabled\n" ) if $writeLog == 1;
|
6367
|
|
|
|
|
|
|
print( "Write Logging Disabled\n" ) if $writeLog == 0;
|
6368
|
|
|
|
|
|
|
|
6369
|
|
|
|
|
|
|
undef( $interface );
|
6370
|
|
|
|
|
|
|
|
6371
|
|
|
|
|
|
|
=head3 W2VGetFileHandle
|
6372
|
|
|
|
|
|
|
|
6373
|
|
|
|
|
|
|
Description:
|
6374
|
|
|
|
|
|
|
|
6375
|
|
|
|
|
|
|
Returns the _fileHandle member variable set during Word2vec::Word2vec object instantiation of new function.
|
6376
|
|
|
|
|
|
|
|
6377
|
|
|
|
|
|
|
Warning: This is a private function. File handle is used by WriteLog() method. Do not manipulate this file handle as errors can result.
|
6378
|
|
|
|
|
|
|
|
6379
|
|
|
|
|
|
|
Input:
|
6380
|
|
|
|
|
|
|
|
6381
|
|
|
|
|
|
|
None
|
6382
|
|
|
|
|
|
|
|
6383
|
|
|
|
|
|
|
Output:
|
6384
|
|
|
|
|
|
|
|
6385
|
|
|
|
|
|
|
$fileHandle -> Returns file handle for WriteLog() method or undefined.
|
6386
|
|
|
|
|
|
|
|
6387
|
|
|
|
|
|
|
Example:
|
6388
|
|
|
|
|
|
|
|
6389
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6390
|
|
|
|
|
|
|
|
6391
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6392
|
|
|
|
|
|
|
my $fileHandle = $interface->W2VGetFileHandle();
|
6393
|
|
|
|
|
|
|
|
6394
|
|
|
|
|
|
|
undef( $interface );
|
6395
|
|
|
|
|
|
|
|
6396
|
|
|
|
|
|
|
=head3 W2VGetTrainFilePath
|
6397
|
|
|
|
|
|
|
|
6398
|
|
|
|
|
|
|
Description:
|
6399
|
|
|
|
|
|
|
|
6400
|
|
|
|
|
|
|
Returns the _trainFilePath member variable set during Word2vec::Word2vec object instantiation of new function.
|
6401
|
|
|
|
|
|
|
|
6402
|
|
|
|
|
|
|
Input:
|
6403
|
|
|
|
|
|
|
|
6404
|
|
|
|
|
|
|
None
|
6405
|
|
|
|
|
|
|
|
6406
|
|
|
|
|
|
|
Output:
|
6407
|
|
|
|
|
|
|
|
6408
|
|
|
|
|
|
|
$string -> Returns word2vec training text corpus file path.
|
6409
|
|
|
|
|
|
|
|
6410
|
|
|
|
|
|
|
Example:
|
6411
|
|
|
|
|
|
|
|
6412
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6413
|
|
|
|
|
|
|
|
6414
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6415
|
|
|
|
|
|
|
my $filePath = $interface->W2VGetTrainFilePath();
|
6416
|
|
|
|
|
|
|
print( "Training File Path: $filePath\n" );
|
6417
|
|
|
|
|
|
|
|
6418
|
|
|
|
|
|
|
undef( $interface );
|
6419
|
|
|
|
|
|
|
|
6420
|
|
|
|
|
|
|
=head3 W2VGetOutputFilePath
|
6421
|
|
|
|
|
|
|
|
6422
|
|
|
|
|
|
|
Description:
|
6423
|
|
|
|
|
|
|
|
6424
|
|
|
|
|
|
|
Returns the _outputFilePath member variable set during Word2vec::Word2vec object instantiation of new function.
|
6425
|
|
|
|
|
|
|
|
6426
|
|
|
|
|
|
|
Input:
|
6427
|
|
|
|
|
|
|
|
6428
|
|
|
|
|
|
|
None
|
6429
|
|
|
|
|
|
|
|
6430
|
|
|
|
|
|
|
Output:
|
6431
|
|
|
|
|
|
|
|
6432
|
|
|
|
|
|
|
$string -> Returns post word2vec training output file path.
|
6433
|
|
|
|
|
|
|
|
6434
|
|
|
|
|
|
|
Example:
|
6435
|
|
|
|
|
|
|
|
6436
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6437
|
|
|
|
|
|
|
|
6438
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6439
|
|
|
|
|
|
|
my $filePath = $interface->W2VGetOutputFilePath();
|
6440
|
|
|
|
|
|
|
print( "File Path: $filePath\n" );
|
6441
|
|
|
|
|
|
|
|
6442
|
|
|
|
|
|
|
undef( $interface );
|
6443
|
|
|
|
|
|
|
|
6444
|
|
|
|
|
|
|
=head3 W2VGetWordVecSize
|
6445
|
|
|
|
|
|
|
|
6446
|
|
|
|
|
|
|
Description:
|
6447
|
|
|
|
|
|
|
|
6448
|
|
|
|
|
|
|
Returns the _wordVecSize member variable set during Word2vec::Word2vec object instantiation of new function.
|
6449
|
|
|
|
|
|
|
|
6450
|
|
|
|
|
|
|
Input:
|
6451
|
|
|
|
|
|
|
|
6452
|
|
|
|
|
|
|
None
|
6453
|
|
|
|
|
|
|
|
6454
|
|
|
|
|
|
|
Output:
|
6455
|
|
|
|
|
|
|
|
6456
|
|
|
|
|
|
|
$value -> Returns (integer) size of word2vec word vectors. Default value = 100
|
6457
|
|
|
|
|
|
|
|
6458
|
|
|
|
|
|
|
Example:
|
6459
|
|
|
|
|
|
|
|
6460
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6461
|
|
|
|
|
|
|
|
6462
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6463
|
|
|
|
|
|
|
my $value = $interface->W2VGetWordVecSize();
|
6464
|
|
|
|
|
|
|
print( "Word Vector Size: $value\n" );
|
6465
|
|
|
|
|
|
|
|
6466
|
|
|
|
|
|
|
undef( $interface );
|
6467
|
|
|
|
|
|
|
|
6468
|
|
|
|
|
|
|
=head3 W2VGetWindowSize
|
6469
|
|
|
|
|
|
|
|
6470
|
|
|
|
|
|
|
Description:
|
6471
|
|
|
|
|
|
|
|
6472
|
|
|
|
|
|
|
Returns the _windowSize member variable set during Word2vec::Word2vec object instantiation of new function.
|
6473
|
|
|
|
|
|
|
|
6474
|
|
|
|
|
|
|
Input:
|
6475
|
|
|
|
|
|
|
|
6476
|
|
|
|
|
|
|
None
|
6477
|
|
|
|
|
|
|
|
6478
|
|
|
|
|
|
|
Output:
|
6479
|
|
|
|
|
|
|
|
6480
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec window size. Default value = 5
|
6481
|
|
|
|
|
|
|
|
6482
|
|
|
|
|
|
|
Example:
|
6483
|
|
|
|
|
|
|
|
6484
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6485
|
|
|
|
|
|
|
|
6486
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6487
|
|
|
|
|
|
|
my $value = $interface->W2VGetWindowSize();
|
6488
|
|
|
|
|
|
|
print( "Window Size: $value\n" );
|
6489
|
|
|
|
|
|
|
|
6490
|
|
|
|
|
|
|
undef( $interface );
|
6491
|
|
|
|
|
|
|
|
6492
|
|
|
|
|
|
|
=head3 W2VGetSample
|
6493
|
|
|
|
|
|
|
|
6494
|
|
|
|
|
|
|
Description:
|
6495
|
|
|
|
|
|
|
|
6496
|
|
|
|
|
|
|
Returns the _sample member variable set during Word2vec::Word2vec object instantiation of new function.
|
6497
|
|
|
|
|
|
|
|
6498
|
|
|
|
|
|
|
Input:
|
6499
|
|
|
|
|
|
|
|
6500
|
|
|
|
|
|
|
None
|
6501
|
|
|
|
|
|
|
|
6502
|
|
|
|
|
|
|
Output:
|
6503
|
|
|
|
|
|
|
|
6504
|
|
|
|
|
|
|
$value -> Returns (float) word2vec sample threshold. Default value = 0.001
|
6505
|
|
|
|
|
|
|
|
6506
|
|
|
|
|
|
|
Example:
|
6507
|
|
|
|
|
|
|
|
6508
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6509
|
|
|
|
|
|
|
|
6510
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6511
|
|
|
|
|
|
|
my $value = $interface->W2VGetSample();
|
6512
|
|
|
|
|
|
|
print( "Sample: $value\n" );
|
6513
|
|
|
|
|
|
|
|
6514
|
|
|
|
|
|
|
undef( $interface );
|
6515
|
|
|
|
|
|
|
|
6516
|
|
|
|
|
|
|
=head3 W2VGetHSoftMax
|
6517
|
|
|
|
|
|
|
|
6518
|
|
|
|
|
|
|
Description:
|
6519
|
|
|
|
|
|
|
|
6520
|
|
|
|
|
|
|
Returns the _hSoftMax member variable set during Word2vec::Word2vec object instantiation of new function.
|
6521
|
|
|
|
|
|
|
|
6522
|
|
|
|
|
|
|
Input:
|
6523
|
|
|
|
|
|
|
|
6524
|
|
|
|
|
|
|
None
|
6525
|
|
|
|
|
|
|
|
6526
|
|
|
|
|
|
|
Output:
|
6527
|
|
|
|
|
|
|
|
6528
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec HSoftMax value. Default = 0
|
6529
|
|
|
|
|
|
|
|
6530
|
|
|
|
|
|
|
Example:
|
6531
|
|
|
|
|
|
|
|
6532
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6533
|
|
|
|
|
|
|
|
6534
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6535
|
|
|
|
|
|
|
my $value = $interface->W2VGetHSoftMax();
|
6536
|
|
|
|
|
|
|
print( "HSoftMax: $value\n" );
|
6537
|
|
|
|
|
|
|
|
6538
|
|
|
|
|
|
|
undef( $interface );
|
6539
|
|
|
|
|
|
|
|
6540
|
|
|
|
|
|
|
=head3 W2VGetNegative
|
6541
|
|
|
|
|
|
|
|
6542
|
|
|
|
|
|
|
Description:
|
6543
|
|
|
|
|
|
|
|
6544
|
|
|
|
|
|
|
Returns the _negative member variable set during Word2vec::Word2vec object instantiation of new function.
|
6545
|
|
|
|
|
|
|
|
6546
|
|
|
|
|
|
|
Input:
|
6547
|
|
|
|
|
|
|
|
6548
|
|
|
|
|
|
|
None
|
6549
|
|
|
|
|
|
|
|
6550
|
|
|
|
|
|
|
Output:
|
6551
|
|
|
|
|
|
|
|
6552
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec negative value. Default = 5
|
6553
|
|
|
|
|
|
|
|
6554
|
|
|
|
|
|
|
Example:
|
6555
|
|
|
|
|
|
|
|
6556
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6557
|
|
|
|
|
|
|
|
6558
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6559
|
|
|
|
|
|
|
my $value = $interface->W2VGetNegative();
|
6560
|
|
|
|
|
|
|
print( "Negative: $value\n" );
|
6561
|
|
|
|
|
|
|
|
6562
|
|
|
|
|
|
|
undef( $interface );
|
6563
|
|
|
|
|
|
|
|
6564
|
|
|
|
|
|
|
=head3 W2VGetNumOfThreads
|
6565
|
|
|
|
|
|
|
|
6566
|
|
|
|
|
|
|
Description:
|
6567
|
|
|
|
|
|
|
|
6568
|
|
|
|
|
|
|
Returns the _numOfThreads member variable set during Word2vec::Word2vec object instantiation of new function.
|
6569
|
|
|
|
|
|
|
|
6570
|
|
|
|
|
|
|
Input:
|
6571
|
|
|
|
|
|
|
|
6572
|
|
|
|
|
|
|
None
|
6573
|
|
|
|
|
|
|
|
6574
|
|
|
|
|
|
|
Output:
|
6575
|
|
|
|
|
|
|
|
6576
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec number of threads to use during training. Default = 12
|
6577
|
|
|
|
|
|
|
|
6578
|
|
|
|
|
|
|
Example:
|
6579
|
|
|
|
|
|
|
|
6580
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6581
|
|
|
|
|
|
|
|
6582
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6583
|
|
|
|
|
|
|
my $value = $interface->W2VGetNumOfThreads();
|
6584
|
|
|
|
|
|
|
print( "Number of threads: $value\n" );
|
6585
|
|
|
|
|
|
|
|
6586
|
|
|
|
|
|
|
undef( $interface );
|
6587
|
|
|
|
|
|
|
|
6588
|
|
|
|
|
|
|
=head3 W2VGetNumOfIterations
|
6589
|
|
|
|
|
|
|
|
6590
|
|
|
|
|
|
|
Description:
|
6591
|
|
|
|
|
|
|
|
6592
|
|
|
|
|
|
|
Returns the _iterations member variable set during Word2vec::Word2vec object instantiation of new function.
|
6593
|
|
|
|
|
|
|
|
6594
|
|
|
|
|
|
|
Input:
|
6595
|
|
|
|
|
|
|
|
6596
|
|
|
|
|
|
|
None
|
6597
|
|
|
|
|
|
|
|
6598
|
|
|
|
|
|
|
Output:
|
6599
|
|
|
|
|
|
|
|
6600
|
|
|
|
|
|
|
$value -> Returns (integer) number of word2vec training iterations. Default = 5
|
6601
|
|
|
|
|
|
|
|
6602
|
|
|
|
|
|
|
Example:
|
6603
|
|
|
|
|
|
|
|
6604
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6605
|
|
|
|
|
|
|
|
6606
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6607
|
|
|
|
|
|
|
my $value = $interface->W2VGetNumOfIterations();
|
6608
|
|
|
|
|
|
|
print( "Number of iterations: $value\n" );
|
6609
|
|
|
|
|
|
|
|
6610
|
|
|
|
|
|
|
undef( $interface );
|
6611
|
|
|
|
|
|
|
|
6612
|
|
|
|
|
|
|
=head3 W2VGetMinCount
|
6613
|
|
|
|
|
|
|
|
6614
|
|
|
|
|
|
|
Description:
|
6615
|
|
|
|
|
|
|
|
6616
|
|
|
|
|
|
|
Returns the _minCount member variable set during Word2vec::Word2vec object instantiation of new function.
|
6617
|
|
|
|
|
|
|
|
6618
|
|
|
|
|
|
|
Input:
|
6619
|
|
|
|
|
|
|
|
6620
|
|
|
|
|
|
|
None
|
6621
|
|
|
|
|
|
|
|
6622
|
|
|
|
|
|
|
Output:
|
6623
|
|
|
|
|
|
|
|
6624
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec min-count value. Default = 5
|
6625
|
|
|
|
|
|
|
|
6626
|
|
|
|
|
|
|
Example:
|
6627
|
|
|
|
|
|
|
|
6628
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6629
|
|
|
|
|
|
|
|
6630
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6631
|
|
|
|
|
|
|
my $value = $interface->W2VGetMinCount();
|
6632
|
|
|
|
|
|
|
print( "Min Count: $value\n" );
|
6633
|
|
|
|
|
|
|
|
6634
|
|
|
|
|
|
|
undef( $interface );
|
6635
|
|
|
|
|
|
|
|
6636
|
|
|
|
|
|
|
=head3 W2VGetAlpha
|
6637
|
|
|
|
|
|
|
|
6638
|
|
|
|
|
|
|
Description:
|
6639
|
|
|
|
|
|
|
|
6640
|
|
|
|
|
|
|
Returns the _alpha member variable set during Word2vec::Word2vec object instantiation of new function.
|
6641
|
|
|
|
|
|
|
|
6642
|
|
|
|
|
|
|
Input:
|
6643
|
|
|
|
|
|
|
|
6644
|
|
|
|
|
|
|
None
|
6645
|
|
|
|
|
|
|
|
6646
|
|
|
|
|
|
|
Output:
|
6647
|
|
|
|
|
|
|
|
6648
|
|
|
|
|
|
|
$value -> Returns (float) word2vec alpha value. Default = 0.05 for CBOW and 0.025 for Skip-Gram.
|
6649
|
|
|
|
|
|
|
|
6650
|
|
|
|
|
|
|
Example:
|
6651
|
|
|
|
|
|
|
|
6652
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6653
|
|
|
|
|
|
|
|
6654
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6655
|
|
|
|
|
|
|
my $value = $interface->W2VGetAlpha();
|
6656
|
|
|
|
|
|
|
print( "Alpha: $value\n" );
|
6657
|
|
|
|
|
|
|
|
6658
|
|
|
|
|
|
|
undef( $interface );
|
6659
|
|
|
|
|
|
|
|
6660
|
|
|
|
|
|
|
=head3 W2VGetClasses
|
6661
|
|
|
|
|
|
|
|
6662
|
|
|
|
|
|
|
Description:
|
6663
|
|
|
|
|
|
|
|
6664
|
|
|
|
|
|
|
Returns the _classes member variable set during Word2vec::Word2vec object instantiation of new function.
|
6665
|
|
|
|
|
|
|
|
6666
|
|
|
|
|
|
|
Input:
|
6667
|
|
|
|
|
|
|
|
6668
|
|
|
|
|
|
|
None
|
6669
|
|
|
|
|
|
|
|
6670
|
|
|
|
|
|
|
Output:
|
6671
|
|
|
|
|
|
|
|
6672
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec classes value. Default = 0
|
6673
|
|
|
|
|
|
|
|
6674
|
|
|
|
|
|
|
Example:
|
6675
|
|
|
|
|
|
|
|
6676
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6677
|
|
|
|
|
|
|
|
6678
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6679
|
|
|
|
|
|
|
my $value = $interface->W2VGetClasses();
|
6680
|
|
|
|
|
|
|
print( "Classes: $value\n" );
|
6681
|
|
|
|
|
|
|
|
6682
|
|
|
|
|
|
|
undef( $interface );
|
6683
|
|
|
|
|
|
|
|
6684
|
|
|
|
|
|
|
=head3 W2VGetDebugTraining
|
6685
|
|
|
|
|
|
|
|
6686
|
|
|
|
|
|
|
Description:
|
6687
|
|
|
|
|
|
|
|
6688
|
|
|
|
|
|
|
Returns the _debug member variable set during Word2vec::Word2vec object instantiation of new function.
|
6689
|
|
|
|
|
|
|
|
6690
|
|
|
|
|
|
|
Note: 0 = No debug output, 1 = Enable debug output, 2 = Even more debug output
|
6691
|
|
|
|
|
|
|
|
6692
|
|
|
|
|
|
|
Input:
|
6693
|
|
|
|
|
|
|
|
6694
|
|
|
|
|
|
|
None
|
6695
|
|
|
|
|
|
|
|
6696
|
|
|
|
|
|
|
Output:
|
6697
|
|
|
|
|
|
|
|
6698
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec debug value. Default = 2
|
6699
|
|
|
|
|
|
|
|
6700
|
|
|
|
|
|
|
Example:
|
6701
|
|
|
|
|
|
|
|
6702
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6703
|
|
|
|
|
|
|
|
6704
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6705
|
|
|
|
|
|
|
my $value = $interface->W2VGetDebugTraining();
|
6706
|
|
|
|
|
|
|
print( "Debug: $value\n" );
|
6707
|
|
|
|
|
|
|
|
6708
|
|
|
|
|
|
|
undef( $interface );
|
6709
|
|
|
|
|
|
|
|
6710
|
|
|
|
|
|
|
=head3 W2VGetBinaryOutput
|
6711
|
|
|
|
|
|
|
|
6712
|
|
|
|
|
|
|
Description:
|
6713
|
|
|
|
|
|
|
|
6714
|
|
|
|
|
|
|
Returns the _binaryOutput member variable set during Word2vec::Word2vec object instantiation of new function.
|
6715
|
|
|
|
|
|
|
|
6716
|
|
|
|
|
|
|
Note: 1 = Save trained vector data in binary format, 0 = Save trained vector data in plain text format.
|
6717
|
|
|
|
|
|
|
|
6718
|
|
|
|
|
|
|
Input:
|
6719
|
|
|
|
|
|
|
|
6720
|
|
|
|
|
|
|
None
|
6721
|
|
|
|
|
|
|
|
6722
|
|
|
|
|
|
|
Output:
|
6723
|
|
|
|
|
|
|
|
6724
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec binary flag. Default = 0
|
6725
|
|
|
|
|
|
|
|
6726
|
|
|
|
|
|
|
Example:
|
6727
|
|
|
|
|
|
|
|
6728
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6729
|
|
|
|
|
|
|
|
6730
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6731
|
|
|
|
|
|
|
my $value = $interface->W2VGetBinaryOutput();
|
6732
|
|
|
|
|
|
|
print( "Binary Output: $value\n" );
|
6733
|
|
|
|
|
|
|
|
6734
|
|
|
|
|
|
|
undef( $interface );
|
6735
|
|
|
|
|
|
|
|
6736
|
|
|
|
|
|
|
=head3 W2VGetReadVocabFilePath
|
6737
|
|
|
|
|
|
|
|
6738
|
|
|
|
|
|
|
Description:
|
6739
|
|
|
|
|
|
|
|
6740
|
|
|
|
|
|
|
Returns the _readVocab member variable set during Word2vec::Word2vec object instantiation of new function.
|
6741
|
|
|
|
|
|
|
|
6742
|
|
|
|
|
|
|
Input:
|
6743
|
|
|
|
|
|
|
|
6744
|
|
|
|
|
|
|
None
|
6745
|
|
|
|
|
|
|
|
6746
|
|
|
|
|
|
|
Output:
|
6747
|
|
|
|
|
|
|
|
6748
|
|
|
|
|
|
|
$string -> Returns (string) word2vec read vocabulary file name or empty string if not set.
|
6749
|
|
|
|
|
|
|
|
6750
|
|
|
|
|
|
|
Example:
|
6751
|
|
|
|
|
|
|
|
6752
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6753
|
|
|
|
|
|
|
|
6754
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6755
|
|
|
|
|
|
|
my $str = $interface->W2VGetReadVocabFilePath();
|
6756
|
|
|
|
|
|
|
print( "Read Vocab File Path: $str\n" );
|
6757
|
|
|
|
|
|
|
|
6758
|
|
|
|
|
|
|
undef( $interface );
|
6759
|
|
|
|
|
|
|
|
6760
|
|
|
|
|
|
|
=head3 W2VGetSaveVocabFilePath
|
6761
|
|
|
|
|
|
|
|
6762
|
|
|
|
|
|
|
Description:
|
6763
|
|
|
|
|
|
|
|
6764
|
|
|
|
|
|
|
Returns the _saveVocab member variable set during Word2vec::Word2vec object instantiation of new function.
|
6765
|
|
|
|
|
|
|
|
6766
|
|
|
|
|
|
|
Input:
|
6767
|
|
|
|
|
|
|
|
6768
|
|
|
|
|
|
|
None
|
6769
|
|
|
|
|
|
|
|
6770
|
|
|
|
|
|
|
Output:
|
6771
|
|
|
|
|
|
|
|
6772
|
|
|
|
|
|
|
$string -> Returns (string) word2vec save vocabulary file name or empty string if not set.
|
6773
|
|
|
|
|
|
|
|
6774
|
|
|
|
|
|
|
Example:
|
6775
|
|
|
|
|
|
|
|
6776
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6777
|
|
|
|
|
|
|
|
6778
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6779
|
|
|
|
|
|
|
my $str = $interface->W2VGetSaveVocabFilePath();
|
6780
|
|
|
|
|
|
|
print( "Save Vocab File Path: $str\n" );
|
6781
|
|
|
|
|
|
|
|
6782
|
|
|
|
|
|
|
undef( $interface );
|
6783
|
|
|
|
|
|
|
|
6784
|
|
|
|
|
|
|
=head3 W2VGetUseCBOW
|
6785
|
|
|
|
|
|
|
|
6786
|
|
|
|
|
|
|
Description:
|
6787
|
|
|
|
|
|
|
|
6788
|
|
|
|
|
|
|
Returns the _useCBOW member variable set during Word2vec::Word2vec object instantiation of new function.
|
6789
|
|
|
|
|
|
|
|
6790
|
|
|
|
|
|
|
Note: 0 = Skip-Gram Model, 1 = Continuous Bag Of Words Model.
|
6791
|
|
|
|
|
|
|
|
6792
|
|
|
|
|
|
|
Input:
|
6793
|
|
|
|
|
|
|
|
6794
|
|
|
|
|
|
|
None
|
6795
|
|
|
|
|
|
|
|
6796
|
|
|
|
|
|
|
Output:
|
6797
|
|
|
|
|
|
|
|
6798
|
|
|
|
|
|
|
$value -> Returns (integer) word2vec Continuous-Bag-Of-Words flag. Default = 1
|
6799
|
|
|
|
|
|
|
|
6800
|
|
|
|
|
|
|
Example:
|
6801
|
|
|
|
|
|
|
|
6802
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6803
|
|
|
|
|
|
|
|
6804
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6805
|
|
|
|
|
|
|
my $value = $interface->W2VGetUseCBOW();
|
6806
|
|
|
|
|
|
|
print( "Use CBOW?: $value\n" );
|
6807
|
|
|
|
|
|
|
|
6808
|
|
|
|
|
|
|
undef( $interface );
|
6809
|
|
|
|
|
|
|
|
6810
|
|
|
|
|
|
|
=head3 W2VGetWorkingDir
|
6811
|
|
|
|
|
|
|
|
6812
|
|
|
|
|
|
|
Description:
|
6813
|
|
|
|
|
|
|
|
6814
|
|
|
|
|
|
|
Returns the _workingDir member variable set during Word2vec::Word2vec object instantiation of new function.
|
6815
|
|
|
|
|
|
|
|
6816
|
|
|
|
|
|
|
Input:
|
6817
|
|
|
|
|
|
|
|
6818
|
|
|
|
|
|
|
None
|
6819
|
|
|
|
|
|
|
|
6820
|
|
|
|
|
|
|
Output:
|
6821
|
|
|
|
|
|
|
|
6822
|
|
|
|
|
|
|
$value -> Returns (string) working directory path or current directory if not specified.
|
6823
|
|
|
|
|
|
|
|
6824
|
|
|
|
|
|
|
Example:
|
6825
|
|
|
|
|
|
|
|
6826
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6827
|
|
|
|
|
|
|
|
6828
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6829
|
|
|
|
|
|
|
my $str = $interface->W2VGetWorkingDir();
|
6830
|
|
|
|
|
|
|
print( "Working Directory: $str\n" );
|
6831
|
|
|
|
|
|
|
|
6832
|
|
|
|
|
|
|
undef( $interface );
|
6833
|
|
|
|
|
|
|
|
6834
|
|
|
|
|
|
|
=head3 W2VGetWord2VecExeDir
|
6835
|
|
|
|
|
|
|
|
6836
|
|
|
|
|
|
|
Description:
|
6837
|
|
|
|
|
|
|
|
6838
|
|
|
|
|
|
|
Returns the _word2VecExeDir member variable set during Word2vec::Word2vec object instantiation of new function.
|
6839
|
|
|
|
|
|
|
|
6840
|
|
|
|
|
|
|
Input:
|
6841
|
|
|
|
|
|
|
|
6842
|
|
|
|
|
|
|
None
|
6843
|
|
|
|
|
|
|
|
6844
|
|
|
|
|
|
|
Output:
|
6845
|
|
|
|
|
|
|
|
6846
|
|
|
|
|
|
|
$value -> Returns (string) word2vec executable directory path or empty string if not specified.
|
6847
|
|
|
|
|
|
|
|
6848
|
|
|
|
|
|
|
Example:
|
6849
|
|
|
|
|
|
|
|
6850
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6851
|
|
|
|
|
|
|
|
6852
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6853
|
|
|
|
|
|
|
my $str = $interface->W2VGetWord2VecExeDir();
|
6854
|
|
|
|
|
|
|
print( "Word2Vec Executable File Directory: $str\n" );
|
6855
|
|
|
|
|
|
|
|
6856
|
|
|
|
|
|
|
undef( $interface );
|
6857
|
|
|
|
|
|
|
|
6858
|
|
|
|
|
|
|
=head3 W2VGetVocabularyHash
|
6859
|
|
|
|
|
|
|
|
6860
|
|
|
|
|
|
|
Description:
|
6861
|
|
|
|
|
|
|
|
6862
|
|
|
|
|
|
|
Returns the _hashRefOfWordVectors member variable set during Word2vec::Word2vec object instantiation of new function.
|
6863
|
|
|
|
|
|
|
|
6864
|
|
|
|
|
|
|
Input:
|
6865
|
|
|
|
|
|
|
|
6866
|
|
|
|
|
|
|
None
|
6867
|
|
|
|
|
|
|
|
6868
|
|
|
|
|
|
|
Output:
|
6869
|
|
|
|
|
|
|
|
6870
|
|
|
|
|
|
|
$value -> Returns hash reference of vocabulary/dictionary words. (Word2vec trained data in memory)
|
6871
|
|
|
|
|
|
|
|
6872
|
|
|
|
|
|
|
Example:
|
6873
|
|
|
|
|
|
|
|
6874
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6875
|
|
|
|
|
|
|
|
6876
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6877
|
|
|
|
|
|
|
my $vocabularyHashReference = $interface->W2VGetVocabularyHash();
|
6878
|
|
|
|
|
|
|
|
6879
|
|
|
|
|
|
|
undef( $interface );
|
6880
|
|
|
|
|
|
|
|
6881
|
|
|
|
|
|
|
=head3 W2VGetOverwriteOldFile
|
6882
|
|
|
|
|
|
|
|
6883
|
|
|
|
|
|
|
Description:
|
6884
|
|
|
|
|
|
|
|
6885
|
|
|
|
|
|
|
Returns the _overwriteOldFile member variable set during Word2vec::Word2vec object instantiation of new function.
|
6886
|
|
|
|
|
|
|
|
6887
|
|
|
|
|
|
|
Input:
|
6888
|
|
|
|
|
|
|
|
6889
|
|
|
|
|
|
|
None
|
6890
|
|
|
|
|
|
|
|
6891
|
|
|
|
|
|
|
Output:
|
6892
|
|
|
|
|
|
|
|
6893
|
|
|
|
|
|
|
$value -> Returns 1 = True or 0 = False.
|
6894
|
|
|
|
|
|
|
|
6895
|
|
|
|
|
|
|
Example:
|
6896
|
|
|
|
|
|
|
|
6897
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6898
|
|
|
|
|
|
|
|
6899
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6900
|
|
|
|
|
|
|
my $value = $interface->W2VGetOverwriteOldFile();
|
6901
|
|
|
|
|
|
|
print( "Overwrite Existing File?: $value\n" );
|
6902
|
|
|
|
|
|
|
|
6903
|
|
|
|
|
|
|
undef( $interface );
|
6904
|
|
|
|
|
|
|
|
6905
|
|
|
|
|
|
|
=head2 Word2Vec Mutator Functions
|
6906
|
|
|
|
|
|
|
|
6907
|
|
|
|
|
|
|
=head3 W2VSetTrainFilePath
|
6908
|
|
|
|
|
|
|
|
6909
|
|
|
|
|
|
|
Description:
|
6910
|
|
|
|
|
|
|
|
6911
|
|
|
|
|
|
|
Sets member variable to string parameter. Sets training file path.
|
6912
|
|
|
|
|
|
|
|
6913
|
|
|
|
|
|
|
Input:
|
6914
|
|
|
|
|
|
|
|
6915
|
|
|
|
|
|
|
$string -> Text corpus training file path
|
6916
|
|
|
|
|
|
|
|
6917
|
|
|
|
|
|
|
Output:
|
6918
|
|
|
|
|
|
|
|
6919
|
|
|
|
|
|
|
None
|
6920
|
|
|
|
|
|
|
|
6921
|
|
|
|
|
|
|
Example:
|
6922
|
|
|
|
|
|
|
|
6923
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6924
|
|
|
|
|
|
|
|
6925
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6926
|
|
|
|
|
|
|
$interface->W2VSetTrainFilePath( "samples/textcorpus.txt" );
|
6927
|
|
|
|
|
|
|
|
6928
|
|
|
|
|
|
|
undef( $interface );
|
6929
|
|
|
|
|
|
|
|
6930
|
|
|
|
|
|
|
=head3 W2VSetOutputFilePath
|
6931
|
|
|
|
|
|
|
|
6932
|
|
|
|
|
|
|
Description:
|
6933
|
|
|
|
|
|
|
|
6934
|
|
|
|
|
|
|
Sets member variable to string parameter. Sets output file path.
|
6935
|
|
|
|
|
|
|
|
6936
|
|
|
|
|
|
|
Input:
|
6937
|
|
|
|
|
|
|
|
6938
|
|
|
|
|
|
|
$string -> Post word2vec training save file path
|
6939
|
|
|
|
|
|
|
|
6940
|
|
|
|
|
|
|
Output:
|
6941
|
|
|
|
|
|
|
|
6942
|
|
|
|
|
|
|
None
|
6943
|
|
|
|
|
|
|
|
6944
|
|
|
|
|
|
|
Example:
|
6945
|
|
|
|
|
|
|
|
6946
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6947
|
|
|
|
|
|
|
|
6948
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6949
|
|
|
|
|
|
|
$interface->W2VSetOutputFilePath( "samples/tempvectors.bin" );
|
6950
|
|
|
|
|
|
|
|
6951
|
|
|
|
|
|
|
undef( $interface );
|
6952
|
|
|
|
|
|
|
|
6953
|
|
|
|
|
|
|
=head3 W2VSetWordVecSize
|
6954
|
|
|
|
|
|
|
|
6955
|
|
|
|
|
|
|
Description:
|
6956
|
|
|
|
|
|
|
|
6957
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec word vector size.
|
6958
|
|
|
|
|
|
|
|
6959
|
|
|
|
|
|
|
Input:
|
6960
|
|
|
|
|
|
|
|
6961
|
|
|
|
|
|
|
$value -> Word2vec word vector size
|
6962
|
|
|
|
|
|
|
|
6963
|
|
|
|
|
|
|
Output:
|
6964
|
|
|
|
|
|
|
|
6965
|
|
|
|
|
|
|
None
|
6966
|
|
|
|
|
|
|
|
6967
|
|
|
|
|
|
|
Example:
|
6968
|
|
|
|
|
|
|
|
6969
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6970
|
|
|
|
|
|
|
|
6971
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6972
|
|
|
|
|
|
|
$interface->W2VSetWordVecSize( 100 );
|
6973
|
|
|
|
|
|
|
|
6974
|
|
|
|
|
|
|
undef( $interface );
|
6975
|
|
|
|
|
|
|
|
6976
|
|
|
|
|
|
|
=head3 W2VSetWindowSize
|
6977
|
|
|
|
|
|
|
|
6978
|
|
|
|
|
|
|
Description:
|
6979
|
|
|
|
|
|
|
|
6980
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec window size.
|
6981
|
|
|
|
|
|
|
|
6982
|
|
|
|
|
|
|
Input:
|
6983
|
|
|
|
|
|
|
|
6984
|
|
|
|
|
|
|
$value -> Word2vec window size
|
6985
|
|
|
|
|
|
|
|
6986
|
|
|
|
|
|
|
Output:
|
6987
|
|
|
|
|
|
|
|
6988
|
|
|
|
|
|
|
None
|
6989
|
|
|
|
|
|
|
|
6990
|
|
|
|
|
|
|
Example:
|
6991
|
|
|
|
|
|
|
|
6992
|
|
|
|
|
|
|
use Word2vec::Interface;
|
6993
|
|
|
|
|
|
|
|
6994
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
6995
|
|
|
|
|
|
|
$interface->W2VSetWindowSize( 8 );
|
6996
|
|
|
|
|
|
|
|
6997
|
|
|
|
|
|
|
undef( $interface );
|
6998
|
|
|
|
|
|
|
|
6999
|
|
|
|
|
|
|
=head3 W2VSetSample
|
7000
|
|
|
|
|
|
|
|
7001
|
|
|
|
|
|
|
Description:
|
7002
|
|
|
|
|
|
|
|
7003
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec sample size.
|
7004
|
|
|
|
|
|
|
|
7005
|
|
|
|
|
|
|
Input:
|
7006
|
|
|
|
|
|
|
|
7007
|
|
|
|
|
|
|
$value -> Word2vec sample size
|
7008
|
|
|
|
|
|
|
|
7009
|
|
|
|
|
|
|
Output:
|
7010
|
|
|
|
|
|
|
|
7011
|
|
|
|
|
|
|
None
|
7012
|
|
|
|
|
|
|
|
7013
|
|
|
|
|
|
|
Example:
|
7014
|
|
|
|
|
|
|
|
7015
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7016
|
|
|
|
|
|
|
|
7017
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7018
|
|
|
|
|
|
|
$interface->W2VSetSample( 3 );
|
7019
|
|
|
|
|
|
|
|
7020
|
|
|
|
|
|
|
undef( $interface );
|
7021
|
|
|
|
|
|
|
|
7022
|
|
|
|
|
|
|
=head3 W2VSetHSoftMax
|
7023
|
|
|
|
|
|
|
|
7024
|
|
|
|
|
|
|
Description:
|
7025
|
|
|
|
|
|
|
|
7026
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec HSoftMax value.
|
7027
|
|
|
|
|
|
|
|
7028
|
|
|
|
|
|
|
Input:
|
7029
|
|
|
|
|
|
|
|
7030
|
|
|
|
|
|
|
$value -> Word2vec HSoftMax value
|
7031
|
|
|
|
|
|
|
|
7032
|
|
|
|
|
|
|
Output:
|
7033
|
|
|
|
|
|
|
|
7034
|
|
|
|
|
|
|
None
|
7035
|
|
|
|
|
|
|
|
7036
|
|
|
|
|
|
|
Example:
|
7037
|
|
|
|
|
|
|
|
7038
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7039
|
|
|
|
|
|
|
|
7040
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7041
|
|
|
|
|
|
|
$interface->W2VSetHSoftMax( 12 );
|
7042
|
|
|
|
|
|
|
|
7043
|
|
|
|
|
|
|
undef( $interface );
|
7044
|
|
|
|
|
|
|
|
7045
|
|
|
|
|
|
|
=head3 W2VSetNegative
|
7046
|
|
|
|
|
|
|
|
7047
|
|
|
|
|
|
|
Description:
|
7048
|
|
|
|
|
|
|
|
7049
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec negative value.
|
7050
|
|
|
|
|
|
|
|
7051
|
|
|
|
|
|
|
Input:
|
7052
|
|
|
|
|
|
|
|
7053
|
|
|
|
|
|
|
$value -> Word2vec negative value
|
7054
|
|
|
|
|
|
|
|
7055
|
|
|
|
|
|
|
Output:
|
7056
|
|
|
|
|
|
|
|
7057
|
|
|
|
|
|
|
None
|
7058
|
|
|
|
|
|
|
|
7059
|
|
|
|
|
|
|
Example:
|
7060
|
|
|
|
|
|
|
|
7061
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7062
|
|
|
|
|
|
|
|
7063
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7064
|
|
|
|
|
|
|
$interface->W2VSetNegative( 12 );
|
7065
|
|
|
|
|
|
|
|
7066
|
|
|
|
|
|
|
undef( $interface );
|
7067
|
|
|
|
|
|
|
|
7068
|
|
|
|
|
|
|
=head3 W2VSetNumOfThreads
|
7069
|
|
|
|
|
|
|
|
7070
|
|
|
|
|
|
|
Description:
|
7071
|
|
|
|
|
|
|
|
7072
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec number of training threads to specified value.
|
7073
|
|
|
|
|
|
|
|
7074
|
|
|
|
|
|
|
Input:
|
7075
|
|
|
|
|
|
|
|
7076
|
|
|
|
|
|
|
$value -> Word2vec number of threads value
|
7077
|
|
|
|
|
|
|
|
7078
|
|
|
|
|
|
|
Output:
|
7079
|
|
|
|
|
|
|
|
7080
|
|
|
|
|
|
|
None
|
7081
|
|
|
|
|
|
|
|
7082
|
|
|
|
|
|
|
Example:
|
7083
|
|
|
|
|
|
|
|
7084
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7085
|
|
|
|
|
|
|
|
7086
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7087
|
|
|
|
|
|
|
$interface->W2VSetNumOfThreads( 12 );
|
7088
|
|
|
|
|
|
|
|
7089
|
|
|
|
|
|
|
undef( $interface );
|
7090
|
|
|
|
|
|
|
|
7091
|
|
|
|
|
|
|
=head3 W2VSetNumOfIterations
|
7092
|
|
|
|
|
|
|
|
7093
|
|
|
|
|
|
|
Description:
|
7094
|
|
|
|
|
|
|
|
7095
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec iterations value.
|
7096
|
|
|
|
|
|
|
|
7097
|
|
|
|
|
|
|
Input:
|
7098
|
|
|
|
|
|
|
|
7099
|
|
|
|
|
|
|
$value -> Word2vec number of iterations value
|
7100
|
|
|
|
|
|
|
|
7101
|
|
|
|
|
|
|
Output:
|
7102
|
|
|
|
|
|
|
|
7103
|
|
|
|
|
|
|
None
|
7104
|
|
|
|
|
|
|
|
7105
|
|
|
|
|
|
|
Example:
|
7106
|
|
|
|
|
|
|
|
7107
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7108
|
|
|
|
|
|
|
|
7109
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7110
|
|
|
|
|
|
|
$interface->W2VSetNumOfIterations( 12 );
|
7111
|
|
|
|
|
|
|
|
7112
|
|
|
|
|
|
|
undef( $interface );
|
7113
|
|
|
|
|
|
|
|
7114
|
|
|
|
|
|
|
=head3 W2VSetMinCount
|
7115
|
|
|
|
|
|
|
|
7116
|
|
|
|
|
|
|
Description:
|
7117
|
|
|
|
|
|
|
|
7118
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec min-count value.
|
7119
|
|
|
|
|
|
|
|
7120
|
|
|
|
|
|
|
Input:
|
7121
|
|
|
|
|
|
|
|
7122
|
|
|
|
|
|
|
$value -> Word2vec min-count value
|
7123
|
|
|
|
|
|
|
|
7124
|
|
|
|
|
|
|
Output:
|
7125
|
|
|
|
|
|
|
|
7126
|
|
|
|
|
|
|
None
|
7127
|
|
|
|
|
|
|
|
7128
|
|
|
|
|
|
|
Example:
|
7129
|
|
|
|
|
|
|
|
7130
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7131
|
|
|
|
|
|
|
|
7132
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7133
|
|
|
|
|
|
|
$interface->W2VSetMinCount( 7 );
|
7134
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
undef( $interface );
|
7136
|
|
|
|
|
|
|
|
7137
|
|
|
|
|
|
|
=head3 W2VSetAlpha
|
7138
|
|
|
|
|
|
|
|
7139
|
|
|
|
|
|
|
Description:
|
7140
|
|
|
|
|
|
|
|
7141
|
|
|
|
|
|
|
Sets member variable to float parameter. Sets word2vec alpha value.
|
7142
|
|
|
|
|
|
|
|
7143
|
|
|
|
|
|
|
Input:
|
7144
|
|
|
|
|
|
|
|
7145
|
|
|
|
|
|
|
$value -> Word2vec alpha value. (Float)
|
7146
|
|
|
|
|
|
|
|
7147
|
|
|
|
|
|
|
Output:
|
7148
|
|
|
|
|
|
|
|
7149
|
|
|
|
|
|
|
None
|
7150
|
|
|
|
|
|
|
|
7151
|
|
|
|
|
|
|
Example:
|
7152
|
|
|
|
|
|
|
|
7153
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7154
|
|
|
|
|
|
|
|
7155
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7156
|
|
|
|
|
|
|
$interface->W2VSetAlpha( 0.0012 );
|
7157
|
|
|
|
|
|
|
|
7158
|
|
|
|
|
|
|
undef( $interface );
|
7159
|
|
|
|
|
|
|
|
7160
|
|
|
|
|
|
|
=head3 W2VSetClasses
|
7161
|
|
|
|
|
|
|
|
7162
|
|
|
|
|
|
|
Description:
|
7163
|
|
|
|
|
|
|
|
7164
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec classes value.
|
7165
|
|
|
|
|
|
|
|
7166
|
|
|
|
|
|
|
Input:
|
7167
|
|
|
|
|
|
|
|
7168
|
|
|
|
|
|
|
$value -> Word2vec classes value.
|
7169
|
|
|
|
|
|
|
|
7170
|
|
|
|
|
|
|
Output:
|
7171
|
|
|
|
|
|
|
|
7172
|
|
|
|
|
|
|
None
|
7173
|
|
|
|
|
|
|
|
7174
|
|
|
|
|
|
|
Example:
|
7175
|
|
|
|
|
|
|
|
7176
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7177
|
|
|
|
|
|
|
|
7178
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7179
|
|
|
|
|
|
|
$interface->W2VSetClasses( 0 );
|
7180
|
|
|
|
|
|
|
|
7181
|
|
|
|
|
|
|
undef( $interface );
|
7182
|
|
|
|
|
|
|
|
7183
|
|
|
|
|
|
|
=head3 W2VSetDebugTraining
|
7184
|
|
|
|
|
|
|
|
7185
|
|
|
|
|
|
|
Description:
|
7186
|
|
|
|
|
|
|
|
7187
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec debug parameter value.
|
7188
|
|
|
|
|
|
|
|
7189
|
|
|
|
|
|
|
Input:
|
7190
|
|
|
|
|
|
|
|
7191
|
|
|
|
|
|
|
$value -> Word2vec debug training value.
|
7192
|
|
|
|
|
|
|
|
7193
|
|
|
|
|
|
|
Output:
|
7194
|
|
|
|
|
|
|
|
7195
|
|
|
|
|
|
|
None
|
7196
|
|
|
|
|
|
|
|
7197
|
|
|
|
|
|
|
Example:
|
7198
|
|
|
|
|
|
|
|
7199
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7200
|
|
|
|
|
|
|
|
7201
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7202
|
|
|
|
|
|
|
$interface->W2VSetDebugTraining( 0 );
|
7203
|
|
|
|
|
|
|
|
7204
|
|
|
|
|
|
|
undef( $interface );
|
7205
|
|
|
|
|
|
|
|
7206
|
|
|
|
|
|
|
=head3 W2VSetBinaryOutput
|
7207
|
|
|
|
|
|
|
|
7208
|
|
|
|
|
|
|
Description:
|
7209
|
|
|
|
|
|
|
|
7210
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec binary parameter value.
|
7211
|
|
|
|
|
|
|
|
7212
|
|
|
|
|
|
|
Input:
|
7213
|
|
|
|
|
|
|
|
7214
|
|
|
|
|
|
|
$value -> Word2vec binary output mode value. ( '1' = Binary Output / '0' = Plain Text )
|
7215
|
|
|
|
|
|
|
|
7216
|
|
|
|
|
|
|
Output:
|
7217
|
|
|
|
|
|
|
|
7218
|
|
|
|
|
|
|
None
|
7219
|
|
|
|
|
|
|
|
7220
|
|
|
|
|
|
|
Example:
|
7221
|
|
|
|
|
|
|
|
7222
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7223
|
|
|
|
|
|
|
|
7224
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7225
|
|
|
|
|
|
|
$interface->W2VSetBinaryOutput( 1 );
|
7226
|
|
|
|
|
|
|
|
7227
|
|
|
|
|
|
|
undef( $interface );
|
7228
|
|
|
|
|
|
|
|
7229
|
|
|
|
|
|
|
=head3 W2VSetSaveVocabFilePath
|
7230
|
|
|
|
|
|
|
|
7231
|
|
|
|
|
|
|
Description:
|
7232
|
|
|
|
|
|
|
|
7233
|
|
|
|
|
|
|
Sets member variable to string parameter. Sets word2vec save vocabulary file name.
|
7234
|
|
|
|
|
|
|
|
7235
|
|
|
|
|
|
|
Input:
|
7236
|
|
|
|
|
|
|
|
7237
|
|
|
|
|
|
|
$string -> Word2vec save vocabulary file name and path.
|
7238
|
|
|
|
|
|
|
|
7239
|
|
|
|
|
|
|
Output:
|
7240
|
|
|
|
|
|
|
|
7241
|
|
|
|
|
|
|
None
|
7242
|
|
|
|
|
|
|
|
7243
|
|
|
|
|
|
|
Example:
|
7244
|
|
|
|
|
|
|
|
7245
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7246
|
|
|
|
|
|
|
|
7247
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7248
|
|
|
|
|
|
|
$interface->W2VSetSaveVocabFilePath( "samples/vocab.txt" );
|
7249
|
|
|
|
|
|
|
|
7250
|
|
|
|
|
|
|
undef( $interface );
|
7251
|
|
|
|
|
|
|
|
7252
|
|
|
|
|
|
|
=head3 W2VSetReadVocabFilePath
|
7253
|
|
|
|
|
|
|
|
7254
|
|
|
|
|
|
|
Description:
|
7255
|
|
|
|
|
|
|
|
7256
|
|
|
|
|
|
|
Sets member variable to string parameter. Sets word2vec read vocabulary file name.
|
7257
|
|
|
|
|
|
|
|
7258
|
|
|
|
|
|
|
Input:
|
7259
|
|
|
|
|
|
|
|
7260
|
|
|
|
|
|
|
$string -> Word2vec read vocabulary file name and path.
|
7261
|
|
|
|
|
|
|
|
7262
|
|
|
|
|
|
|
Output:
|
7263
|
|
|
|
|
|
|
|
7264
|
|
|
|
|
|
|
None
|
7265
|
|
|
|
|
|
|
|
7266
|
|
|
|
|
|
|
Example:
|
7267
|
|
|
|
|
|
|
|
7268
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7269
|
|
|
|
|
|
|
|
7270
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7271
|
|
|
|
|
|
|
$interface->W2VSetReadVocabFilePath( "samples/vocab.txt" );
|
7272
|
|
|
|
|
|
|
|
7273
|
|
|
|
|
|
|
undef( $interface );
|
7274
|
|
|
|
|
|
|
|
7275
|
|
|
|
|
|
|
=head3 W2VSetUseCBOW
|
7276
|
|
|
|
|
|
|
|
7277
|
|
|
|
|
|
|
Description:
|
7278
|
|
|
|
|
|
|
|
7279
|
|
|
|
|
|
|
Sets member variable to integer parameter. Sets word2vec CBOW parameter value.
|
7280
|
|
|
|
|
|
|
|
7281
|
|
|
|
|
|
|
Input:
|
7282
|
|
|
|
|
|
|
|
7283
|
|
|
|
|
|
|
$value -> Word2vec CBOW mode value.
|
7284
|
|
|
|
|
|
|
|
7285
|
|
|
|
|
|
|
Output:
|
7286
|
|
|
|
|
|
|
|
7287
|
|
|
|
|
|
|
None
|
7288
|
|
|
|
|
|
|
|
7289
|
|
|
|
|
|
|
Example:
|
7290
|
|
|
|
|
|
|
|
7291
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7292
|
|
|
|
|
|
|
|
7293
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7294
|
|
|
|
|
|
|
$interface->W2VSetUseCBOW( 1 );
|
7295
|
|
|
|
|
|
|
|
7296
|
|
|
|
|
|
|
undef( $interface );
|
7297
|
|
|
|
|
|
|
|
7298
|
|
|
|
|
|
|
=head3 W2VSetWorkingDir
|
7299
|
|
|
|
|
|
|
|
7300
|
|
|
|
|
|
|
Description:
|
7301
|
|
|
|
|
|
|
|
7302
|
|
|
|
|
|
|
Sets member variable to string parameter. Sets working directory.
|
7303
|
|
|
|
|
|
|
|
7304
|
|
|
|
|
|
|
Input:
|
7305
|
|
|
|
|
|
|
|
7306
|
|
|
|
|
|
|
$string -> Working directory
|
7307
|
|
|
|
|
|
|
|
7308
|
|
|
|
|
|
|
Output:
|
7309
|
|
|
|
|
|
|
|
7310
|
|
|
|
|
|
|
None
|
7311
|
|
|
|
|
|
|
|
7312
|
|
|
|
|
|
|
Example:
|
7313
|
|
|
|
|
|
|
|
7314
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7315
|
|
|
|
|
|
|
|
7316
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7317
|
|
|
|
|
|
|
$interface->W2VSetWorkingDir( "/samples" );
|
7318
|
|
|
|
|
|
|
|
7319
|
|
|
|
|
|
|
undef( $interface );
|
7320
|
|
|
|
|
|
|
|
7321
|
|
|
|
|
|
|
=head3 W2VSetWord2VecExeDir
|
7322
|
|
|
|
|
|
|
|
7323
|
|
|
|
|
|
|
Description:
|
7324
|
|
|
|
|
|
|
|
7325
|
|
|
|
|
|
|
Sets member variable to string parameter. Sets word2vec executable file directory.
|
7326
|
|
|
|
|
|
|
|
7327
|
|
|
|
|
|
|
Input:
|
7328
|
|
|
|
|
|
|
|
7329
|
|
|
|
|
|
|
$string -> Word2vec directory
|
7330
|
|
|
|
|
|
|
|
7331
|
|
|
|
|
|
|
Output:
|
7332
|
|
|
|
|
|
|
|
7333
|
|
|
|
|
|
|
None
|
7334
|
|
|
|
|
|
|
|
7335
|
|
|
|
|
|
|
Example:
|
7336
|
|
|
|
|
|
|
|
7337
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7338
|
|
|
|
|
|
|
|
7339
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7340
|
|
|
|
|
|
|
$interface->W2VSetWord2VecExeDir( "/word2vec" );
|
7341
|
|
|
|
|
|
|
|
7342
|
|
|
|
|
|
|
undef( $interface );
|
7343
|
|
|
|
|
|
|
|
7344
|
|
|
|
|
|
|
=head3 W2VSetVocabularyHash
|
7345
|
|
|
|
|
|
|
|
7346
|
|
|
|
|
|
|
Description:
|
7347
|
|
|
|
|
|
|
|
7348
|
|
|
|
|
|
|
Sets vocabulary/dictionary hash reference to hash reference parameter.
|
7349
|
|
|
|
|
|
|
|
7350
|
|
|
|
|
|
|
Warning: This will overwrite any existing vocabulary/dictionary data in memory.
|
7351
|
|
|
|
|
|
|
|
7352
|
|
|
|
|
|
|
Input:
|
7353
|
|
|
|
|
|
|
|
7354
|
|
|
|
|
|
|
$hashReference -> Vocabulary/Dictionary hash reference of word2vec word vectors.
|
7355
|
|
|
|
|
|
|
|
7356
|
|
|
|
|
|
|
Output:
|
7357
|
|
|
|
|
|
|
|
7358
|
|
|
|
|
|
|
None
|
7359
|
|
|
|
|
|
|
|
7360
|
|
|
|
|
|
|
Example:
|
7361
|
|
|
|
|
|
|
|
7362
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7363
|
|
|
|
|
|
|
|
7364
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7365
|
|
|
|
|
|
|
$interface->W2VReadTrainedVectorDataFromFile( "samples/samplevectors.bin" );
|
7366
|
|
|
|
|
|
|
my $vocabularyHashReference = $interface->W2VGetVocabularyHash();
|
7367
|
|
|
|
|
|
|
$interface->W2VSetVocabularyHash( $vocabularyHashReference );
|
7368
|
|
|
|
|
|
|
|
7369
|
|
|
|
|
|
|
undef( $interface );
|
7370
|
|
|
|
|
|
|
|
7371
|
|
|
|
|
|
|
=head3 W2VClearVocabularyHash
|
7372
|
|
|
|
|
|
|
|
7373
|
|
|
|
|
|
|
Description:
|
7374
|
|
|
|
|
|
|
|
7375
|
|
|
|
|
|
|
Clears vocabulary/dictionary hash.
|
7376
|
|
|
|
|
|
|
|
7377
|
|
|
|
|
|
|
Input:
|
7378
|
|
|
|
|
|
|
|
7379
|
|
|
|
|
|
|
None
|
7380
|
|
|
|
|
|
|
|
7381
|
|
|
|
|
|
|
Output:
|
7382
|
|
|
|
|
|
|
|
7383
|
|
|
|
|
|
|
None
|
7384
|
|
|
|
|
|
|
|
7385
|
|
|
|
|
|
|
Example:
|
7386
|
|
|
|
|
|
|
|
7387
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7388
|
|
|
|
|
|
|
|
7389
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7390
|
|
|
|
|
|
|
$interface->W2VClearVocabularyHash();
|
7391
|
|
|
|
|
|
|
|
7392
|
|
|
|
|
|
|
undef( $interface );
|
7393
|
|
|
|
|
|
|
|
7394
|
|
|
|
|
|
|
=head3 W2VAddWordVectorToVocabHash
|
7395
|
|
|
|
|
|
|
|
7396
|
|
|
|
|
|
|
Description:
|
7397
|
|
|
|
|
|
|
|
7398
|
|
|
|
|
|
|
Adds word vector string to vocabulary/dictionary.
|
7399
|
|
|
|
|
|
|
|
7400
|
|
|
|
|
|
|
Input:
|
7401
|
|
|
|
|
|
|
|
7402
|
|
|
|
|
|
|
$string -> Word2vec word vector string
|
7403
|
|
|
|
|
|
|
|
7404
|
|
|
|
|
|
|
Output:
|
7405
|
|
|
|
|
|
|
|
7406
|
|
|
|
|
|
|
None
|
7407
|
|
|
|
|
|
|
|
7408
|
|
|
|
|
|
|
Example:
|
7409
|
|
|
|
|
|
|
|
7410
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7411
|
|
|
|
|
|
|
|
7412
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7413
|
|
|
|
|
|
|
|
7414
|
|
|
|
|
|
|
# Note: This is representational data of word2vec's word vector format and not actual data.
|
7415
|
|
|
|
|
|
|
$interface->W2VAddWordVectorToVocabHash( "of 0.4346 -0.1235 0.5789 0.2347 -0.0056 -0.0001" );
|
7416
|
|
|
|
|
|
|
|
7417
|
|
|
|
|
|
|
undef( $interface );
|
7418
|
|
|
|
|
|
|
|
7419
|
|
|
|
|
|
|
=head3 W2VSetOverwriteOldFile
|
7420
|
|
|
|
|
|
|
|
7421
|
|
|
|
|
|
|
Description:
|
7422
|
|
|
|
|
|
|
|
7423
|
|
|
|
|
|
|
Sets member variable to integer parameter. Enables overwriting output file if one already exists.
|
7424
|
|
|
|
|
|
|
|
7425
|
|
|
|
|
|
|
Input:
|
7426
|
|
|
|
|
|
|
|
7427
|
|
|
|
|
|
|
$value -> '1' = Overwrite existing file / '0' = Graceful termination when file with same name exists
|
7428
|
|
|
|
|
|
|
|
7429
|
|
|
|
|
|
|
Output:
|
7430
|
|
|
|
|
|
|
|
7431
|
|
|
|
|
|
|
None
|
7432
|
|
|
|
|
|
|
|
7433
|
|
|
|
|
|
|
Example:
|
7434
|
|
|
|
|
|
|
|
7435
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7436
|
|
|
|
|
|
|
|
7437
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7438
|
|
|
|
|
|
|
$interface->W2VSetOverwriteOldFile( 1 );
|
7439
|
|
|
|
|
|
|
|
7440
|
|
|
|
|
|
|
undef( $interface );
|
7441
|
|
|
|
|
|
|
|
7442
|
|
|
|
|
|
|
=head2 Word2Phrase Main Functions
|
7443
|
|
|
|
|
|
|
|
7444
|
|
|
|
|
|
|
=head3 W2PExecuteTraining
|
7445
|
|
|
|
|
|
|
|
7446
|
|
|
|
|
|
|
Description:
|
7447
|
|
|
|
|
|
|
|
7448
|
|
|
|
|
|
|
Executes word2phrase training based on parameters. Parameter variables have higher precedence than member variables.
|
7449
|
|
|
|
|
|
|
Any parameter specified will override its respective member variable.
|
7450
|
|
|
|
|
|
|
|
7451
|
|
|
|
|
|
|
Note: If no parameters are specified, this module executes word2phrase training based on preset member
|
7452
|
|
|
|
|
|
|
variables. Returns 0 if training was successful or -1 if it was unsuccessful.
|
7453
|
|
|
|
|
|
|
|
7454
|
|
|
|
|
|
|
Input:
|
7455
|
|
|
|
|
|
|
|
7456
|
|
|
|
|
|
|
$trainFilePath -> Training text corpus file path
|
7457
|
|
|
|
|
|
|
$outputFilePath -> Vector binary file path
|
7458
|
|
|
|
|
|
|
$minCount -> Minimum bi-gram frequency (Positive Integer)
|
7459
|
|
|
|
|
|
|
$threshold -> Maximum bi-gram frequency (Positive Integer)
|
7460
|
|
|
|
|
|
|
$debug -> Displays word2phrase debug information during training. (0 = None, 1 = Show Debug Information, 2 = Show Even More Debug Information)
|
7461
|
|
|
|
|
|
|
$overwrite -> Overwrites old training file when executing training. (0 = False / 1 = True)
|
7462
|
|
|
|
|
|
|
|
7463
|
|
|
|
|
|
|
Output:
|
7464
|
|
|
|
|
|
|
|
7465
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
7466
|
|
|
|
|
|
|
|
7467
|
|
|
|
|
|
|
Example:
|
7468
|
|
|
|
|
|
|
|
7469
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7470
|
|
|
|
|
|
|
|
7471
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7472
|
|
|
|
|
|
|
$interface->W2PSetMinCount( 12 );
|
7473
|
|
|
|
|
|
|
$interface->W2PSetThreshold( 20 );
|
7474
|
|
|
|
|
|
|
$interface->W2PSetTrainFilePath( "textCorpus.txt" );
|
7475
|
|
|
|
|
|
|
$interface->W2PSetOutputFilePath( "phraseTextCorpus.txt" );
|
7476
|
|
|
|
|
|
|
$interface->W2PExecuteTraining();
|
7477
|
|
|
|
|
|
|
undef( $interface );
|
7478
|
|
|
|
|
|
|
|
7479
|
|
|
|
|
|
|
# Or
|
7480
|
|
|
|
|
|
|
|
7481
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7482
|
|
|
|
|
|
|
|
7483
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7484
|
|
|
|
|
|
|
$interface->W2PExecuteTraining( "textCorpus.txt", "phraseTextCorpus.txt", 12, 20, 2, 1 );
|
7485
|
|
|
|
|
|
|
undef( $interface );
|
7486
|
|
|
|
|
|
|
|
7487
|
|
|
|
|
|
|
=head3 W2PExecuteStringTraining
|
7488
|
|
|
|
|
|
|
|
7489
|
|
|
|
|
|
|
Description:
|
7490
|
|
|
|
|
|
|
|
7491
|
|
|
|
|
|
|
Executes word2phrase training based on parameters. Parameter variables have higher precedence than member variables.
|
7492
|
|
|
|
|
|
|
Any parameter specified will override its respective member variable.
|
7493
|
|
|
|
|
|
|
|
7494
|
|
|
|
|
|
|
Note: If no parameters are specified, this module executes word2phrase training based on preset member
|
7495
|
|
|
|
|
|
|
variables. Returns 0 if training was successful or -1 if it was unsuccessful.
|
7496
|
|
|
|
|
|
|
|
7497
|
|
|
|
|
|
|
Input:
|
7498
|
|
|
|
|
|
|
|
7499
|
|
|
|
|
|
|
$trainingString -> String to train
|
7500
|
|
|
|
|
|
|
$outputFilePath -> Vector binary file path
|
7501
|
|
|
|
|
|
|
$minCount -> Minimum bi-gram frequency (Positive Integer)
|
7502
|
|
|
|
|
|
|
$threshold -> Maximum bi-gram frequency (Positive Integer)
|
7503
|
|
|
|
|
|
|
$debug -> Displays word2phrase debug information during training. (0 = None, 1 = Show Debug Information, 2 = Show Even More Debug Information)
|
7504
|
|
|
|
|
|
|
$overwrite -> Overwrites old training file when executing training. (0 = False / 1 = True)
|
7505
|
|
|
|
|
|
|
|
7506
|
|
|
|
|
|
|
Output:
|
7507
|
|
|
|
|
|
|
|
7508
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
7509
|
|
|
|
|
|
|
|
7510
|
|
|
|
|
|
|
Example:
|
7511
|
|
|
|
|
|
|
|
7512
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7513
|
|
|
|
|
|
|
|
7514
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7515
|
|
|
|
|
|
|
$interface->W2PSetMinCount( 12 );
|
7516
|
|
|
|
|
|
|
$interface->W2PSetThreshold( 20 );
|
7517
|
|
|
|
|
|
|
$interface->W2PSetTrainFilePath( "large string to train here" );
|
7518
|
|
|
|
|
|
|
$interface->W2PSetOutputFilePath( "phraseTextCorpus.txt" );
|
7519
|
|
|
|
|
|
|
$interface->W2PExecuteTraining();
|
7520
|
|
|
|
|
|
|
undef( $interface );
|
7521
|
|
|
|
|
|
|
|
7522
|
|
|
|
|
|
|
# Or
|
7523
|
|
|
|
|
|
|
|
7524
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7525
|
|
|
|
|
|
|
|
7526
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7527
|
|
|
|
|
|
|
$interface->W2PExecuteStringTraining( "large string to train here", "phraseTextCorpus.txt", 12, 20, 2, 1 );
|
7528
|
|
|
|
|
|
|
undef( $interface );
|
7529
|
|
|
|
|
|
|
|
7530
|
|
|
|
|
|
|
=head2 Word2Phrase Accessor Functions
|
7531
|
|
|
|
|
|
|
|
7532
|
|
|
|
|
|
|
=head3 W2PGetDebugLog
|
7533
|
|
|
|
|
|
|
|
7534
|
|
|
|
|
|
|
Description:
|
7535
|
|
|
|
|
|
|
|
7536
|
|
|
|
|
|
|
Returns the _debugLog member variable set during Word2vec::Interface object initialization of new function.
|
7537
|
|
|
|
|
|
|
|
7538
|
|
|
|
|
|
|
Input:
|
7539
|
|
|
|
|
|
|
|
7540
|
|
|
|
|
|
|
None
|
7541
|
|
|
|
|
|
|
|
7542
|
|
|
|
|
|
|
Output:
|
7543
|
|
|
|
|
|
|
|
7544
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True
|
7545
|
|
|
|
|
|
|
|
7546
|
|
|
|
|
|
|
Example:
|
7547
|
|
|
|
|
|
|
|
7548
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7549
|
|
|
|
|
|
|
|
7550
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7551
|
|
|
|
|
|
|
my $debugLog = $interface->W2PGetDebugLog();
|
7552
|
|
|
|
|
|
|
|
7553
|
|
|
|
|
|
|
print( "Debug Logging Enabled\n" ) if $debugLog == 1;
|
7554
|
|
|
|
|
|
|
print( "Debug Logging Disabled\n" ) if $debugLog == 0;
|
7555
|
|
|
|
|
|
|
|
7556
|
|
|
|
|
|
|
undef( $interface );
|
7557
|
|
|
|
|
|
|
|
7558
|
|
|
|
|
|
|
=head3 W2PGetWriteLog
|
7559
|
|
|
|
|
|
|
|
7560
|
|
|
|
|
|
|
Description:
|
7561
|
|
|
|
|
|
|
|
7562
|
|
|
|
|
|
|
Returns the _writeLog member variable set during Word2vec::Interface object initialization of new function.
|
7563
|
|
|
|
|
|
|
|
7564
|
|
|
|
|
|
|
Input:
|
7565
|
|
|
|
|
|
|
|
7566
|
|
|
|
|
|
|
None
|
7567
|
|
|
|
|
|
|
|
7568
|
|
|
|
|
|
|
Output:
|
7569
|
|
|
|
|
|
|
|
7570
|
|
|
|
|
|
|
$value -> 0 = False, 1 = True
|
7571
|
|
|
|
|
|
|
|
7572
|
|
|
|
|
|
|
Example:
|
7573
|
|
|
|
|
|
|
|
7574
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7575
|
|
|
|
|
|
|
|
7576
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7577
|
|
|
|
|
|
|
my $writeLog = $interface->W2PGetWriteLog();
|
7578
|
|
|
|
|
|
|
|
7579
|
|
|
|
|
|
|
print( "Write Logging Enabled\n" ) if $writeLog == 1;
|
7580
|
|
|
|
|
|
|
print( "Write Logging Disabled\n" ) if $writeLog == 0;
|
7581
|
|
|
|
|
|
|
|
7582
|
|
|
|
|
|
|
undef( $interface );
|
7583
|
|
|
|
|
|
|
|
7584
|
|
|
|
|
|
|
=head3 W2PGetFileHandle
|
7585
|
|
|
|
|
|
|
|
7586
|
|
|
|
|
|
|
Description:
|
7587
|
|
|
|
|
|
|
|
7588
|
|
|
|
|
|
|
Returns file handle used by word2phrase::WriteLog() method.
|
7589
|
|
|
|
|
|
|
|
7590
|
|
|
|
|
|
|
Input:
|
7591
|
|
|
|
|
|
|
|
7592
|
|
|
|
|
|
|
None
|
7593
|
|
|
|
|
|
|
|
7594
|
|
|
|
|
|
|
Output:
|
7595
|
|
|
|
|
|
|
|
7596
|
|
|
|
|
|
|
$fileHandle -> Returns file handle blob used by 'WriteLog()' function or undefined.
|
7597
|
|
|
|
|
|
|
|
7598
|
|
|
|
|
|
|
Example:
|
7599
|
|
|
|
|
|
|
|
7600
|
|
|
|
|
|
|
|
7601
|
|
|
|
|
|
|
|
7602
|
|
|
|
|
|
|
=head3 W2PGetTrainFilePath
|
7603
|
|
|
|
|
|
|
|
7604
|
|
|
|
|
|
|
Description:
|
7605
|
|
|
|
|
|
|
|
7606
|
|
|
|
|
|
|
Returns (string) training file path.
|
7607
|
|
|
|
|
|
|
|
7608
|
|
|
|
|
|
|
Input:
|
7609
|
|
|
|
|
|
|
|
7610
|
|
|
|
|
|
|
None
|
7611
|
|
|
|
|
|
|
|
7612
|
|
|
|
|
|
|
Output:
|
7613
|
|
|
|
|
|
|
|
7614
|
|
|
|
|
|
|
$string -> word2phrase training file path
|
7615
|
|
|
|
|
|
|
|
7616
|
|
|
|
|
|
|
Example:
|
7617
|
|
|
|
|
|
|
|
7618
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7619
|
|
|
|
|
|
|
|
7620
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7621
|
|
|
|
|
|
|
my $filePath = $interface->W2PGetTrainFilePath();
|
7622
|
|
|
|
|
|
|
|
7623
|
|
|
|
|
|
|
print( "Training File Path: $filePath\n" ) if defined( $filePath );
|
7624
|
|
|
|
|
|
|
undef( $interface );
|
7625
|
|
|
|
|
|
|
|
7626
|
|
|
|
|
|
|
=head3 W2PGetOutputFilePath
|
7627
|
|
|
|
|
|
|
|
7628
|
|
|
|
|
|
|
Description:
|
7629
|
|
|
|
|
|
|
|
7630
|
|
|
|
|
|
|
Returns (string) output file path.
|
7631
|
|
|
|
|
|
|
|
7632
|
|
|
|
|
|
|
Input:
|
7633
|
|
|
|
|
|
|
|
7634
|
|
|
|
|
|
|
None
|
7635
|
|
|
|
|
|
|
|
7636
|
|
|
|
|
|
|
Output:
|
7637
|
|
|
|
|
|
|
|
7638
|
|
|
|
|
|
|
$string -> word2phrase output file path
|
7639
|
|
|
|
|
|
|
|
7640
|
|
|
|
|
|
|
Example:
|
7641
|
|
|
|
|
|
|
|
7642
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7643
|
|
|
|
|
|
|
|
7644
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7645
|
|
|
|
|
|
|
my $filePath = $interface->W2PGetOutputFilePath();
|
7646
|
|
|
|
|
|
|
|
7647
|
|
|
|
|
|
|
print( "Output File Path: $filePath\n" ) if defined( $filePath );
|
7648
|
|
|
|
|
|
|
undef( $interface );
|
7649
|
|
|
|
|
|
|
|
7650
|
|
|
|
|
|
|
=head3 W2PGetMinCount
|
7651
|
|
|
|
|
|
|
|
7652
|
|
|
|
|
|
|
Description:
|
7653
|
|
|
|
|
|
|
|
7654
|
|
|
|
|
|
|
Returns (integer) minimum bi-gram range.
|
7655
|
|
|
|
|
|
|
|
7656
|
|
|
|
|
|
|
Input:
|
7657
|
|
|
|
|
|
|
|
7658
|
|
|
|
|
|
|
None
|
7659
|
|
|
|
|
|
|
|
7660
|
|
|
|
|
|
|
Output:
|
7661
|
|
|
|
|
|
|
|
7662
|
|
|
|
|
|
|
$value -> Minimum bi-gram frequency (Positive Integer)
|
7663
|
|
|
|
|
|
|
|
7664
|
|
|
|
|
|
|
Example:
|
7665
|
|
|
|
|
|
|
|
7666
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7667
|
|
|
|
|
|
|
|
7668
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7669
|
|
|
|
|
|
|
my $mincount = $interface->W2PGetMinCount();
|
7670
|
|
|
|
|
|
|
|
7671
|
|
|
|
|
|
|
print( "MinCount: $mincount\n" ) if defined( $mincount );
|
7672
|
|
|
|
|
|
|
undef( $interface );
|
7673
|
|
|
|
|
|
|
|
7674
|
|
|
|
|
|
|
=head3 W2PGetThreshold
|
7675
|
|
|
|
|
|
|
|
7676
|
|
|
|
|
|
|
Description:
|
7677
|
|
|
|
|
|
|
|
7678
|
|
|
|
|
|
|
Returns (integer) maximum bi-gram range.
|
7679
|
|
|
|
|
|
|
|
7680
|
|
|
|
|
|
|
Input:
|
7681
|
|
|
|
|
|
|
|
7682
|
|
|
|
|
|
|
None
|
7683
|
|
|
|
|
|
|
|
7684
|
|
|
|
|
|
|
Output:
|
7685
|
|
|
|
|
|
|
|
7686
|
|
|
|
|
|
|
$value -> Maximum bi-gram frequency (Positive Integer)
|
7687
|
|
|
|
|
|
|
|
7688
|
|
|
|
|
|
|
Example:
|
7689
|
|
|
|
|
|
|
|
7690
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7691
|
|
|
|
|
|
|
|
7692
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7693
|
|
|
|
|
|
|
my $mincount = $interface->W2PGetThreshold();
|
7694
|
|
|
|
|
|
|
|
7695
|
|
|
|
|
|
|
print( "Threshold: $mincount\n" ) if defined( $mincount );
|
7696
|
|
|
|
|
|
|
undef( $interface );
|
7697
|
|
|
|
|
|
|
|
7698
|
|
|
|
|
|
|
=head3 W2PGetW2PDebug
|
7699
|
|
|
|
|
|
|
|
7700
|
|
|
|
|
|
|
Description:
|
7701
|
|
|
|
|
|
|
|
7702
|
|
|
|
|
|
|
Returns word2phrase debug parameter value.
|
7703
|
|
|
|
|
|
|
|
7704
|
|
|
|
|
|
|
Input:
|
7705
|
|
|
|
|
|
|
|
7706
|
|
|
|
|
|
|
None
|
7707
|
|
|
|
|
|
|
|
7708
|
|
|
|
|
|
|
Output:
|
7709
|
|
|
|
|
|
|
|
7710
|
|
|
|
|
|
|
$value -> 0 = No debugging, 1 = Show debugging, 2 = Show even more debugging
|
7711
|
|
|
|
|
|
|
|
7712
|
|
|
|
|
|
|
Example:
|
7713
|
|
|
|
|
|
|
|
7714
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7715
|
|
|
|
|
|
|
|
7716
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7717
|
|
|
|
|
|
|
my $interfacedebug = $interface->W2PGetW2PDebug();
|
7718
|
|
|
|
|
|
|
|
7719
|
|
|
|
|
|
|
print( "Word2Phrase Debug Level: $interfacedebug\n" ) if defined( $interfacedebug );
|
7720
|
|
|
|
|
|
|
|
7721
|
|
|
|
|
|
|
undef( $interface );
|
7722
|
|
|
|
|
|
|
|
7723
|
|
|
|
|
|
|
=head3 W2PGetWorkingDir
|
7724
|
|
|
|
|
|
|
|
7725
|
|
|
|
|
|
|
Description:
|
7726
|
|
|
|
|
|
|
|
7727
|
|
|
|
|
|
|
Returns (string) working directory path.
|
7728
|
|
|
|
|
|
|
|
7729
|
|
|
|
|
|
|
Input:
|
7730
|
|
|
|
|
|
|
|
7731
|
|
|
|
|
|
|
None
|
7732
|
|
|
|
|
|
|
|
7733
|
|
|
|
|
|
|
Output:
|
7734
|
|
|
|
|
|
|
|
7735
|
|
|
|
|
|
|
$string -> Current working directory path
|
7736
|
|
|
|
|
|
|
|
7737
|
|
|
|
|
|
|
Example:
|
7738
|
|
|
|
|
|
|
|
7739
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7740
|
|
|
|
|
|
|
|
7741
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7742
|
|
|
|
|
|
|
my $workingDir = $interface->W2PGetWorkingDir();
|
7743
|
|
|
|
|
|
|
|
7744
|
|
|
|
|
|
|
print( "Working Directory: $workingDir\n" ) if defined( $workingDir );
|
7745
|
|
|
|
|
|
|
|
7746
|
|
|
|
|
|
|
undef( $interface );
|
7747
|
|
|
|
|
|
|
|
7748
|
|
|
|
|
|
|
=head3 W2PGetWord2PhraseExeDir
|
7749
|
|
|
|
|
|
|
|
7750
|
|
|
|
|
|
|
Description:
|
7751
|
|
|
|
|
|
|
|
7752
|
|
|
|
|
|
|
Returns (string) word2phrase executable directory path.
|
7753
|
|
|
|
|
|
|
|
7754
|
|
|
|
|
|
|
Input:
|
7755
|
|
|
|
|
|
|
|
7756
|
|
|
|
|
|
|
None
|
7757
|
|
|
|
|
|
|
|
7758
|
|
|
|
|
|
|
Output:
|
7759
|
|
|
|
|
|
|
|
7760
|
|
|
|
|
|
|
$string -> Word2Phrase executable directory path
|
7761
|
|
|
|
|
|
|
|
7762
|
|
|
|
|
|
|
Example:
|
7763
|
|
|
|
|
|
|
|
7764
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7765
|
|
|
|
|
|
|
|
7766
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7767
|
|
|
|
|
|
|
my $workingDir = $interface->W2PGetWord2PhraseExeDir();
|
7768
|
|
|
|
|
|
|
|
7769
|
|
|
|
|
|
|
print( "Word2Phrase Executable Directory: $workingDir\n" ) if defined( $workingDir );
|
7770
|
|
|
|
|
|
|
|
7771
|
|
|
|
|
|
|
undef( $interface );
|
7772
|
|
|
|
|
|
|
|
7773
|
|
|
|
|
|
|
=head3 W2PGetOverwriteOldFile
|
7774
|
|
|
|
|
|
|
|
7775
|
|
|
|
|
|
|
Description:
|
7776
|
|
|
|
|
|
|
|
7777
|
|
|
|
|
|
|
Returns the current value of the overwrite training file variable.
|
7778
|
|
|
|
|
|
|
|
7779
|
|
|
|
|
|
|
Input:
|
7780
|
|
|
|
|
|
|
|
7781
|
|
|
|
|
|
|
None
|
7782
|
|
|
|
|
|
|
|
7783
|
|
|
|
|
|
|
Output:
|
7784
|
|
|
|
|
|
|
|
7785
|
|
|
|
|
|
|
$value -> 1 = True/Overwrite or 0 = False/Append to current file
|
7786
|
|
|
|
|
|
|
|
7787
|
|
|
|
|
|
|
Example:
|
7788
|
|
|
|
|
|
|
|
7789
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7790
|
|
|
|
|
|
|
|
7791
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7792
|
|
|
|
|
|
|
my $overwrite = $interface->W2PGetOverwriteOldFile();
|
7793
|
|
|
|
|
|
|
|
7794
|
|
|
|
|
|
|
if( defined( $overwrite ) )
|
7795
|
|
|
|
|
|
|
{
|
7796
|
|
|
|
|
|
|
print( "Overwrite Old File: " );
|
7797
|
|
|
|
|
|
|
print( "Yes\n" ) if $overwrite == 1;
|
7798
|
|
|
|
|
|
|
print( "No\n" ) if $overwrite == 0;
|
7799
|
|
|
|
|
|
|
}
|
7800
|
|
|
|
|
|
|
|
7801
|
|
|
|
|
|
|
undef( $interface );
|
7802
|
|
|
|
|
|
|
|
7803
|
|
|
|
|
|
|
=head2 Word2Phrase Mutator Functions
|
7804
|
|
|
|
|
|
|
|
7805
|
|
|
|
|
|
|
=head3 W2PSetTrainFilePath
|
7806
|
|
|
|
|
|
|
|
7807
|
|
|
|
|
|
|
Description:
|
7808
|
|
|
|
|
|
|
|
7809
|
|
|
|
|
|
|
Sets training file path.
|
7810
|
|
|
|
|
|
|
|
7811
|
|
|
|
|
|
|
Input:
|
7812
|
|
|
|
|
|
|
|
7813
|
|
|
|
|
|
|
$string -> Training file path
|
7814
|
|
|
|
|
|
|
|
7815
|
|
|
|
|
|
|
Output:
|
7816
|
|
|
|
|
|
|
|
7817
|
|
|
|
|
|
|
None
|
7818
|
|
|
|
|
|
|
|
7819
|
|
|
|
|
|
|
Example:
|
7820
|
|
|
|
|
|
|
|
7821
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7822
|
|
|
|
|
|
|
|
7823
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7824
|
|
|
|
|
|
|
$interface->W2PSetTrainFilePath( "filePath" );
|
7825
|
|
|
|
|
|
|
|
7826
|
|
|
|
|
|
|
undef( $interface );
|
7827
|
|
|
|
|
|
|
|
7828
|
|
|
|
|
|
|
=head3 W2PSetOutputFilePath
|
7829
|
|
|
|
|
|
|
|
7830
|
|
|
|
|
|
|
Description:
|
7831
|
|
|
|
|
|
|
|
7832
|
|
|
|
|
|
|
Sets word2phrase output file path.
|
7833
|
|
|
|
|
|
|
|
7834
|
|
|
|
|
|
|
Input:
|
7835
|
|
|
|
|
|
|
|
7836
|
|
|
|
|
|
|
$string -> word2phrase output file path
|
7837
|
|
|
|
|
|
|
|
7838
|
|
|
|
|
|
|
Output:
|
7839
|
|
|
|
|
|
|
|
7840
|
|
|
|
|
|
|
None
|
7841
|
|
|
|
|
|
|
|
7842
|
|
|
|
|
|
|
Example:
|
7843
|
|
|
|
|
|
|
|
7844
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7845
|
|
|
|
|
|
|
|
7846
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7847
|
|
|
|
|
|
|
$interface->W2PSetOutputFilePath( "filePath" );
|
7848
|
|
|
|
|
|
|
|
7849
|
|
|
|
|
|
|
undef( $interface );
|
7850
|
|
|
|
|
|
|
|
7851
|
|
|
|
|
|
|
=head3 W2PSetMinCount
|
7852
|
|
|
|
|
|
|
|
7853
|
|
|
|
|
|
|
Description:
|
7854
|
|
|
|
|
|
|
|
7855
|
|
|
|
|
|
|
Sets minimum range value.
|
7856
|
|
|
|
|
|
|
|
7857
|
|
|
|
|
|
|
Input:
|
7858
|
|
|
|
|
|
|
|
7859
|
|
|
|
|
|
|
$value -> Minimum frequency value (Positive integer)
|
7860
|
|
|
|
|
|
|
|
7861
|
|
|
|
|
|
|
Output:
|
7862
|
|
|
|
|
|
|
|
7863
|
|
|
|
|
|
|
None
|
7864
|
|
|
|
|
|
|
|
7865
|
|
|
|
|
|
|
Example:
|
7866
|
|
|
|
|
|
|
|
7867
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7868
|
|
|
|
|
|
|
|
7869
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7870
|
|
|
|
|
|
|
$interface->W2PSetMinCount( 1 );
|
7871
|
|
|
|
|
|
|
|
7872
|
|
|
|
|
|
|
undef( $interface );
|
7873
|
|
|
|
|
|
|
|
7874
|
|
|
|
|
|
|
=head3 W2PSetThreshold
|
7875
|
|
|
|
|
|
|
|
7876
|
|
|
|
|
|
|
Description:
|
7877
|
|
|
|
|
|
|
|
7878
|
|
|
|
|
|
|
Sets maximum range value.
|
7879
|
|
|
|
|
|
|
|
7880
|
|
|
|
|
|
|
Input:
|
7881
|
|
|
|
|
|
|
|
7882
|
|
|
|
|
|
|
$value -> Maximum frequency value (Positive integer)
|
7883
|
|
|
|
|
|
|
|
7884
|
|
|
|
|
|
|
Output:
|
7885
|
|
|
|
|
|
|
|
7886
|
|
|
|
|
|
|
None
|
7887
|
|
|
|
|
|
|
|
7888
|
|
|
|
|
|
|
Example:
|
7889
|
|
|
|
|
|
|
|
7890
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7891
|
|
|
|
|
|
|
|
7892
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7893
|
|
|
|
|
|
|
$interface->W2PSetThreshold( 100 );
|
7894
|
|
|
|
|
|
|
|
7895
|
|
|
|
|
|
|
undef( $interface );
|
7896
|
|
|
|
|
|
|
|
7897
|
|
|
|
|
|
|
=head3 W2PSetW2PDebug
|
7898
|
|
|
|
|
|
|
|
7899
|
|
|
|
|
|
|
Description:
|
7900
|
|
|
|
|
|
|
|
7901
|
|
|
|
|
|
|
Sets word2phrase debug parameter.
|
7902
|
|
|
|
|
|
|
|
7903
|
|
|
|
|
|
|
Input:
|
7904
|
|
|
|
|
|
|
|
7905
|
|
|
|
|
|
|
$value -> word2phrase debug parameter (0 = No debug info, 1 = Show debug info, 2 = Show more debug info.)
|
7906
|
|
|
|
|
|
|
|
7907
|
|
|
|
|
|
|
Output:
|
7908
|
|
|
|
|
|
|
|
7909
|
|
|
|
|
|
|
None
|
7910
|
|
|
|
|
|
|
|
7911
|
|
|
|
|
|
|
Example:
|
7912
|
|
|
|
|
|
|
|
7913
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7914
|
|
|
|
|
|
|
|
7915
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7916
|
|
|
|
|
|
|
$interface->W2PSetW2PDebug( 2 );
|
7917
|
|
|
|
|
|
|
|
7918
|
|
|
|
|
|
|
undef( $interface );
|
7919
|
|
|
|
|
|
|
|
7920
|
|
|
|
|
|
|
=head3 W2PSetWorkingDir
|
7921
|
|
|
|
|
|
|
|
7922
|
|
|
|
|
|
|
Description:
|
7923
|
|
|
|
|
|
|
|
7924
|
|
|
|
|
|
|
Sets working directory path.
|
7925
|
|
|
|
|
|
|
|
7926
|
|
|
|
|
|
|
Input:
|
7927
|
|
|
|
|
|
|
|
7928
|
|
|
|
|
|
|
$string -> Current working directory path.
|
7929
|
|
|
|
|
|
|
|
7930
|
|
|
|
|
|
|
Output:
|
7931
|
|
|
|
|
|
|
|
7932
|
|
|
|
|
|
|
None
|
7933
|
|
|
|
|
|
|
|
7934
|
|
|
|
|
|
|
Example:
|
7935
|
|
|
|
|
|
|
|
7936
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7937
|
|
|
|
|
|
|
|
7938
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7939
|
|
|
|
|
|
|
$interface->W2PSetWorkingDir( "filePath" );
|
7940
|
|
|
|
|
|
|
|
7941
|
|
|
|
|
|
|
undef( $interface );
|
7942
|
|
|
|
|
|
|
|
7943
|
|
|
|
|
|
|
=head3 W2PSetWord2PhraseExeDir
|
7944
|
|
|
|
|
|
|
|
7945
|
|
|
|
|
|
|
Description:
|
7946
|
|
|
|
|
|
|
|
7947
|
|
|
|
|
|
|
Sets word2phrase executable file directory path.
|
7948
|
|
|
|
|
|
|
|
7949
|
|
|
|
|
|
|
Input:
|
7950
|
|
|
|
|
|
|
|
7951
|
|
|
|
|
|
|
$string -> Word2Phrase executable directory path.
|
7952
|
|
|
|
|
|
|
|
7953
|
|
|
|
|
|
|
Output:
|
7954
|
|
|
|
|
|
|
|
7955
|
|
|
|
|
|
|
None
|
7956
|
|
|
|
|
|
|
|
7957
|
|
|
|
|
|
|
Example:
|
7958
|
|
|
|
|
|
|
|
7959
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7960
|
|
|
|
|
|
|
|
7961
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7962
|
|
|
|
|
|
|
$interface->W2PSetWord2PhraseExeDir( "filePath" );
|
7963
|
|
|
|
|
|
|
|
7964
|
|
|
|
|
|
|
undef( $interface );
|
7965
|
|
|
|
|
|
|
|
7966
|
|
|
|
|
|
|
=head3 W2PSetOverwriteOldFile
|
7967
|
|
|
|
|
|
|
|
7968
|
|
|
|
|
|
|
Description:
|
7969
|
|
|
|
|
|
|
|
7970
|
|
|
|
|
|
|
Enables overwriting word2phrase output file if one already exists with the same output file name.
|
7971
|
|
|
|
|
|
|
|
7972
|
|
|
|
|
|
|
Input:
|
7973
|
|
|
|
|
|
|
|
7974
|
|
|
|
|
|
|
$value -> Integer: 1 = Overwrite old file, 0 = Do not overwrite old file.
|
7975
|
|
|
|
|
|
|
|
7976
|
|
|
|
|
|
|
Output:
|
7977
|
|
|
|
|
|
|
|
7978
|
|
|
|
|
|
|
None
|
7979
|
|
|
|
|
|
|
|
7980
|
|
|
|
|
|
|
Example:
|
7981
|
|
|
|
|
|
|
|
7982
|
|
|
|
|
|
|
use Word2vec::Interface;
|
7983
|
|
|
|
|
|
|
|
7984
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
7985
|
|
|
|
|
|
|
$interface->W2PSetOverwriteOldFile( 1 );
|
7986
|
|
|
|
|
|
|
|
7987
|
|
|
|
|
|
|
undef( $interface );
|
7988
|
|
|
|
|
|
|
|
7989
|
|
|
|
|
|
|
=head2 XMLToW2V Main Functions
|
7990
|
|
|
|
|
|
|
|
7991
|
|
|
|
|
|
|
=head3 XTWConvertMedlineXMLToW2V
|
7992
|
|
|
|
|
|
|
|
7993
|
|
|
|
|
|
|
Description:
|
7994
|
|
|
|
|
|
|
|
7995
|
|
|
|
|
|
|
Parses specified parameter Medline XML file or directory of files, creating a text corpus. Returns 0 if successful or -1 during an error.
|
7996
|
|
|
|
|
|
|
|
7997
|
|
|
|
|
|
|
Note: Supports plain Medline XML or gzip-compressed (.gz) XML files.
|
7998
|
|
|
|
|
|
|
|
7999
|
|
|
|
|
|
|
Input:
|
8000
|
|
|
|
|
|
|
|
8001
|
|
|
|
|
|
|
$filePath -> XML file path to parse. (This can be a single file or directory of XML/XML.gz files).
|
8002
|
|
|
|
|
|
|
|
8003
|
|
|
|
|
|
|
Output:
|
8004
|
|
|
|
|
|
|
|
8005
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-Successful
|
8006
|
|
|
|
|
|
|
|
8007
|
|
|
|
|
|
|
Example:
|
8008
|
|
|
|
|
|
|
|
8009
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8010
|
|
|
|
|
|
|
|
8011
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new(); # Note: Specifying no parameters implies default settings
|
8012
|
|
|
|
|
|
|
$interface->XTWSetSavePath( "testCorpus.txt" );
|
8013
|
|
|
|
|
|
|
$interface->XTWSetStoreTitle( 1 );
|
8014
|
|
|
|
|
|
|
$interface->XTWSetStoreAbstract( 1 );
|
8015
|
|
|
|
|
|
|
$interface->XTWSetBeginDate( "01/01/2004" );
|
8016
|
|
|
|
|
|
|
$interface->XTWSetEndDate( "08/13/2016" );
|
8017
|
|
|
|
|
|
|
$interface->XTWSetOverwriteExistingFile( 1 );
|
8018
|
|
|
|
|
|
|
$interface->XTWConvertMedlineXMLToW2V( "/xmlDirectory/" );
|
8019
|
|
|
|
|
|
|
undef( $interface );
|
8020
|
|
|
|
|
|
|
|
8021
|
|
|
|
|
|
|
=head3 XTWCreateCompoundWordBST
|
8022
|
|
|
|
|
|
|
|
8023
|
|
|
|
|
|
|
Description:
|
8024
|
|
|
|
|
|
|
|
8025
|
|
|
|
|
|
|
Creates a binary search tree using compound word data in memory and stores root node. This also clears the compound word array afterwards.
|
8026
|
|
|
|
|
|
|
|
8027
|
|
|
|
|
|
|
Warning: Compound word file must be loaded into memory using XTWReadCompoundWordDataFromFile() prior to calling this method. This function
|
8028
|
|
|
|
|
|
|
will also delete the compound word array upon completion as it will no longer be necessary.
|
8029
|
|
|
|
|
|
|
|
8030
|
|
|
|
|
|
|
Input:
|
8031
|
|
|
|
|
|
|
|
8032
|
|
|
|
|
|
|
None
|
8033
|
|
|
|
|
|
|
|
8034
|
|
|
|
|
|
|
Output:
|
8035
|
|
|
|
|
|
|
|
8036
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
8037
|
|
|
|
|
|
|
|
8038
|
|
|
|
|
|
|
Example:
|
8039
|
|
|
|
|
|
|
|
8040
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8041
|
|
|
|
|
|
|
|
8042
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8043
|
|
|
|
|
|
|
$interface->XTWReadCompoundWordDataFromFile( "samples/compoundword.txt" );
|
8044
|
|
|
|
|
|
|
$interface->XTWCreateCompoundWordBST();
|
8045
|
|
|
|
|
|
|
|
8046
|
|
|
|
|
|
|
=head3 XTWCompoundifyString
|
8047
|
|
|
|
|
|
|
|
8048
|
|
|
|
|
|
|
Description:
|
8049
|
|
|
|
|
|
|
|
8050
|
|
|
|
|
|
|
Compoundifies string parameter based on compound word data in memory using the compound word binary search tree.
|
8051
|
|
|
|
|
|
|
|
8052
|
|
|
|
|
|
|
Warning: Compound word file must be loaded into memory using XTWReadCompoundWordDataFromFile() prior to calling this method.
|
8053
|
|
|
|
|
|
|
|
8054
|
|
|
|
|
|
|
Input:
|
8055
|
|
|
|
|
|
|
|
8056
|
|
|
|
|
|
|
$string -> String to compoundify
|
8057
|
|
|
|
|
|
|
|
8058
|
|
|
|
|
|
|
Output:
|
8059
|
|
|
|
|
|
|
|
8060
|
|
|
|
|
|
|
$string -> Compounded string or "(null)" if string parameter is not defined.
|
8061
|
|
|
|
|
|
|
|
8062
|
|
|
|
|
|
|
Example:
|
8063
|
|
|
|
|
|
|
|
8064
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8065
|
|
|
|
|
|
|
|
8066
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8067
|
|
|
|
|
|
|
$interface->XTWReadCompoundWordDataFromFile( "samples/compoundword.txt" );
|
8068
|
|
|
|
|
|
|
$interface->XTWCreateCompoundWordBST();
|
8069
|
|
|
|
|
|
|
my $compoundedString = $interface->XTWCompoundifyString( "String to compoundify" );
|
8070
|
|
|
|
|
|
|
print( "Compounded String: $compoundedString\n" );
|
8071
|
|
|
|
|
|
|
|
8072
|
|
|
|
|
|
|
undef( $interface );
|
8073
|
|
|
|
|
|
|
|
8074
|
|
|
|
|
|
|
=head3 XTWReadCompoundWordDataFromFile
|
8075
|
|
|
|
|
|
|
|
8076
|
|
|
|
|
|
|
Description:
|
8077
|
|
|
|
|
|
|
|
8078
|
|
|
|
|
|
|
Reads compound word file and stores in memory. $autoSetMaxCompWordLength parameter is not required to be set. This
|
8079
|
|
|
|
|
|
|
parameter instructs the method to auto set the maximum compound word length dependent on the longest compound word found.
|
8080
|
|
|
|
|
|
|
|
8081
|
|
|
|
|
|
|
Note: $autoSetMaxCompWordLength options: defined = True and Undefined = False.
|
8082
|
|
|
|
|
|
|
|
8083
|
|
|
|
|
|
|
Input:
|
8084
|
|
|
|
|
|
|
|
8085
|
|
|
|
|
|
|
$filePath -> Compound word file path
|
8086
|
|
|
|
|
|
|
$autoSetMaxCompWordLength -> Maximum length of a given compoundified phrase the module's compoundify algorithm will permit.
|
8087
|
|
|
|
|
|
|
|
8088
|
|
|
|
|
|
|
Note: Calling this method with $autoSetMaxCompWordLength defined will automatically set the maxCompoundWordLength variable to the longest compound phrase.
|
8089
|
|
|
|
|
|
|
|
8090
|
|
|
|
|
|
|
Output:
|
8091
|
|
|
|
|
|
|
|
8092
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
8093
|
|
|
|
|
|
|
|
8094
|
|
|
|
|
|
|
Example:
|
8095
|
|
|
|
|
|
|
|
8096
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8097
|
|
|
|
|
|
|
|
8098
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8099
|
|
|
|
|
|
|
$interface->XTWReadCompoundWordDataFromFile( "samples/compoundword.txt", 1 );
|
8100
|
|
|
|
|
|
|
|
8101
|
|
|
|
|
|
|
undef( $interface );
|
8102
|
|
|
|
|
|
|
|
8103
|
|
|
|
|
|
|
=head3 XTWSaveCompoundWordListToFile
|
8104
|
|
|
|
|
|
|
|
8105
|
|
|
|
|
|
|
Description:
|
8106
|
|
|
|
|
|
|
|
8107
|
|
|
|
|
|
|
Saves compound word data in memory to a specified file location.
|
8108
|
|
|
|
|
|
|
|
8109
|
|
|
|
|
|
|
Input:
|
8110
|
|
|
|
|
|
|
|
8111
|
|
|
|
|
|
|
$savePath -> Path to save compound word list to file.
|
8112
|
|
|
|
|
|
|
|
8113
|
|
|
|
|
|
|
Output:
|
8114
|
|
|
|
|
|
|
|
8115
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
8116
|
|
|
|
|
|
|
|
8117
|
|
|
|
|
|
|
Example:
|
8118
|
|
|
|
|
|
|
|
8119
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8120
|
|
|
|
|
|
|
|
8121
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8122
|
|
|
|
|
|
|
$interface->XTWReadCompoundWordDataFromFile( "samples/compoundword.txt" );
|
8123
|
|
|
|
|
|
|
$interface->XTWSaveCompoundWordListToFile( "samples/newcompoundword.txt" );
|
8124
|
|
|
|
|
|
|
undef( $interface );
|
8125
|
|
|
|
|
|
|
|
8126
|
|
|
|
|
|
|
=head3 XTWReadTextFromFile
|
8127
|
|
|
|
|
|
|
|
8128
|
|
|
|
|
|
|
Description:
|
8129
|
|
|
|
|
|
|
|
8130
|
|
|
|
|
|
|
Reads a plain text file with utf8 encoding into memory. Returns string data if successful and "(null)" if unsuccessful.
|
8131
|
|
|
|
|
|
|
|
8132
|
|
|
|
|
|
|
Input:
|
8133
|
|
|
|
|
|
|
|
8134
|
|
|
|
|
|
|
$filePath -> Text file to read into memory
|
8135
|
|
|
|
|
|
|
|
8136
|
|
|
|
|
|
|
Output:
|
8137
|
|
|
|
|
|
|
|
8138
|
|
|
|
|
|
|
$string -> String data if successful or "(null)" if un-successful.
|
8139
|
|
|
|
|
|
|
|
8140
|
|
|
|
|
|
|
Example:
|
8141
|
|
|
|
|
|
|
|
8142
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8143
|
|
|
|
|
|
|
|
8144
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8145
|
|
|
|
|
|
|
my $textData = $interface->XTWReadTextFromFile( "samples/textcorpus.txt" );
|
8146
|
|
|
|
|
|
|
print( "Text Data: $textData\n" );
|
8147
|
|
|
|
|
|
|
undef( $interface );
|
8148
|
|
|
|
|
|
|
|
8149
|
|
|
|
|
|
|
=head3 XTWSaveTextToFile
|
8150
|
|
|
|
|
|
|
|
8151
|
|
|
|
|
|
|
Description:
|
8152
|
|
|
|
|
|
|
|
8153
|
|
|
|
|
|
|
Saves a plain text file with utf8 encoding in a specified location.
|
8154
|
|
|
|
|
|
|
|
8155
|
|
|
|
|
|
|
Input:
|
8156
|
|
|
|
|
|
|
|
8157
|
|
|
|
|
|
|
$savePath -> Path to save string data.
|
8158
|
|
|
|
|
|
|
$string -> String to save
|
8159
|
|
|
|
|
|
|
|
8160
|
|
|
|
|
|
|
Output:
|
8161
|
|
|
|
|
|
|
|
8162
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
8163
|
|
|
|
|
|
|
|
8164
|
|
|
|
|
|
|
Example:
|
8165
|
|
|
|
|
|
|
|
8166
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8167
|
|
|
|
|
|
|
|
8168
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8169
|
|
|
|
|
|
|
my $result = $interface->XTWSaveTextToFile( "text.txt", "Hello world!" );
|
8170
|
|
|
|
|
|
|
|
8171
|
|
|
|
|
|
|
print( "File saved\n" ) if $result == 0;
|
8172
|
|
|
|
|
|
|
print( "File unable to save\n" ) if $result == -1;
|
8173
|
|
|
|
|
|
|
|
8174
|
|
|
|
|
|
|
undef( $interface );
|
8175
|
|
|
|
|
|
|
|
8176
|
|
|
|
|
|
|
=head3 XTWReadXMLDataFromFile
|
8177
|
|
|
|
|
|
|
|
8178
|
|
|
|
|
|
|
Description:
|
8179
|
|
|
|
|
|
|
|
8180
|
|
|
|
|
|
|
Reads an XML file from a specified location. Returns string in memory if successful and "(null)" if unsuccessful.
|
8181
|
|
|
|
|
|
|
|
8182
|
|
|
|
|
|
|
Input:
|
8183
|
|
|
|
|
|
|
|
8184
|
|
|
|
|
|
|
$filePath -> File to read given path
|
8185
|
|
|
|
|
|
|
|
8186
|
|
|
|
|
|
|
Output:
|
8187
|
|
|
|
|
|
|
|
8188
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
8189
|
|
|
|
|
|
|
|
8190
|
|
|
|
|
|
|
Example:
|
8191
|
|
|
|
|
|
|
|
8192
|
|
|
|
|
|
|
Warning: This is a private function and is called by XML::Twig parsing functions. It should not be called outside of xmltow2v module.
|
8193
|
|
|
|
|
|
|
|
8194
|
|
|
|
|
|
|
=head3 XTWSaveTextCorpusToFile
|
8195
|
|
|
|
|
|
|
|
8196
|
|
|
|
|
|
|
Description:
|
8197
|
|
|
|
|
|
|
|
8198
|
|
|
|
|
|
|
Saves text corpus data to specified file path. This method will append to any existing file if $appendToFile parameter
|
8199
|
|
|
|
|
|
|
is defined or "overwrite" option is disabled. Enabling "overwrite" option will overwrite any existing files.
|
8200
|
|
|
|
|
|
|
|
8201
|
|
|
|
|
|
|
Input:
|
8202
|
|
|
|
|
|
|
|
8203
|
|
|
|
|
|
|
$savePath -> Path to save the text corpus
|
8204
|
|
|
|
|
|
|
$appendToFile -> Specifies whether the module will overwrite any existing data or append to existing text corpus data.
|
8205
|
|
|
|
|
|
|
|
8206
|
|
|
|
|
|
|
Note: Leaving this variable undefined will fetch the "Overwrite" member variable and set the value to this parameter.
|
8207
|
|
|
|
|
|
|
|
8208
|
|
|
|
|
|
|
Output:
|
8209
|
|
|
|
|
|
|
|
8210
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful
|
8211
|
|
|
|
|
|
|
|
8212
|
|
|
|
|
|
|
Example:
|
8213
|
|
|
|
|
|
|
|
8214
|
|
|
|
|
|
|
Warning: This is a private function and is called by XML::Twig parsing functions. It should not be called outside of xmltow2v module.
|
8215
|
|
|
|
|
|
|
|
8216
|
|
|
|
|
|
|
=head3 XTWIsDateInSpecifiedRange
|
8217
|
|
|
|
|
|
|
|
8218
|
|
|
|
|
|
|
Description:
|
8219
|
|
|
|
|
|
|
|
8220
|
|
|
|
|
|
|
Checks to see if $date is within $beginDate and $endDate range. Returns 1 if true and 0 if false.
|
8221
|
|
|
|
|
|
|
|
8222
|
|
|
|
|
|
|
Note: Date Format: XX/XX/XXXX (Month/Day/Year)
|
8223
|
|
|
|
|
|
|
|
8224
|
|
|
|
|
|
|
Input:
|
8225
|
|
|
|
|
|
|
|
8226
|
|
|
|
|
|
|
$date -> Date to check against minimum and maximum data range. (String)
|
8227
|
|
|
|
|
|
|
$beginDate -> Minimum date range (String)
|
8228
|
|
|
|
|
|
|
$endDate -> Maximum date range (String)
|
8229
|
|
|
|
|
|
|
|
8230
|
|
|
|
|
|
|
Output:
|
8231
|
|
|
|
|
|
|
|
8232
|
|
|
|
|
|
|
$value -> '1' = True/Date is within specified range Or '0' = False/Date is not within specified range.
|
8233
|
|
|
|
|
|
|
|
8234
|
|
|
|
|
|
|
Example:
|
8235
|
|
|
|
|
|
|
|
8236
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8237
|
|
|
|
|
|
|
|
8238
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8239
|
|
|
|
|
|
|
print( "Is \"01/01/2004\" within the date range: \"02/21/1985\" to \"08/13/2016\"?\n" );
|
8240
|
|
|
|
|
|
|
print( "Yes\n" ) if $interface->XTWIsDateInSpecifiedRange( "01/01/2004", "02/21/1985", "08/13/2016" ) == 1;
|
8241
|
|
|
|
|
|
|
print( "No\n" ) if $interface->XTWIsDateInSpecifiedRange( "01/01/2004", "02/21/1985", "08/13/2016" ) == 0;
|
8242
|
|
|
|
|
|
|
|
8243
|
|
|
|
|
|
|
undef( $interface );
|
8244
|
|
|
|
|
|
|
|
8245
|
|
|
|
|
|
|
=head3 XTWIsFileOrDirectory
|
8246
|
|
|
|
|
|
|
|
8247
|
|
|
|
|
|
|
Description:
|
8248
|
|
|
|
|
|
|
|
8249
|
|
|
|
|
|
|
Checks to see if specified path is a file or directory.
|
8250
|
|
|
|
|
|
|
|
8251
|
|
|
|
|
|
|
Input:
|
8252
|
|
|
|
|
|
|
|
8253
|
|
|
|
|
|
|
$path -> File or directory path. (String)
|
8254
|
|
|
|
|
|
|
|
8255
|
|
|
|
|
|
|
Output:
|
8256
|
|
|
|
|
|
|
|
8257
|
|
|
|
|
|
|
$string -> Returns: "file" = file, "dir" = directory and "unknown" if the path is not a file or directory (undefined).
|
8258
|
|
|
|
|
|
|
|
8259
|
|
|
|
|
|
|
Example:
|
8260
|
|
|
|
|
|
|
|
8261
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8262
|
|
|
|
|
|
|
|
8263
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8264
|
|
|
|
|
|
|
my $path = "path/to/a/directory";
|
8265
|
|
|
|
|
|
|
|
8266
|
|
|
|
|
|
|
print( "Is \"$path\" a file or directory? " . $interface->XTWIsFileOrDirectory( $path ) . "\n" );
|
8267
|
|
|
|
|
|
|
|
8268
|
|
|
|
|
|
|
$path = "path/to/a/file.file";
|
8269
|
|
|
|
|
|
|
|
8270
|
|
|
|
|
|
|
print( "Is \"$path\" a file or directory? " . $interface->XTWIsFileOrDirectory( $path ) . "\n" );
|
8271
|
|
|
|
|
|
|
|
8272
|
|
|
|
|
|
|
undef( $interface );
|
8273
|
|
|
|
|
|
|
|
8274
|
|
|
|
|
|
|
=head3 XTWRemoveSpecialCharactersFromString
|
8275
|
|
|
|
|
|
|
|
8276
|
|
|
|
|
|
|
Description:
|
8277
|
|
|
|
|
|
|
|
8278
|
|
|
|
|
|
|
Removes special characters from string parameter, removes extra spaces and converts text to lowercase.
|
8279
|
|
|
|
|
|
|
|
8280
|
|
|
|
|
|
|
Note: This method is called when parsing and compiling Medline title/abstract data.
|
8281
|
|
|
|
|
|
|
|
8282
|
|
|
|
|
|
|
Input:
|
8283
|
|
|
|
|
|
|
|
8284
|
|
|
|
|
|
|
$string -> String passed to remove special characters from and convert to lowercase.
|
8285
|
|
|
|
|
|
|
|
8286
|
|
|
|
|
|
|
Output:
|
8287
|
|
|
|
|
|
|
|
8288
|
|
|
|
|
|
|
$string -> String with all special characters removed and converted to lowercase.
|
8289
|
|
|
|
|
|
|
|
8290
|
|
|
|
|
|
|
Example:
|
8291
|
|
|
|
|
|
|
|
8292
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8293
|
|
|
|
|
|
|
|
8294
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8295
|
|
|
|
|
|
|
|
8296
|
|
|
|
|
|
|
my $str = "Heart Attack is$ an!@ also KNOWN as an Acute MYOCARDIAL inFARCTion!";
|
8297
|
|
|
|
|
|
|
|
8298
|
|
|
|
|
|
|
print( "Original String: $str\n" );
|
8299
|
|
|
|
|
|
|
|
8300
|
|
|
|
|
|
|
$str = $interface->XTWRemoveSpecialCharactersFromString( $str );
|
8301
|
|
|
|
|
|
|
|
8302
|
|
|
|
|
|
|
print( "Modified String: $str\n" );
|
8303
|
|
|
|
|
|
|
|
8304
|
|
|
|
|
|
|
undef( $interface );
|
8305
|
|
|
|
|
|
|
|
8306
|
|
|
|
|
|
|
=head3 XTWGetFileType
|
8307
|
|
|
|
|
|
|
|
8308
|
|
|
|
|
|
|
Description:
|
8309
|
|
|
|
|
|
|
|
8310
|
|
|
|
|
|
|
Returns file data type (string).
|
8311
|
|
|
|
|
|
|
|
8312
|
|
|
|
|
|
|
Input:
|
8313
|
|
|
|
|
|
|
|
8314
|
|
|
|
|
|
|
$filePath -> File to check located at file path
|
8315
|
|
|
|
|
|
|
|
8316
|
|
|
|
|
|
|
Output:
|
8317
|
|
|
|
|
|
|
|
8318
|
|
|
|
|
|
|
$string -> File type
|
8319
|
|
|
|
|
|
|
|
8320
|
|
|
|
|
|
|
Example:
|
8321
|
|
|
|
|
|
|
|
8322
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8323
|
|
|
|
|
|
|
|
8324
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8325
|
|
|
|
|
|
|
my $fileType = $interface->XTWGetFileType( "samples/textcorpus.txt" );
|
8326
|
|
|
|
|
|
|
|
8327
|
|
|
|
|
|
|
undef( $interface );
|
8328
|
|
|
|
|
|
|
|
8329
|
|
|
|
|
|
|
=head3 XTWDateCheck
|
8330
|
|
|
|
|
|
|
|
8331
|
|
|
|
|
|
|
Description:
|
8332
|
|
|
|
|
|
|
|
8333
|
|
|
|
|
|
|
Checks specified begin and end date strings for formatting and logic errors.
|
8334
|
|
|
|
|
|
|
|
8335
|
|
|
|
|
|
|
Input:
|
8336
|
|
|
|
|
|
|
|
8337
|
|
|
|
|
|
|
None
|
8338
|
|
|
|
|
|
|
|
8339
|
|
|
|
|
|
|
Output:
|
8340
|
|
|
|
|
|
|
|
8341
|
|
|
|
|
|
|
$value -> "0" = Passed Checks / "-1" = Failed Checks
|
8342
|
|
|
|
|
|
|
|
8343
|
|
|
|
|
|
|
Example:
|
8344
|
|
|
|
|
|
|
|
8345
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8346
|
|
|
|
|
|
|
|
8347
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8348
|
|
|
|
|
|
|
print "Passed Date Checks\n" if ( $interface->XTWDateCheck() == 0 );
|
8349
|
|
|
|
|
|
|
print "Failed Date Checks\n" if ( $interface->XTWDateCheck() == -1 );
|
8350
|
|
|
|
|
|
|
|
8351
|
|
|
|
|
|
|
undef( $interface );
|
8352
|
|
|
|
|
|
|
|
8353
|
|
|
|
|
|
|
=head2 XMLToW2V Accessor Functions
|
8354
|
|
|
|
|
|
|
|
8355
|
|
|
|
|
|
|
=head3 XTWGetDebugLog
|
8356
|
|
|
|
|
|
|
|
8357
|
|
|
|
|
|
|
Description:
|
8358
|
|
|
|
|
|
|
|
8359
|
|
|
|
|
|
|
Returns the _debugLog member variable set during Word2vec::Interface object initialization of new function.
|
8360
|
|
|
|
|
|
|
|
8361
|
|
|
|
|
|
|
Input:
|
8362
|
|
|
|
|
|
|
|
8363
|
|
|
|
|
|
|
None
|
8364
|
|
|
|
|
|
|
|
8365
|
|
|
|
|
|
|
Output:
|
8366
|
|
|
|
|
|
|
|
8367
|
|
|
|
|
|
|
$value -> '0' = False, '1' = True
|
8368
|
|
|
|
|
|
|
|
8369
|
|
|
|
|
|
|
Example:
|
8370
|
|
|
|
|
|
|
|
8371
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8372
|
|
|
|
|
|
|
|
8373
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8374
|
|
|
|
|
|
|
my $debugLog = $interface->XTWGetDebugLog();
|
8375
|
|
|
|
|
|
|
|
8376
|
|
|
|
|
|
|
print( "Debug Logging Enabled\n" ) if $debugLog == 1;
|
8377
|
|
|
|
|
|
|
print( "Debug Logging Disabled\n" ) if $debugLog == 0;
|
8378
|
|
|
|
|
|
|
|
8379
|
|
|
|
|
|
|
|
8380
|
|
|
|
|
|
|
undef( $interface );
|
8381
|
|
|
|
|
|
|
|
8382
|
|
|
|
|
|
|
=head3 XTWGetWriteLog
|
8383
|
|
|
|
|
|
|
|
8384
|
|
|
|
|
|
|
Description:
|
8385
|
|
|
|
|
|
|
|
8386
|
|
|
|
|
|
|
Returns the _writeLog member variable set during Word2vec::Interface object initialization of new function.
|
8387
|
|
|
|
|
|
|
|
8388
|
|
|
|
|
|
|
Input:
|
8389
|
|
|
|
|
|
|
|
8390
|
|
|
|
|
|
|
None
|
8391
|
|
|
|
|
|
|
|
8392
|
|
|
|
|
|
|
Output:
|
8393
|
|
|
|
|
|
|
|
8394
|
|
|
|
|
|
|
$value -> '0' = False, '1' = True
|
8395
|
|
|
|
|
|
|
|
8396
|
|
|
|
|
|
|
Example:
|
8397
|
|
|
|
|
|
|
|
8398
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8399
|
|
|
|
|
|
|
|
8400
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8401
|
|
|
|
|
|
|
my $writeLog = $interface->XTWGetWriteLog();
|
8402
|
|
|
|
|
|
|
|
8403
|
|
|
|
|
|
|
print( "Write Logging Enabled\n" ) if $writeLog == 1;
|
8404
|
|
|
|
|
|
|
print( "Write Logging Disabled\n" ) if $writeLog == 0;
|
8405
|
|
|
|
|
|
|
|
8406
|
|
|
|
|
|
|
undef( $interface );
|
8407
|
|
|
|
|
|
|
|
8408
|
|
|
|
|
|
|
=head3 XTWGetStoreTitle
|
8409
|
|
|
|
|
|
|
|
8410
|
|
|
|
|
|
|
Description:
|
8411
|
|
|
|
|
|
|
|
8412
|
|
|
|
|
|
|
Returns the _storeTitle member variable set during Word2vec::Interface object instantiation of new function.
|
8413
|
|
|
|
|
|
|
|
8414
|
|
|
|
|
|
|
Input:
|
8415
|
|
|
|
|
|
|
|
8416
|
|
|
|
|
|
|
None
|
8417
|
|
|
|
|
|
|
|
8418
|
|
|
|
|
|
|
Output:
|
8419
|
|
|
|
|
|
|
|
8420
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False
|
8421
|
|
|
|
|
|
|
|
8422
|
|
|
|
|
|
|
Example:
|
8423
|
|
|
|
|
|
|
|
8424
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8425
|
|
|
|
|
|
|
|
8426
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8427
|
|
|
|
|
|
|
my $storeTitle = $interface->XTWGetStoreTitle();
|
8428
|
|
|
|
|
|
|
|
8429
|
|
|
|
|
|
|
print( "Store Title Option: Enabled\n" ) if $storeTitle == 1;
|
8430
|
|
|
|
|
|
|
print( "Store Title Option: Disabled\n" ) if $storeTitle == 0;
|
8431
|
|
|
|
|
|
|
|
8432
|
|
|
|
|
|
|
undef( $interface );
|
8433
|
|
|
|
|
|
|
|
8434
|
|
|
|
|
|
|
=head3 XTWGetStoreAbstract
|
8435
|
|
|
|
|
|
|
|
8436
|
|
|
|
|
|
|
Description:
|
8437
|
|
|
|
|
|
|
|
8438
|
|
|
|
|
|
|
Returns the _storeAbstract member variable set during Word2vec::Interface object instantiation of new function.
|
8439
|
|
|
|
|
|
|
|
8440
|
|
|
|
|
|
|
Input:
|
8441
|
|
|
|
|
|
|
|
8442
|
|
|
|
|
|
|
None
|
8443
|
|
|
|
|
|
|
|
8444
|
|
|
|
|
|
|
Output:
|
8445
|
|
|
|
|
|
|
|
8446
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False
|
8447
|
|
|
|
|
|
|
|
8448
|
|
|
|
|
|
|
Example:
|
8449
|
|
|
|
|
|
|
|
8450
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8451
|
|
|
|
|
|
|
|
8452
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8453
|
|
|
|
|
|
|
my $storeAbstract = $interface->XTWGetStoreAbstract();
|
8454
|
|
|
|
|
|
|
|
8455
|
|
|
|
|
|
|
print( "Store Abstract Option: Enabled\n" ) if $storeAbstract == 1;
|
8456
|
|
|
|
|
|
|
print( "Store Abstract Option: Disabled\n" ) if $storeAbstract == 0;
|
8457
|
|
|
|
|
|
|
|
8458
|
|
|
|
|
|
|
undef( $interface );
|
8459
|
|
|
|
|
|
|
|
8460
|
|
|
|
|
|
|
=head3 XTWGetQuickParse
|
8461
|
|
|
|
|
|
|
|
8462
|
|
|
|
|
|
|
Description:
|
8463
|
|
|
|
|
|
|
|
8464
|
|
|
|
|
|
|
Returns the _quickParse member variable set during Word2vec::Interface object instantiation of new function.
|
8465
|
|
|
|
|
|
|
|
8466
|
|
|
|
|
|
|
Input:
|
8467
|
|
|
|
|
|
|
|
8468
|
|
|
|
|
|
|
None
|
8469
|
|
|
|
|
|
|
|
8470
|
|
|
|
|
|
|
Output:
|
8471
|
|
|
|
|
|
|
|
8472
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False
|
8473
|
|
|
|
|
|
|
|
8474
|
|
|
|
|
|
|
Example:
|
8475
|
|
|
|
|
|
|
|
8476
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8477
|
|
|
|
|
|
|
|
8478
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8479
|
|
|
|
|
|
|
my $quickParse = $interface->XTWGetQuickParse();
|
8480
|
|
|
|
|
|
|
|
8481
|
|
|
|
|
|
|
print( "Quick Parse Option: Enabled\n" ) if $quickParse == 1;
|
8482
|
|
|
|
|
|
|
print( "Quick Parse Option: Disabled\n" ) if $quickParse == 0;
|
8483
|
|
|
|
|
|
|
|
8484
|
|
|
|
|
|
|
undef( $interface );
|
8485
|
|
|
|
|
|
|
|
8486
|
|
|
|
|
|
|
=head3 XTWGetCompoundifyText
|
8487
|
|
|
|
|
|
|
|
8488
|
|
|
|
|
|
|
Description:
|
8489
|
|
|
|
|
|
|
|
8490
|
|
|
|
|
|
|
Returns the _compoundifyText member variable set during Word2vec::Interface object instantiation of new function.
|
8491
|
|
|
|
|
|
|
|
8492
|
|
|
|
|
|
|
Input:
|
8493
|
|
|
|
|
|
|
|
8494
|
|
|
|
|
|
|
None
|
8495
|
|
|
|
|
|
|
|
8496
|
|
|
|
|
|
|
Output:
|
8497
|
|
|
|
|
|
|
|
8498
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False
|
8499
|
|
|
|
|
|
|
|
8500
|
|
|
|
|
|
|
Example:
|
8501
|
|
|
|
|
|
|
|
8502
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8503
|
|
|
|
|
|
|
|
8504
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8505
|
|
|
|
|
|
|
my $compoundify = $interface->XTWGetCompoundifyText();
|
8506
|
|
|
|
|
|
|
|
8507
|
|
|
|
|
|
|
print( "Compoundify Text Option: Enabled\n" ) if $compoundify == 1;
|
8508
|
|
|
|
|
|
|
print( "Compoundify Text Option: Disabled\n" ) if $compoundify == 0;
|
8509
|
|
|
|
|
|
|
|
8510
|
|
|
|
|
|
|
undef( $interface );
|
8511
|
|
|
|
|
|
|
|
8512
|
|
|
|
|
|
|
=head3 XTWGetNumOfThreads
|
8513
|
|
|
|
|
|
|
|
8514
|
|
|
|
|
|
|
Description:
|
8515
|
|
|
|
|
|
|
|
8516
|
|
|
|
|
|
|
Returns the _numOfThreads member variable set during Word2vec::Interface object instantiation of new function.
|
8517
|
|
|
|
|
|
|
|
8518
|
|
|
|
|
|
|
Input:
|
8519
|
|
|
|
|
|
|
|
8520
|
|
|
|
|
|
|
None
|
8521
|
|
|
|
|
|
|
|
8522
|
|
|
|
|
|
|
Output:
|
8523
|
|
|
|
|
|
|
|
8524
|
|
|
|
|
|
|
$value -> Number of threads
|
8525
|
|
|
|
|
|
|
|
8526
|
|
|
|
|
|
|
Example:
|
8527
|
|
|
|
|
|
|
|
8528
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8529
|
|
|
|
|
|
|
|
8530
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8531
|
|
|
|
|
|
|
my $numOfThreads = $interface->XTWGetNumOfThreads();
|
8532
|
|
|
|
|
|
|
|
8533
|
|
|
|
|
|
|
print( "Number of threads: $numOfThreads\n" );
|
8534
|
|
|
|
|
|
|
|
8535
|
|
|
|
|
|
|
undef( $interface );
|
8536
|
|
|
|
|
|
|
|
8537
|
|
|
|
|
|
|
=head3 XTWGetWorkingDir
|
8538
|
|
|
|
|
|
|
|
8539
|
|
|
|
|
|
|
Description:
|
8540
|
|
|
|
|
|
|
|
8541
|
|
|
|
|
|
|
Returns the _workingDir member variable set during Word2vec::Interface object instantiation of new function.
|
8542
|
|
|
|
|
|
|
|
8543
|
|
|
|
|
|
|
Input:
|
8544
|
|
|
|
|
|
|
|
8545
|
|
|
|
|
|
|
None
|
8546
|
|
|
|
|
|
|
|
8547
|
|
|
|
|
|
|
Output:
|
8548
|
|
|
|
|
|
|
|
8549
|
|
|
|
|
|
|
$string -> Working directory string
|
8550
|
|
|
|
|
|
|
|
8551
|
|
|
|
|
|
|
Example:
|
8552
|
|
|
|
|
|
|
|
8553
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8554
|
|
|
|
|
|
|
|
8555
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8556
|
|
|
|
|
|
|
my $workingDirectory = $interface->XTWGetWorkingDir();
|
8557
|
|
|
|
|
|
|
|
8558
|
|
|
|
|
|
|
print( "Working Directory: $workingDirectory\n" );
|
8559
|
|
|
|
|
|
|
|
8560
|
|
|
|
|
|
|
undef( $interface );
|
8561
|
|
|
|
|
|
|
|
8562
|
|
|
|
|
|
|
=head3 XTWGetSavePath
|
8563
|
|
|
|
|
|
|
|
8564
|
|
|
|
|
|
|
Description:
|
8565
|
|
|
|
|
|
|
|
8566
|
|
|
|
|
|
|
Returns the _saveDir member variable set during Word2vec::Interface object instantiation of new function.
|
8567
|
|
|
|
|
|
|
|
8568
|
|
|
|
|
|
|
Input:
|
8569
|
|
|
|
|
|
|
|
8570
|
|
|
|
|
|
|
None
|
8571
|
|
|
|
|
|
|
|
8572
|
|
|
|
|
|
|
Output:
|
8573
|
|
|
|
|
|
|
|
8574
|
|
|
|
|
|
|
$string -> Save directory string
|
8575
|
|
|
|
|
|
|
|
8576
|
|
|
|
|
|
|
Example:
|
8577
|
|
|
|
|
|
|
|
8578
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8579
|
|
|
|
|
|
|
|
8580
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8581
|
|
|
|
|
|
|
my $savePath = $interface->XTWGetSavePath();
|
8582
|
|
|
|
|
|
|
|
8583
|
|
|
|
|
|
|
print( "Save Directory: $savePath\n" );
|
8584
|
|
|
|
|
|
|
|
8585
|
|
|
|
|
|
|
undef( $interface );
|
8586
|
|
|
|
|
|
|
|
8587
|
|
|
|
|
|
|
=head3 XTWGetBeginDate
|
8588
|
|
|
|
|
|
|
|
8589
|
|
|
|
|
|
|
Description:
|
8590
|
|
|
|
|
|
|
|
8591
|
|
|
|
|
|
|
Returns the _beginDate member variable set during Word2vec::Interface object instantiation of new function.
|
8592
|
|
|
|
|
|
|
|
8593
|
|
|
|
|
|
|
Input:
|
8594
|
|
|
|
|
|
|
|
8595
|
|
|
|
|
|
|
None
|
8596
|
|
|
|
|
|
|
|
8597
|
|
|
|
|
|
|
Output:
|
8598
|
|
|
|
|
|
|
|
8599
|
|
|
|
|
|
|
$date -> Beginning date range - Format: XX/XX/XXXX (Mon/Day/Year)
|
8600
|
|
|
|
|
|
|
|
8601
|
|
|
|
|
|
|
Example:
|
8602
|
|
|
|
|
|
|
|
8603
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8604
|
|
|
|
|
|
|
|
8605
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8606
|
|
|
|
|
|
|
my $date = $interface->XTWGetBeginDate();
|
8607
|
|
|
|
|
|
|
|
8608
|
|
|
|
|
|
|
print( "Date: $date\n" );
|
8609
|
|
|
|
|
|
|
|
8610
|
|
|
|
|
|
|
undef( $interface );
|
8611
|
|
|
|
|
|
|
|
8612
|
|
|
|
|
|
|
=head3 XTWGetEndDate
|
8613
|
|
|
|
|
|
|
|
8614
|
|
|
|
|
|
|
Description:
|
8615
|
|
|
|
|
|
|
|
8616
|
|
|
|
|
|
|
Returns the _endDate member variable set during Word2vec::Interface object instantiation of new function.
|
8617
|
|
|
|
|
|
|
|
8618
|
|
|
|
|
|
|
Input:
|
8619
|
|
|
|
|
|
|
|
8620
|
|
|
|
|
|
|
None
|
8621
|
|
|
|
|
|
|
|
8622
|
|
|
|
|
|
|
Output:
|
8623
|
|
|
|
|
|
|
|
8624
|
|
|
|
|
|
|
$date -> End date range - Format: XX/XX/XXXX (Mon/Day/Year).
|
8625
|
|
|
|
|
|
|
|
8626
|
|
|
|
|
|
|
Example:
|
8627
|
|
|
|
|
|
|
|
8628
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8629
|
|
|
|
|
|
|
|
8630
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8631
|
|
|
|
|
|
|
my $date = $interface->XTWGetEndDate();
|
8632
|
|
|
|
|
|
|
|
8633
|
|
|
|
|
|
|
print( "Date: $date\n" );
|
8634
|
|
|
|
|
|
|
|
8635
|
|
|
|
|
|
|
undef( $interface );
|
8636
|
|
|
|
|
|
|
|
8637
|
|
|
|
|
|
|
=head3 XTWGetXMLStringToParse
|
8638
|
|
|
|
|
|
|
|
8639
|
|
|
|
|
|
|
Returns the XML data (string) to be parsed.
|
8640
|
|
|
|
|
|
|
|
8641
|
|
|
|
|
|
|
Description:
|
8642
|
|
|
|
|
|
|
|
8643
|
|
|
|
|
|
|
Returns the _xmlStringToParse member variable set during Word2vec::Interface object instantiation of new function.
|
8644
|
|
|
|
|
|
|
|
8645
|
|
|
|
|
|
|
Input:
|
8646
|
|
|
|
|
|
|
|
8647
|
|
|
|
|
|
|
None
|
8648
|
|
|
|
|
|
|
|
8649
|
|
|
|
|
|
|
Output:
|
8650
|
|
|
|
|
|
|
|
8651
|
|
|
|
|
|
|
$string -> Medline XML data string
|
8652
|
|
|
|
|
|
|
|
8653
|
|
|
|
|
|
|
Example:
|
8654
|
|
|
|
|
|
|
|
8655
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8656
|
|
|
|
|
|
|
|
8657
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8658
|
|
|
|
|
|
|
my $xmlStr = $interface->XTWGetXMLStringToParse();
|
8659
|
|
|
|
|
|
|
|
8660
|
|
|
|
|
|
|
print( "XML String: $xmlStr\n" );
|
8661
|
|
|
|
|
|
|
|
8662
|
|
|
|
|
|
|
undef( $interface );
|
8663
|
|
|
|
|
|
|
|
8664
|
|
|
|
|
|
|
=head3 XTWGetTextCorpusStr
|
8665
|
|
|
|
|
|
|
|
8666
|
|
|
|
|
|
|
Description:
|
8667
|
|
|
|
|
|
|
|
8668
|
|
|
|
|
|
|
Returns the _textCorpusStr member variable set during Word2vec::Interface object instantiation of new function.
|
8669
|
|
|
|
|
|
|
|
8670
|
|
|
|
|
|
|
Input:
|
8671
|
|
|
|
|
|
|
|
8672
|
|
|
|
|
|
|
None
|
8673
|
|
|
|
|
|
|
|
8674
|
|
|
|
|
|
|
Output:
|
8675
|
|
|
|
|
|
|
|
8676
|
|
|
|
|
|
|
$string -> Text corpus string
|
8677
|
|
|
|
|
|
|
|
8678
|
|
|
|
|
|
|
Example:
|
8679
|
|
|
|
|
|
|
|
8680
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8681
|
|
|
|
|
|
|
|
8682
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8683
|
|
|
|
|
|
|
my $str = $interface->XTWGetTextCorpusStr();
|
8684
|
|
|
|
|
|
|
|
8685
|
|
|
|
|
|
|
print( "Text Corpus: $str\n" );
|
8686
|
|
|
|
|
|
|
|
8687
|
|
|
|
|
|
|
undef( $interface );
|
8688
|
|
|
|
|
|
|
|
8689
|
|
|
|
|
|
|
=head3 XTWGetFileHandle
|
8690
|
|
|
|
|
|
|
|
8691
|
|
|
|
|
|
|
Description:
|
8692
|
|
|
|
|
|
|
|
8693
|
|
|
|
|
|
|
Returns the _fileHandle member variable set during Word2vec::Interface object instantiation of new function.
|
8694
|
|
|
|
|
|
|
|
8695
|
|
|
|
|
|
|
Warning: This is a private function. File handle is used by 'xmltow2v::WriteLog()' method. Do not manipulate this file handle as errors can result.
|
8696
|
|
|
|
|
|
|
|
8697
|
|
|
|
|
|
|
Input:
|
8698
|
|
|
|
|
|
|
|
8699
|
|
|
|
|
|
|
None
|
8700
|
|
|
|
|
|
|
|
8701
|
|
|
|
|
|
|
Output:
|
8702
|
|
|
|
|
|
|
|
8703
|
|
|
|
|
|
|
$fileHandle -> Returns file handle for WriteLog() method.
|
8704
|
|
|
|
|
|
|
|
8705
|
|
|
|
|
|
|
Example:
|
8706
|
|
|
|
|
|
|
|
8707
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8708
|
|
|
|
|
|
|
|
8709
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8710
|
|
|
|
|
|
|
my $fileHandle = $interface->XTWGetFileHandle();
|
8711
|
|
|
|
|
|
|
|
8712
|
|
|
|
|
|
|
undef( $interface );
|
8713
|
|
|
|
|
|
|
|
8714
|
|
|
|
|
|
|
=head3 XTWGetTwigHandler
|
8715
|
|
|
|
|
|
|
|
8716
|
|
|
|
|
|
|
Returns XML::Twig handler.
|
8717
|
|
|
|
|
|
|
|
8718
|
|
|
|
|
|
|
Description:
|
8719
|
|
|
|
|
|
|
|
8720
|
|
|
|
|
|
|
Returns the _twigHandler member variable set during Word2vec::Interface object instantiation of new function.
|
8721
|
|
|
|
|
|
|
|
8722
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
8723
|
|
|
|
|
|
|
|
8724
|
|
|
|
|
|
|
Input:
|
8725
|
|
|
|
|
|
|
|
8726
|
|
|
|
|
|
|
None
|
8727
|
|
|
|
|
|
|
|
8728
|
|
|
|
|
|
|
Output:
|
8729
|
|
|
|
|
|
|
|
8730
|
|
|
|
|
|
|
$twigHandler -> XML::Twig handler.
|
8731
|
|
|
|
|
|
|
|
8732
|
|
|
|
|
|
|
Example:
|
8733
|
|
|
|
|
|
|
|
8734
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8735
|
|
|
|
|
|
|
|
8736
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8737
|
|
|
|
|
|
|
my $xmlHandler = $interface->XTWGetTwigHandler();
|
8738
|
|
|
|
|
|
|
|
8739
|
|
|
|
|
|
|
undef( $interface );
|
8740
|
|
|
|
|
|
|
|
8741
|
|
|
|
|
|
|
=head3 XTWGetParsedCount
|
8742
|
|
|
|
|
|
|
|
8743
|
|
|
|
|
|
|
Description:
|
8744
|
|
|
|
|
|
|
|
8745
|
|
|
|
|
|
|
Returns the _parsedCount member variable set during Word2vec::Interface object instantiation of new function.
|
8746
|
|
|
|
|
|
|
|
8747
|
|
|
|
|
|
|
Input:
|
8748
|
|
|
|
|
|
|
|
8749
|
|
|
|
|
|
|
None
|
8750
|
|
|
|
|
|
|
|
8751
|
|
|
|
|
|
|
Output:
|
8752
|
|
|
|
|
|
|
|
8753
|
|
|
|
|
|
|
$value -> Number of parsed Medline articles.
|
8754
|
|
|
|
|
|
|
|
8755
|
|
|
|
|
|
|
Example:
|
8756
|
|
|
|
|
|
|
|
8757
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8758
|
|
|
|
|
|
|
|
8759
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8760
|
|
|
|
|
|
|
my $numOfParsed = $interface->XTWGetParsedCount();
|
8761
|
|
|
|
|
|
|
|
8762
|
|
|
|
|
|
|
print( "Number of parsed Medline articles: $numOfParsed\n" );
|
8763
|
|
|
|
|
|
|
|
8764
|
|
|
|
|
|
|
undef( $interface );
|
8765
|
|
|
|
|
|
|
|
8766
|
|
|
|
|
|
|
=head3 XTWGetTempStr
|
8767
|
|
|
|
|
|
|
|
8768
|
|
|
|
|
|
|
Description:
|
8769
|
|
|
|
|
|
|
|
8770
|
|
|
|
|
|
|
Returns the _tempStr member variable set during Word2vec::Interface object instantiation of new function.
|
8771
|
|
|
|
|
|
|
|
8772
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. Used by module as a temporary storage
|
8773
|
|
|
|
|
|
|
location for parsed Medline 'Title' and 'Abstract' flag string data.
|
8774
|
|
|
|
|
|
|
|
8775
|
|
|
|
|
|
|
Input:
|
8776
|
|
|
|
|
|
|
|
8777
|
|
|
|
|
|
|
None
|
8778
|
|
|
|
|
|
|
|
8779
|
|
|
|
|
|
|
Output:
|
8780
|
|
|
|
|
|
|
|
8781
|
|
|
|
|
|
|
$string -> Temporary string storage location.
|
8782
|
|
|
|
|
|
|
|
8783
|
|
|
|
|
|
|
Example:
|
8784
|
|
|
|
|
|
|
|
8785
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8786
|
|
|
|
|
|
|
|
8787
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8788
|
|
|
|
|
|
|
my $tempStr = $interface->XTWGetTempStr();
|
8789
|
|
|
|
|
|
|
|
8790
|
|
|
|
|
|
|
print( "Temp String: $tempStr\n" );
|
8791
|
|
|
|
|
|
|
|
8792
|
|
|
|
|
|
|
undef( $interface );
|
8793
|
|
|
|
|
|
|
|
8794
|
|
|
|
|
|
|
=head3 XTWGetTempDate
|
8795
|
|
|
|
|
|
|
|
8796
|
|
|
|
|
|
|
Description:
|
8797
|
|
|
|
|
|
|
|
8798
|
|
|
|
|
|
|
Returns the _tempDate member variable set during Word2vec::Interface object instantiation of new function.
|
8799
|
|
|
|
|
|
|
Used by module as a temporary storage location for parsed Medline 'DateCreated' flag string data.
|
8800
|
|
|
|
|
|
|
|
8801
|
|
|
|
|
|
|
Input:
|
8802
|
|
|
|
|
|
|
|
8803
|
|
|
|
|
|
|
None
|
8804
|
|
|
|
|
|
|
|
8805
|
|
|
|
|
|
|
Output:
|
8806
|
|
|
|
|
|
|
|
8807
|
|
|
|
|
|
|
$date -> Date string - Format: XX/XX/XXXX (Mon/Day/Year).
|
8808
|
|
|
|
|
|
|
|
8809
|
|
|
|
|
|
|
Example:
|
8810
|
|
|
|
|
|
|
|
8811
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8812
|
|
|
|
|
|
|
|
8813
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8814
|
|
|
|
|
|
|
my $date = $interface->XTWGetTempDate();
|
8815
|
|
|
|
|
|
|
|
8816
|
|
|
|
|
|
|
print( "Temp Date: $date\n" );
|
8817
|
|
|
|
|
|
|
|
8818
|
|
|
|
|
|
|
undef( $interface );
|
8819
|
|
|
|
|
|
|
|
8820
|
|
|
|
|
|
|
=head3 XTWGetCompoundWordAry
|
8821
|
|
|
|
|
|
|
|
8822
|
|
|
|
|
|
|
Description:
|
8823
|
|
|
|
|
|
|
|
8824
|
|
|
|
|
|
|
Returns the _compoundWordAry member array reference set during Word2vec::Interface object instantiation of new function.
|
8825
|
|
|
|
|
|
|
|
8826
|
|
|
|
|
|
|
Warning: Compound word data must be loaded in memory first via XTWReadCompoundWordDataFromFile().
|
8827
|
|
|
|
|
|
|
|
8828
|
|
|
|
|
|
|
Input:
|
8829
|
|
|
|
|
|
|
|
8830
|
|
|
|
|
|
|
None
|
8831
|
|
|
|
|
|
|
|
8832
|
|
|
|
|
|
|
Output:
|
8833
|
|
|
|
|
|
|
|
8834
|
|
|
|
|
|
|
$arrayReference -> Compound word array reference.
|
8835
|
|
|
|
|
|
|
|
8836
|
|
|
|
|
|
|
Example:
|
8837
|
|
|
|
|
|
|
|
8838
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8839
|
|
|
|
|
|
|
|
8840
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8841
|
|
|
|
|
|
|
my $arrayReference = $interface->XTWGetCompoundWordAry();
|
8842
|
|
|
|
|
|
|
my @compoundWord = @{ $arrayReference };
|
8843
|
|
|
|
|
|
|
|
8844
|
|
|
|
|
|
|
print( "Compound Word Array: @compoundWord\n" );
|
8845
|
|
|
|
|
|
|
|
8846
|
|
|
|
|
|
|
undef( $interface );
|
8847
|
|
|
|
|
|
|
|
8848
|
|
|
|
|
|
|
=head3 XTWGetCompoundWordBST
|
8849
|
|
|
|
|
|
|
|
8850
|
|
|
|
|
|
|
Description:
|
8851
|
|
|
|
|
|
|
|
8852
|
|
|
|
|
|
|
Returns the _compoundWordBST member variable set during Word2vec::Interface object instantiation of new function.
|
8853
|
|
|
|
|
|
|
|
8854
|
|
|
|
|
|
|
Input:
|
8855
|
|
|
|
|
|
|
|
8856
|
|
|
|
|
|
|
None
|
8857
|
|
|
|
|
|
|
|
8858
|
|
|
|
|
|
|
Output:
|
8859
|
|
|
|
|
|
|
|
8860
|
|
|
|
|
|
|
$bst -> Compound word binary search tree.
|
8861
|
|
|
|
|
|
|
|
8862
|
|
|
|
|
|
|
Example:
|
8863
|
|
|
|
|
|
|
|
8864
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8865
|
|
|
|
|
|
|
|
8866
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8867
|
|
|
|
|
|
|
my $bst = $interface->XTWGetCompoundWordBST();
|
8868
|
|
|
|
|
|
|
|
8869
|
|
|
|
|
|
|
undef( $interface );
|
8870
|
|
|
|
|
|
|
|
8871
|
|
|
|
|
|
|
=head3 XTWGetMaxCompoundWordLength
|
8872
|
|
|
|
|
|
|
|
8873
|
|
|
|
|
|
|
Description:
|
8874
|
|
|
|
|
|
|
|
8875
|
|
|
|
|
|
|
Returns the _maxCompoundWordLength member variable set during Word2vec::Interface object instantiation of new function.
|
8876
|
|
|
|
|
|
|
|
8877
|
|
|
|
|
|
|
Note: If not defined, it is automatically set to and returns 20.
|
8878
|
|
|
|
|
|
|
|
8879
|
|
|
|
|
|
|
Input:
|
8880
|
|
|
|
|
|
|
|
8881
|
|
|
|
|
|
|
None
|
8882
|
|
|
|
|
|
|
|
8883
|
|
|
|
|
|
|
Output:
|
8884
|
|
|
|
|
|
|
|
8885
|
|
|
|
|
|
|
$value -> Maximum number of compound words in a given phrase.
|
8886
|
|
|
|
|
|
|
|
8887
|
|
|
|
|
|
|
Example:
|
8888
|
|
|
|
|
|
|
|
8889
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8890
|
|
|
|
|
|
|
|
8891
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8892
|
|
|
|
|
|
|
my $compoundWordLength = $interface->XTWGetMaxCompoundWordLength();
|
8893
|
|
|
|
|
|
|
|
8894
|
|
|
|
|
|
|
print( "Maximum Compound Word Length: $compoundWordLength\n" );
|
8895
|
|
|
|
|
|
|
|
8896
|
|
|
|
|
|
|
undef( $interface );
|
8897
|
|
|
|
|
|
|
|
8898
|
|
|
|
|
|
|
=head3 XTWGetOverwriteExistingFile
|
8899
|
|
|
|
|
|
|
|
8900
|
|
|
|
|
|
|
Description:
|
8901
|
|
|
|
|
|
|
|
8902
|
|
|
|
|
|
|
Returns the _overwriteExistingFile member variable set during Word2vec::Interface object instantiation of new function.
|
8903
|
|
|
|
|
|
|
Enables overwriting of existing text corpus if set to '1' or appends to the existing text corpus if set to '0'.
|
8904
|
|
|
|
|
|
|
|
8905
|
|
|
|
|
|
|
Input:
|
8906
|
|
|
|
|
|
|
|
8907
|
|
|
|
|
|
|
None
|
8908
|
|
|
|
|
|
|
|
8909
|
|
|
|
|
|
|
Output:
|
8910
|
|
|
|
|
|
|
|
8911
|
|
|
|
|
|
|
$value -> '1' = Overwrite existing file / '0' = Append to existing file.
|
8912
|
|
|
|
|
|
|
|
8913
|
|
|
|
|
|
|
Example:
|
8914
|
|
|
|
|
|
|
|
8915
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8916
|
|
|
|
|
|
|
|
8917
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8918
|
|
|
|
|
|
|
my $overwriteExistingFile = $interface->XTWGetOverwriteExistingFile();
|
8919
|
|
|
|
|
|
|
|
8920
|
|
|
|
|
|
|
print( "Overwrite Existing File? YES\n" ) if ( $overwriteExistingFile == 1 );
|
8921
|
|
|
|
|
|
|
print( "Overwrite Existing File? NO\n" ) if ( $overwriteExistingFile == 0 );
|
8922
|
|
|
|
|
|
|
|
8923
|
|
|
|
|
|
|
undef( $interface );
|
8924
|
|
|
|
|
|
|
|
8925
|
|
|
|
|
|
|
=head2 XMLToW2V Mutator Functions
|
8926
|
|
|
|
|
|
|
|
8927
|
|
|
|
|
|
|
=head3 XTWSetStoreTitle
|
8928
|
|
|
|
|
|
|
|
8929
|
|
|
|
|
|
|
Description:
|
8930
|
|
|
|
|
|
|
|
8931
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to store article title if true or omit if false.
|
8932
|
|
|
|
|
|
|
|
8933
|
|
|
|
|
|
|
Input:
|
8934
|
|
|
|
|
|
|
|
8935
|
|
|
|
|
|
|
$value -> '1' = Store Titles / '0' = Omit Titles
|
8936
|
|
|
|
|
|
|
|
8937
|
|
|
|
|
|
|
Output:
|
8938
|
|
|
|
|
|
|
|
8939
|
|
|
|
|
|
|
None
|
8940
|
|
|
|
|
|
|
|
8941
|
|
|
|
|
|
|
Example:
|
8942
|
|
|
|
|
|
|
|
8943
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8944
|
|
|
|
|
|
|
|
8945
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8946
|
|
|
|
|
|
|
$interface->XTWSetStoreTitle( 1 );
|
8947
|
|
|
|
|
|
|
|
8948
|
|
|
|
|
|
|
undef( $interface );
|
8949
|
|
|
|
|
|
|
|
8950
|
|
|
|
|
|
|
=head3 XTWSetStoreAbstract
|
8951
|
|
|
|
|
|
|
|
8952
|
|
|
|
|
|
|
Description:
|
8953
|
|
|
|
|
|
|
|
8954
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to store article abstracts if true or omit if false.
|
8955
|
|
|
|
|
|
|
|
8956
|
|
|
|
|
|
|
Input:
|
8957
|
|
|
|
|
|
|
|
8958
|
|
|
|
|
|
|
$value -> '1' = Store Abstracts / '0' = Omit Abstracts
|
8959
|
|
|
|
|
|
|
|
8960
|
|
|
|
|
|
|
Output:
|
8961
|
|
|
|
|
|
|
|
8962
|
|
|
|
|
|
|
None
|
8963
|
|
|
|
|
|
|
|
8964
|
|
|
|
|
|
|
Example:
|
8965
|
|
|
|
|
|
|
|
8966
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8967
|
|
|
|
|
|
|
|
8968
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8969
|
|
|
|
|
|
|
$interface->XTWSetStoreAbstract( 1 );
|
8970
|
|
|
|
|
|
|
|
8971
|
|
|
|
|
|
|
undef( $interface );
|
8972
|
|
|
|
|
|
|
|
8973
|
|
|
|
|
|
|
=head3 XTWSetWorkingDir
|
8974
|
|
|
|
|
|
|
|
8975
|
|
|
|
|
|
|
Description:
|
8976
|
|
|
|
|
|
|
|
8977
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Represents the working directory.
|
8978
|
|
|
|
|
|
|
|
8979
|
|
|
|
|
|
|
Input:
|
8980
|
|
|
|
|
|
|
|
8981
|
|
|
|
|
|
|
$string -> Working directory string
|
8982
|
|
|
|
|
|
|
|
8983
|
|
|
|
|
|
|
Output:
|
8984
|
|
|
|
|
|
|
|
8985
|
|
|
|
|
|
|
None
|
8986
|
|
|
|
|
|
|
|
8987
|
|
|
|
|
|
|
Example:
|
8988
|
|
|
|
|
|
|
|
8989
|
|
|
|
|
|
|
use Word2vec::Interface;
|
8990
|
|
|
|
|
|
|
|
8991
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
8992
|
|
|
|
|
|
|
$interface->XTWSetWorkingDir( "/samples/" );
|
8993
|
|
|
|
|
|
|
|
8994
|
|
|
|
|
|
|
undef( $interface );
|
8995
|
|
|
|
|
|
|
|
8996
|
|
|
|
|
|
|
=head3 XTWSetSavePath
|
8997
|
|
|
|
|
|
|
|
8998
|
|
|
|
|
|
|
Description:
|
8999
|
|
|
|
|
|
|
|
9000
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Represents the text corpus save path.
|
9001
|
|
|
|
|
|
|
|
9002
|
|
|
|
|
|
|
Input:
|
9003
|
|
|
|
|
|
|
|
9004
|
|
|
|
|
|
|
$string -> Text corpus save path
|
9005
|
|
|
|
|
|
|
|
9006
|
|
|
|
|
|
|
Output:
|
9007
|
|
|
|
|
|
|
|
9008
|
|
|
|
|
|
|
None
|
9009
|
|
|
|
|
|
|
|
9010
|
|
|
|
|
|
|
Example:
|
9011
|
|
|
|
|
|
|
|
9012
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9013
|
|
|
|
|
|
|
|
9014
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9015
|
|
|
|
|
|
|
$interface->XTWSetSavePath( "samples/textcorpus.txt" );
|
9016
|
|
|
|
|
|
|
|
9017
|
|
|
|
|
|
|
undef( $interface );
|
9018
|
|
|
|
|
|
|
|
9019
|
|
|
|
|
|
|
=head3 XTWSetQuickParse
|
9020
|
|
|
|
|
|
|
|
9021
|
|
|
|
|
|
|
Description:
|
9022
|
|
|
|
|
|
|
|
9023
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to utilize quick parse
|
9024
|
|
|
|
|
|
|
routines to speed up text corpus compilation. This method is somewhat less accurate due to its non-exhaustive nature.
|
9025
|
|
|
|
|
|
|
|
9026
|
|
|
|
|
|
|
Input:
|
9027
|
|
|
|
|
|
|
|
9028
|
|
|
|
|
|
|
$value -> '1' = Enable Quick Parse / '0' = Disable Quick Parse
|
9029
|
|
|
|
|
|
|
|
9030
|
|
|
|
|
|
|
Output:
|
9031
|
|
|
|
|
|
|
|
9032
|
|
|
|
|
|
|
None
|
9033
|
|
|
|
|
|
|
|
9034
|
|
|
|
|
|
|
Example:
|
9035
|
|
|
|
|
|
|
|
9036
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9037
|
|
|
|
|
|
|
|
9038
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9039
|
|
|
|
|
|
|
$interface->XTWSetQuickParse( 1 );
|
9040
|
|
|
|
|
|
|
|
9041
|
|
|
|
|
|
|
undef( $interface );
|
9042
|
|
|
|
|
|
|
|
9043
|
|
|
|
|
|
|
=head3 XTWSetCompoundifyText
|
9044
|
|
|
|
|
|
|
|
9045
|
|
|
|
|
|
|
Description:
|
9046
|
|
|
|
|
|
|
|
9047
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to utilize 'compoundify' option if true.
|
9048
|
|
|
|
|
|
|
|
9049
|
|
|
|
|
|
|
Warning: This requires compound word data to be loaded into memory with XTWReadCompoundWordDataFromFile() method prior
|
9050
|
|
|
|
|
|
|
to executing text corpus compilation.
|
9051
|
|
|
|
|
|
|
|
9052
|
|
|
|
|
|
|
Input:
|
9053
|
|
|
|
|
|
|
|
9054
|
|
|
|
|
|
|
$value -> '1' = Compoundify text / '0' = Do not compoundify text
|
9055
|
|
|
|
|
|
|
|
9056
|
|
|
|
|
|
|
Output:
|
9057
|
|
|
|
|
|
|
|
9058
|
|
|
|
|
|
|
None
|
9059
|
|
|
|
|
|
|
|
9060
|
|
|
|
|
|
|
Example:
|
9061
|
|
|
|
|
|
|
|
9062
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9063
|
|
|
|
|
|
|
|
9064
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9065
|
|
|
|
|
|
|
$interface->XTWSetCompoundifyText( 1 );
|
9066
|
|
|
|
|
|
|
|
9067
|
|
|
|
|
|
|
undef( $interface );
|
9068
|
|
|
|
|
|
|
|
9069
|
|
|
|
|
|
|
=head3 XTWSetNumOfThreads
|
9070
|
|
|
|
|
|
|
|
9071
|
|
|
|
|
|
|
Description:
|
9072
|
|
|
|
|
|
|
|
9073
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Sets the requested number of threads to parse Medline XML files
|
9074
|
|
|
|
|
|
|
and compile the text corpus.
|
9075
|
|
|
|
|
|
|
|
9076
|
|
|
|
|
|
|
Input:
|
9077
|
|
|
|
|
|
|
|
9078
|
|
|
|
|
|
|
$value -> Integer (Positive value)
|
9079
|
|
|
|
|
|
|
|
9080
|
|
|
|
|
|
|
Output:
|
9081
|
|
|
|
|
|
|
|
9082
|
|
|
|
|
|
|
None
|
9083
|
|
|
|
|
|
|
|
9084
|
|
|
|
|
|
|
Example:
|
9085
|
|
|
|
|
|
|
|
9086
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9087
|
|
|
|
|
|
|
|
9088
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9089
|
|
|
|
|
|
|
$interface->XTWSetNumOfThreads( 4 );
|
9090
|
|
|
|
|
|
|
|
9091
|
|
|
|
|
|
|
undef( $interface );
|
9092
|
|
|
|
|
|
|
|
9093
|
|
|
|
|
|
|
=head3 XTWSetBeginDate
|
9094
|
|
|
|
|
|
|
|
9095
|
|
|
|
|
|
|
Description:
|
9096
|
|
|
|
|
|
|
|
9097
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets beginning date range for earliest articles to store, by
|
9098
|
|
|
|
|
|
|
'DateCreated' Medline tag, within the text corpus during compilation.
|
9099
|
|
|
|
|
|
|
|
9100
|
|
|
|
|
|
|
Note: Expected format - "XX/XX/XXXX" (Mon/Day/Year)
|
9101
|
|
|
|
|
|
|
|
9102
|
|
|
|
|
|
|
Input:
|
9103
|
|
|
|
|
|
|
|
9104
|
|
|
|
|
|
|
$string -> Date string - Format: "XX/XX/XXXX"
|
9105
|
|
|
|
|
|
|
|
9106
|
|
|
|
|
|
|
Output:
|
9107
|
|
|
|
|
|
|
|
9108
|
|
|
|
|
|
|
None
|
9109
|
|
|
|
|
|
|
|
9110
|
|
|
|
|
|
|
Example:
|
9111
|
|
|
|
|
|
|
|
9112
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9113
|
|
|
|
|
|
|
|
9114
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9115
|
|
|
|
|
|
|
$interface->XTWSetBeginDate( "01/01/2004" );
|
9116
|
|
|
|
|
|
|
|
9117
|
|
|
|
|
|
|
undef( $interface );
|
9118
|
|
|
|
|
|
|
|
9119
|
|
|
|
|
|
|
=head3 XTWSetEndDate
|
9120
|
|
|
|
|
|
|
|
9121
|
|
|
|
|
|
|
Description:
|
9122
|
|
|
|
|
|
|
|
9123
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets ending date range for latest article to store, by
|
9124
|
|
|
|
|
|
|
'DateCreated' Medline tag, within the text corpus during compilation.
|
9125
|
|
|
|
|
|
|
|
9126
|
|
|
|
|
|
|
Note: Expected format - "XX/XX/XXXX" (Mon/Day/Year)
|
9127
|
|
|
|
|
|
|
|
9128
|
|
|
|
|
|
|
Input:
|
9129
|
|
|
|
|
|
|
|
9130
|
|
|
|
|
|
|
$string -> Date string - Format: "XX/XX/XXXX"
|
9131
|
|
|
|
|
|
|
|
9132
|
|
|
|
|
|
|
Output:
|
9133
|
|
|
|
|
|
|
|
9134
|
|
|
|
|
|
|
None
|
9135
|
|
|
|
|
|
|
|
9136
|
|
|
|
|
|
|
Example:
|
9137
|
|
|
|
|
|
|
|
9138
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9139
|
|
|
|
|
|
|
|
9140
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9141
|
|
|
|
|
|
|
$interface->XTWSetEndDate( "08/13/2016" );
|
9142
|
|
|
|
|
|
|
|
9143
|
|
|
|
|
|
|
undef( $interface );
|
9144
|
|
|
|
|
|
|
|
9145
|
|
|
|
|
|
|
=head3 XTWSetXMLStringToParse
|
9146
|
|
|
|
|
|
|
|
9147
|
|
|
|
|
|
|
Description:
|
9148
|
|
|
|
|
|
|
|
9149
|
|
|
|
|
|
|
Sets member variable to passed string parameter. This string normally consists of Medline XML data to be
|
9150
|
|
|
|
|
|
|
parsed for text corpus compilation.
|
9151
|
|
|
|
|
|
|
|
9152
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9153
|
|
|
|
|
|
|
|
9154
|
|
|
|
|
|
|
Input:
|
9155
|
|
|
|
|
|
|
|
9156
|
|
|
|
|
|
|
$string -> String
|
9157
|
|
|
|
|
|
|
|
9158
|
|
|
|
|
|
|
Output:
|
9159
|
|
|
|
|
|
|
|
9160
|
|
|
|
|
|
|
None
|
9161
|
|
|
|
|
|
|
|
9162
|
|
|
|
|
|
|
Example:
|
9163
|
|
|
|
|
|
|
|
9164
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9165
|
|
|
|
|
|
|
|
9166
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9167
|
|
|
|
|
|
|
$interface->XTWSetXMLStringToParse( "Hello World!" );
|
9168
|
|
|
|
|
|
|
|
9169
|
|
|
|
|
|
|
undef( $interface );
|
9170
|
|
|
|
|
|
|
|
9171
|
|
|
|
|
|
|
=head3 XTWSetTextCorpusStr
|
9172
|
|
|
|
|
|
|
|
9173
|
|
|
|
|
|
|
Description:
|
9174
|
|
|
|
|
|
|
|
9175
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Overwrites any stored text corpus data in memory with the string parameter.
|
9176
|
|
|
|
|
|
|
|
9177
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9178
|
|
|
|
|
|
|
|
9179
|
|
|
|
|
|
|
Input:
|
9180
|
|
|
|
|
|
|
|
9181
|
|
|
|
|
|
|
$string -> String
|
9182
|
|
|
|
|
|
|
|
9183
|
|
|
|
|
|
|
Output:
|
9184
|
|
|
|
|
|
|
|
9185
|
|
|
|
|
|
|
None
|
9186
|
|
|
|
|
|
|
|
9187
|
|
|
|
|
|
|
Example:
|
9188
|
|
|
|
|
|
|
|
9189
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9190
|
|
|
|
|
|
|
|
9191
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9192
|
|
|
|
|
|
|
$interface->XTWSetTextCorpusStr( "Hello World!" );
|
9193
|
|
|
|
|
|
|
|
9194
|
|
|
|
|
|
|
undef( $interface );
|
9195
|
|
|
|
|
|
|
|
9196
|
|
|
|
|
|
|
=head3 XTWAppendStrToTextCorpus
|
9197
|
|
|
|
|
|
|
|
9198
|
|
|
|
|
|
|
Description:
|
9199
|
|
|
|
|
|
|
|
9200
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Appends string parameter to text corpus string in memory.
|
9201
|
|
|
|
|
|
|
|
9202
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9203
|
|
|
|
|
|
|
|
9204
|
|
|
|
|
|
|
Input:
|
9205
|
|
|
|
|
|
|
|
9206
|
|
|
|
|
|
|
$string -> String
|
9207
|
|
|
|
|
|
|
|
9208
|
|
|
|
|
|
|
Output:
|
9209
|
|
|
|
|
|
|
|
9210
|
|
|
|
|
|
|
None
|
9211
|
|
|
|
|
|
|
|
9212
|
|
|
|
|
|
|
Example:
|
9213
|
|
|
|
|
|
|
|
9214
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9215
|
|
|
|
|
|
|
|
9216
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9217
|
|
|
|
|
|
|
$interface->XTWAppendStrToTextCorpus( "Hello World!" );
|
9218
|
|
|
|
|
|
|
|
9219
|
|
|
|
|
|
|
undef( $interface );
|
9220
|
|
|
|
|
|
|
|
9221
|
|
|
|
|
|
|
=head3 XTWClearTextCorpus
|
9222
|
|
|
|
|
|
|
|
9223
|
|
|
|
|
|
|
Description:
|
9224
|
|
|
|
|
|
|
|
9225
|
|
|
|
|
|
|
Clears text corpus data in memory.
|
9226
|
|
|
|
|
|
|
|
9227
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9228
|
|
|
|
|
|
|
|
9229
|
|
|
|
|
|
|
Input:
|
9230
|
|
|
|
|
|
|
|
9231
|
|
|
|
|
|
|
None
|
9232
|
|
|
|
|
|
|
|
9233
|
|
|
|
|
|
|
Output:
|
9234
|
|
|
|
|
|
|
|
9235
|
|
|
|
|
|
|
None
|
9236
|
|
|
|
|
|
|
|
9237
|
|
|
|
|
|
|
Example:
|
9238
|
|
|
|
|
|
|
|
9239
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9240
|
|
|
|
|
|
|
|
9241
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9242
|
|
|
|
|
|
|
$interface->XTWClearTextCorpus();
|
9243
|
|
|
|
|
|
|
|
9244
|
|
|
|
|
|
|
undef( $interface );
|
9245
|
|
|
|
|
|
|
|
9246
|
|
|
|
|
|
|
=head3 XTWSetTempStr
|
9247
|
|
|
|
|
|
|
|
9248
|
|
|
|
|
|
|
Description:
|
9249
|
|
|
|
|
|
|
|
9250
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets temporary member string to passed string parameter.
|
9251
|
|
|
|
|
|
|
(Temporary placeholder for Medline Title and Abstract data).
|
9252
|
|
|
|
|
|
|
|
9253
|
|
|
|
|
|
|
Note: This removes special characters and converts all characters to lowercase.
|
9254
|
|
|
|
|
|
|
|
9255
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9256
|
|
|
|
|
|
|
|
9257
|
|
|
|
|
|
|
Input:
|
9258
|
|
|
|
|
|
|
|
9259
|
|
|
|
|
|
|
$string -> String
|
9260
|
|
|
|
|
|
|
|
9261
|
|
|
|
|
|
|
Output:
|
9262
|
|
|
|
|
|
|
|
9263
|
|
|
|
|
|
|
None
|
9264
|
|
|
|
|
|
|
|
9265
|
|
|
|
|
|
|
Example:
|
9266
|
|
|
|
|
|
|
|
9267
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9268
|
|
|
|
|
|
|
|
9269
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9270
|
|
|
|
|
|
|
$interface->XTWSetTempStr( "Hello World!" );
|
9271
|
|
|
|
|
|
|
|
9272
|
|
|
|
|
|
|
undef( $interface );
|
9273
|
|
|
|
|
|
|
|
9274
|
|
|
|
|
|
|
=head3 XTWAppendToTempStr
|
9275
|
|
|
|
|
|
|
|
9276
|
|
|
|
|
|
|
Description:
|
9277
|
|
|
|
|
|
|
|
9278
|
|
|
|
|
|
|
Appends string parameter to temporary member string in memory.
|
9279
|
|
|
|
|
|
|
|
9280
|
|
|
|
|
|
|
Note: This removes special characters and converts all characters to lowercase.
|
9281
|
|
|
|
|
|
|
|
9282
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9283
|
|
|
|
|
|
|
|
9284
|
|
|
|
|
|
|
Input:
|
9285
|
|
|
|
|
|
|
|
9286
|
|
|
|
|
|
|
$string -> String
|
9287
|
|
|
|
|
|
|
|
9288
|
|
|
|
|
|
|
Output:
|
9289
|
|
|
|
|
|
|
|
9290
|
|
|
|
|
|
|
None
|
9291
|
|
|
|
|
|
|
|
9292
|
|
|
|
|
|
|
Example:
|
9293
|
|
|
|
|
|
|
|
9294
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9295
|
|
|
|
|
|
|
|
9296
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9297
|
|
|
|
|
|
|
$interface->XTWAppendToTempStr( "Hello World!" );
|
9298
|
|
|
|
|
|
|
|
9299
|
|
|
|
|
|
|
undef( $interface );
|
9300
|
|
|
|
|
|
|
|
9301
|
|
|
|
|
|
|
=head3 XTWClearTempStr
|
9302
|
|
|
|
|
|
|
|
9303
|
|
|
|
|
|
|
Clears the temporary string storage in memory.
|
9304
|
|
|
|
|
|
|
|
9305
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9306
|
|
|
|
|
|
|
|
9307
|
|
|
|
|
|
|
Input:
|
9308
|
|
|
|
|
|
|
|
9309
|
|
|
|
|
|
|
None
|
9310
|
|
|
|
|
|
|
|
9311
|
|
|
|
|
|
|
Output:
|
9312
|
|
|
|
|
|
|
|
9313
|
|
|
|
|
|
|
None
|
9314
|
|
|
|
|
|
|
|
9315
|
|
|
|
|
|
|
Example:
|
9316
|
|
|
|
|
|
|
|
9317
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9318
|
|
|
|
|
|
|
|
9319
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9320
|
|
|
|
|
|
|
$interface->XTWClearTempStr();
|
9321
|
|
|
|
|
|
|
|
9322
|
|
|
|
|
|
|
undef( $interface );
|
9323
|
|
|
|
|
|
|
|
9324
|
|
|
|
|
|
|
=head3 XTWSetTempDate
|
9325
|
|
|
|
|
|
|
|
9326
|
|
|
|
|
|
|
Description:
|
9327
|
|
|
|
|
|
|
|
9328
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets temporary date string to passed string.
|
9329
|
|
|
|
|
|
|
|
9330
|
|
|
|
|
|
|
Note: Date Format - "XX/XX/XXXX" (Mon/Day/Year)
|
9331
|
|
|
|
|
|
|
|
9332
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9333
|
|
|
|
|
|
|
|
9334
|
|
|
|
|
|
|
Input:
|
9335
|
|
|
|
|
|
|
|
9336
|
|
|
|
|
|
|
$string -> Date string - Format: "XX/XX/XXXX"
|
9337
|
|
|
|
|
|
|
|
9338
|
|
|
|
|
|
|
Output:
|
9339
|
|
|
|
|
|
|
|
9340
|
|
|
|
|
|
|
None
|
9341
|
|
|
|
|
|
|
|
9342
|
|
|
|
|
|
|
Example:
|
9343
|
|
|
|
|
|
|
|
9344
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9345
|
|
|
|
|
|
|
|
9346
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9347
|
|
|
|
|
|
|
$interface->XTWSetTempDate( "08/13/2016" );
|
9348
|
|
|
|
|
|
|
|
9349
|
|
|
|
|
|
|
undef( $interface );
|
9350
|
|
|
|
|
|
|
|
9351
|
|
|
|
|
|
|
=head3 XTWClearTempDate
|
9352
|
|
|
|
|
|
|
|
9353
|
|
|
|
|
|
|
Description:
|
9354
|
|
|
|
|
|
|
|
9355
|
|
|
|
|
|
|
Clears the temporary date storage location in memory.
|
9356
|
|
|
|
|
|
|
|
9357
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9358
|
|
|
|
|
|
|
|
9359
|
|
|
|
|
|
|
Input:
|
9360
|
|
|
|
|
|
|
|
9361
|
|
|
|
|
|
|
None
|
9362
|
|
|
|
|
|
|
|
9363
|
|
|
|
|
|
|
Output:
|
9364
|
|
|
|
|
|
|
|
9365
|
|
|
|
|
|
|
None
|
9366
|
|
|
|
|
|
|
|
9367
|
|
|
|
|
|
|
Example:
|
9368
|
|
|
|
|
|
|
|
9369
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9370
|
|
|
|
|
|
|
|
9371
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9372
|
|
|
|
|
|
|
$interface->XTWClearTempDate();
|
9373
|
|
|
|
|
|
|
|
9374
|
|
|
|
|
|
|
undef( $interface );
|
9375
|
|
|
|
|
|
|
|
9376
|
|
|
|
|
|
|
=head3 XTWSetCompoundWordAry
|
9377
|
|
|
|
|
|
|
|
9378
|
|
|
|
|
|
|
Description:
|
9379
|
|
|
|
|
|
|
|
9380
|
|
|
|
|
|
|
Sets member variable to de-referenced passed array reference parameter. Stores compound word array by
|
9381
|
|
|
|
|
|
|
de-referencing array reference parameter.
|
9382
|
|
|
|
|
|
|
|
9383
|
|
|
|
|
|
|
Note: Clears previous data if existing.
|
9384
|
|
|
|
|
|
|
|
9385
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9386
|
|
|
|
|
|
|
|
9387
|
|
|
|
|
|
|
Input:
|
9388
|
|
|
|
|
|
|
|
9389
|
|
|
|
|
|
|
$arrayReference -> Array reference of compound words
|
9390
|
|
|
|
|
|
|
|
9391
|
|
|
|
|
|
|
Output:
|
9392
|
|
|
|
|
|
|
|
9393
|
|
|
|
|
|
|
None
|
9394
|
|
|
|
|
|
|
|
9395
|
|
|
|
|
|
|
Example:
|
9396
|
|
|
|
|
|
|
|
9397
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9398
|
|
|
|
|
|
|
|
9399
|
|
|
|
|
|
|
my @compoundWordAry = ( "big dog", "respiratory failure", "seven large masses" );
|
9400
|
|
|
|
|
|
|
|
9401
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9402
|
|
|
|
|
|
|
$interface->XTWSetCompoundWordAry( \@compoundWordAry );
|
9403
|
|
|
|
|
|
|
|
9404
|
|
|
|
|
|
|
undef( $interface );
|
9405
|
|
|
|
|
|
|
|
9406
|
|
|
|
|
|
|
=head3 XTWClearCompoundWordAry
|
9407
|
|
|
|
|
|
|
|
9408
|
|
|
|
|
|
|
Description:
|
9409
|
|
|
|
|
|
|
|
9410
|
|
|
|
|
|
|
Clears compound word array in memory.
|
9411
|
|
|
|
|
|
|
|
9412
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9413
|
|
|
|
|
|
|
|
9414
|
|
|
|
|
|
|
Input:
|
9415
|
|
|
|
|
|
|
|
9416
|
|
|
|
|
|
|
None
|
9417
|
|
|
|
|
|
|
|
9418
|
|
|
|
|
|
|
Output:
|
9419
|
|
|
|
|
|
|
|
9420
|
|
|
|
|
|
|
None
|
9421
|
|
|
|
|
|
|
|
9422
|
|
|
|
|
|
|
Example:
|
9423
|
|
|
|
|
|
|
|
9424
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9425
|
|
|
|
|
|
|
|
9426
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9427
|
|
|
|
|
|
|
$interface->XTWClearCompoundWordAry();
|
9428
|
|
|
|
|
|
|
|
9429
|
|
|
|
|
|
|
undef( $interface );
|
9430
|
|
|
|
|
|
|
|
9431
|
|
|
|
|
|
|
=head3 XTWSetCompoundWordBST
|
9432
|
|
|
|
|
|
|
|
9433
|
|
|
|
|
|
|
Description:
|
9434
|
|
|
|
|
|
|
|
9435
|
|
|
|
|
|
|
Sets member variable to passed Word2vec::Bst parameter. Sets compound word binary search tree to passed binary tree parameter.
|
9436
|
|
|
|
|
|
|
|
9437
|
|
|
|
|
|
|
Note: Un-defines previous binary tree if existing.
|
9438
|
|
|
|
|
|
|
|
9439
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9440
|
|
|
|
|
|
|
|
9441
|
|
|
|
|
|
|
Input:
|
9442
|
|
|
|
|
|
|
|
9443
|
|
|
|
|
|
|
Word2vec::Bst -> Binary Search Tree
|
9444
|
|
|
|
|
|
|
|
9445
|
|
|
|
|
|
|
Output:
|
9446
|
|
|
|
|
|
|
|
9447
|
|
|
|
|
|
|
None
|
9448
|
|
|
|
|
|
|
|
9449
|
|
|
|
|
|
|
Example:
|
9450
|
|
|
|
|
|
|
|
9451
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9452
|
|
|
|
|
|
|
|
9453
|
|
|
|
|
|
|
my @compoundWordAry = ( "big dog", "respiratory failure", "seven large masses" );
|
9454
|
|
|
|
|
|
|
@compoundWordAry = sort( @compoundWordAry );
|
9455
|
|
|
|
|
|
|
|
9456
|
|
|
|
|
|
|
my $arySize = @compoundWordAry;
|
9457
|
|
|
|
|
|
|
|
9458
|
|
|
|
|
|
|
my $bst = Word2vec::Bst->new();
|
9459
|
|
|
|
|
|
|
$bst->CreateTree( \@compoundWordAry, 0, $arySize, undef );
|
9460
|
|
|
|
|
|
|
|
9461
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9462
|
|
|
|
|
|
|
$interface->XTWSetCompoundWordBST( $bst );
|
9463
|
|
|
|
|
|
|
|
9464
|
|
|
|
|
|
|
undef( $interface );
|
9465
|
|
|
|
|
|
|
|
9466
|
|
|
|
|
|
|
=head3 XTWClearCompoundWordBST
|
9467
|
|
|
|
|
|
|
|
9468
|
|
|
|
|
|
|
Description:
|
9469
|
|
|
|
|
|
|
|
9470
|
|
|
|
|
|
|
Clears/Un-defines existing compound word binary search tree from memory.
|
9471
|
|
|
|
|
|
|
|
9472
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9473
|
|
|
|
|
|
|
|
9474
|
|
|
|
|
|
|
Input:
|
9475
|
|
|
|
|
|
|
|
9476
|
|
|
|
|
|
|
None
|
9477
|
|
|
|
|
|
|
|
9478
|
|
|
|
|
|
|
Output:
|
9479
|
|
|
|
|
|
|
|
9480
|
|
|
|
|
|
|
None
|
9481
|
|
|
|
|
|
|
|
9482
|
|
|
|
|
|
|
Example:
|
9483
|
|
|
|
|
|
|
|
9484
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9485
|
|
|
|
|
|
|
|
9486
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9487
|
|
|
|
|
|
|
$interface->XTWClearCompoundWordBST();
|
9488
|
|
|
|
|
|
|
|
9489
|
|
|
|
|
|
|
undef( $interface );
|
9490
|
|
|
|
|
|
|
|
9491
|
|
|
|
|
|
|
=head3 XTWSetMaxCompoundWordLength
|
9492
|
|
|
|
|
|
|
|
9493
|
|
|
|
|
|
|
Description:
|
9494
|
|
|
|
|
|
|
|
9495
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Sets maximum number of compound words in a phrase for comparison.
|
9496
|
|
|
|
|
|
|
|
9497
|
|
|
|
|
|
|
e.g. "medical campus of Virginia Commonwealth University" can be interpreted as a compound word of six words.
|
9498
|
|
|
|
|
|
|
Setting this variable to 3 will only attempt compoundifying a maximum of three words.
|
9499
|
|
|
|
|
|
|
The result would be "medical_campus_of Virginia commonwealth university" even though an exact representation
|
9500
|
|
|
|
|
|
|
of this compounded string can exist. Setting this variable to 6 will result in compounding all six words if
|
9501
|
|
|
|
|
|
|
they exist in the compound word array/bst.
|
9502
|
|
|
|
|
|
|
|
9503
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated.
|
9504
|
|
|
|
|
|
|
|
9505
|
|
|
|
|
|
|
Input:
|
9506
|
|
|
|
|
|
|
|
9507
|
|
|
|
|
|
|
$value -> Integer
|
9508
|
|
|
|
|
|
|
|
9509
|
|
|
|
|
|
|
Output:
|
9510
|
|
|
|
|
|
|
|
9511
|
|
|
|
|
|
|
None
|
9512
|
|
|
|
|
|
|
|
9513
|
|
|
|
|
|
|
Example:
|
9514
|
|
|
|
|
|
|
|
9515
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9516
|
|
|
|
|
|
|
|
9517
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9518
|
|
|
|
|
|
|
$interface->XTWSetMaxCompoundWordLength( 8 );
|
9519
|
|
|
|
|
|
|
|
9520
|
|
|
|
|
|
|
undef( $interface );
|
9521
|
|
|
|
|
|
|
|
9522
|
|
|
|
|
|
|
=head3 XTWSetOverwriteExistingFile
|
9523
|
|
|
|
|
|
|
|
9524
|
|
|
|
|
|
|
Description:
|
9525
|
|
|
|
|
|
|
|
9526
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Sets option to overwrite existing text corpus during compilation
|
9527
|
|
|
|
|
|
|
if 1 or append to existing text corpus if 0.
|
9528
|
|
|
|
|
|
|
|
9529
|
|
|
|
|
|
|
Input:
|
9530
|
|
|
|
|
|
|
|
9531
|
|
|
|
|
|
|
$value -> '1' = Overwrite existing text corpus / '0' = Append to existing text corpus during compilation.
|
9532
|
|
|
|
|
|
|
|
9533
|
|
|
|
|
|
|
Output:
|
9534
|
|
|
|
|
|
|
|
9535
|
|
|
|
|
|
|
None
|
9536
|
|
|
|
|
|
|
|
9537
|
|
|
|
|
|
|
Example:
|
9538
|
|
|
|
|
|
|
|
9539
|
|
|
|
|
|
|
use Word2vec::Interface;
|
9540
|
|
|
|
|
|
|
|
9541
|
|
|
|
|
|
|
my $interface = Word2vec::Interface->new();
|
9542
|
|
|
|
|
|
|
$interface->XTWSetOverwriteExistingFile( 1 );
|
9543
|
|
|
|
|
|
|
|
9544
|
|
|
|
|
|
|
undef( $interface );
|
9545
|
|
|
|
|
|
|
|
9546
|
|
|
|
|
|
|
=head1 Author
|
9547
|
|
|
|
|
|
|
|
9548
|
|
|
|
|
|
|
Clint Cuffy, Virginia Commonwealth University
|
9549
|
|
|
|
|
|
|
|
9550
|
|
|
|
|
|
|
=head1 COPYRIGHT
|
9551
|
|
|
|
|
|
|
|
9552
|
|
|
|
|
|
|
Copyright (c) 2016
|
9553
|
|
|
|
|
|
|
|
9554
|
|
|
|
|
|
|
Bridget T McInnes, Virginia Commonwealth University
|
9555
|
|
|
|
|
|
|
btmcinnes at vcu dot edu
|
9556
|
|
|
|
|
|
|
|
9557
|
|
|
|
|
|
|
Clint Cuffy, Virginia Commonwealth University
|
9558
|
|
|
|
|
|
|
cuffyca at vcu dot edu
|
9559
|
|
|
|
|
|
|
|
9560
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
9561
|
|
|
|
|
|
|
under the terms of the GNU General Public License as published by the Free
|
9562
|
|
|
|
|
|
|
Software Foundation; either version 2 of the License, or (at your option)
|
9563
|
|
|
|
|
|
|
any later version.
|
9564
|
|
|
|
|
|
|
|
9565
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
9566
|
|
|
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
9567
|
|
|
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
9568
|
|
|
|
|
|
|
|
9569
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
9570
|
|
|
|
|
|
|
this program; if not, write to:
|
9571
|
|
|
|
|
|
|
|
9572
|
|
|
|
|
|
|
The Free Software Foundation, Inc.,
|
9573
|
|
|
|
|
|
|
59 Temple Place - Suite 330,
|
9574
|
|
|
|
|
|
|
Boston, MA 02111-1307, USA.
|
9575
|
|
|
|
|
|
|
|
9576
|
|
|
|
|
|
|
=cut
|