| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | #!/usr/bin/perl | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | ###################################################################################### | 
| 4 |  |  |  |  |  |  | #                                                                                    # | 
| 5 |  |  |  |  |  |  | #    Author: Clint Cuffy                                                             # | 
| 6 |  |  |  |  |  |  | #    Date:    06/16/2016                                                             # | 
| 7 |  |  |  |  |  |  | #    Revised: 04/06/2017                                                             # | 
| 8 |  |  |  |  |  |  | #    UMLS Similarity - Medline XML-To-Word2Vec Input Format Conversion Module        # | 
| 9 |  |  |  |  |  |  | #                                                                                    # | 
| 10 |  |  |  |  |  |  | ###################################################################################### | 
| 11 |  |  |  |  |  |  | #                                                                                    # | 
| 12 |  |  |  |  |  |  | #    Description:                                                                    # | 
| 13 |  |  |  |  |  |  | #    ============                                                                    # | 
| 14 |  |  |  |  |  |  | #                 Perl Medline XML-To-Word2Vec Input Format Conversion Module        # | 
| 15 |  |  |  |  |  |  | #                 for the "word2vec" package.                                        # | 
| 16 |  |  |  |  |  |  | #    Features:                                                                       # | 
| 17 |  |  |  |  |  |  | #    =========                                                                       # | 
| 18 |  |  |  |  |  |  | #                 Supports Parsing Individual Files or Directories                   # | 
| 19 |  |  |  |  |  |  | #                 Plain XML files or .gz XML files (extracts and processes in RAM)   # | 
| 20 |  |  |  |  |  |  | #                 Include results by specified Date Ranges: 00/00/0000 Format        # | 
| 21 |  |  |  |  |  |  | #                 Include results by title, abstract or both per article             # | 
| 22 |  |  |  |  |  |  | #                 Multi-Threading Support - Divides work by number of threads        # | 
| 23 |  |  |  |  |  |  | #                 Text Compoundify                                                   # | 
| 24 |  |  |  |  |  |  | #                                                                                    # | 
| 25 |  |  |  |  |  |  | ###################################################################################### | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | package Word2vec::Xmltow2v; | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 4 |  |  | 4 |  | 57343 | use strict; | 
|  | 4 |  |  |  |  | 9 |  | 
|  | 4 |  |  |  |  | 110 |  | 
| 31 | 4 |  |  | 4 |  | 20 | use warnings; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 88 |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | # Standard Package(s) | 
| 34 | 4 |  |  | 4 |  | 1389 | use utf8; | 
|  | 4 |  |  |  |  | 48 |  | 
|  | 4 |  |  |  |  | 19 |  | 
| 35 | 4 |  |  | 4 |  | 1416 | use threads; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | use threads::shared; | 
| 37 |  |  |  |  |  |  | use IO::Uncompress::Gunzip qw(gunzip $GunzipError); | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | # CPAN Package(s) | 
| 40 |  |  |  |  |  |  | use Cwd; | 
| 41 |  |  |  |  |  |  | use File::Type; | 
| 42 |  |  |  |  |  |  | use Text::Unidecode; | 
| 43 |  |  |  |  |  |  | use XML::Twig; | 
| 44 |  |  |  |  |  |  | use Sys::CpuAffinity; | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | # Word2Vec Utility Package(s) | 
| 47 |  |  |  |  |  |  | use Word2vec::Bst; | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | use vars qw($VERSION); | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | $VERSION = '0.02'; | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | # Global Variables | 
# Lock variables shared between threads. $queueLock serialises access to
# @xmlJobQueue inside _ThreadedConvert; the remaining three are presumably
# taken by logging / file-append code elsewhere in this module -- TODO confirm.
my ( $debugLock, $writeLock, $queueLock, $appendLock ) :shared;

# File names waiting to be parsed by the worker threads.
my @xmlJobQueue :shared;

# Job counters: number of queued files, and number handed out to workers.
my ( $totalJobCount, $finishedJobCount ) :shared;

# Word statistics reported at the end of a directory conversion.
my ( $preCompWordCount, $postCompWordCount, $compoundWordCount ) :shared;
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | ###################################################################################### | 
| 69 |  |  |  |  |  |  | #    Constructor | 
| 70 |  |  |  |  |  |  | ###################################################################################### | 
| 71 |  |  |  |  |  |  |  | 
BEGIN
{
    # Compile-time hook for this package (runs once when the module is
    # compiled, not once per object). Intentionally empty.
}
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | ###################################################################################### | 
| 79 |  |  |  |  |  |  | #    Destructor | 
| 80 |  |  |  |  |  |  | ###################################################################################### | 
| 81 |  |  |  |  |  |  |  | 
END
{
    # Interpreter-shutdown hook for this package (runs once at program exit,
    # not once per object; per-object cleanup lives in DESTROY).
    # Intentionally empty.
}
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | ###################################################################################### | 
| 89 |  |  |  |  |  |  | #    new Class Operator | 
| 90 |  |  |  |  |  |  | ###################################################################################### | 
| 91 |  |  |  |  |  |  |  | 
# Constructs a Word2vec::Xmltow2v object.
#
# Arguments are positional and all optional; any argument that is missing or
# undef receives the default shown in the body below. Order:
#   debugLog, writeLog, storeTitle, storeAbstract, quickParse,
#   compoundifyText, numOfThreads, workingDir, savePath, beginDate, endDate,
#   xmlStringToParse, textCorpusStr, fileHandle, twigHandler, parsedCount,
#   tempDate, tempStr, compoundWordAry, compoundWordBST,
#   maxCompoundWordLength, overwriteExistingFile, compoundWordCount
#
# Side effects: resets the module-wide shared job queue and counters, opens
# 'Xmltow2vLog.txt' when writeLog is true, always installs a fresh XML::Twig
# parser (any twigHandler argument is replaced), and immediately parses
# xmlStringToParse when one was supplied.
#
# Returns the blessed object.
sub new
{
    my ( $class, @args ) = @_;

    # Private member variables, in constructor-argument order
    my @positionalFields = (
        '_debugLog',                # Boolean (Binary): 0 = False, 1 = True
        '_writeLog',                # Boolean (Binary): 0 = False, 1 = True
        '_storeTitle',              # Boolean (Binary): 0 = False, 1 = True
        '_storeAbstract',           # Boolean (Binary): 0 = False, 1 = True
        '_quickParse',              # Boolean (Binary): 0 = False, 1 = True
        '_compoundifyText',         # Boolean (Binary): 0 = False, 1 = True
        '_numOfThreads',            # Integer
        '_workingDir',              # String
        '_savePath',                # String
        '_beginDate',               # String Format: Month/Day/Year
        '_endDate',                 # String Format: Month/Day/Year
        '_xmlStringToParse',        # String
        '_textCorpusStr',           # String
        '_fileHandle',              # File Handle
        '_twigHandler',             # XML::Twig parser object
        '_parsedCount',             # Int
        '_tempDate',                # String (Temporary Placeholder)
        '_tempStr',                 # String (Temporary Placeholder)
        '_compoundWordAry',         # Array Of Compound Words
        '_compoundWordBST',         # Binary Search Tree Reference
        '_maxCompoundWordLength',   # Integer
        '_overwriteExistingFile',   # Integer
        '_compoundWordCount',       # Integer
    );

    # Missing trailing arguments leave their keys present but undefined
    my $self = {};
    @{ $self }{ @positionalFields } = @args;

    # Constant defaults for anything the caller left undefined
    my %simpleDefaults = (
        _debugLog              => 0,
        _writeLog              => 0,
        _storeTitle            => 1,
        _storeAbstract         => 1,
        _quickParse            => 0,
        _compoundifyText       => 0,
        _beginDate             => "00/00/0000",
        _endDate               => "99/99/9999",
        _xmlStringToParse      => "(null)",
        _textCorpusStr         => "",
        _twigHandler           => 0,
        _parsedCount           => 0,
        _tempDate              => "",
        _tempStr               => "",
        _outputFileName        => "textcorpus.txt",
        _maxCompoundWordLength => 20,
        _overwriteExistingFile => 0,
    );

    for my $field ( keys %simpleDefaults )
    {
        $self->{ $field } = $simpleDefaults{ $field } if !defined ( $self->{ $field } );
    }

    # Defaults that require a call are only computed when actually needed
    $self->{ _numOfThreads }    = Sys::CpuAffinity::getNumCpus() if !defined ( $self->{ _numOfThreads } );
    $self->{ _workingDir }      = Cwd::getcwd()                  if !defined ( $self->{ _workingDir } );
    $self->{ _savePath }        = Cwd::getcwd()                  if !defined ( $self->{ _savePath } );
    $self->{ _compoundWordAry } = []                             if !defined ( $self->{ _compoundWordAry } );
    $self->{ _compoundWordBST } = Word2vec::Bst->new()           if !defined ( $self->{ _compoundWordBST } );

    # Initialize Thread Safe Counting Variables
    # (job counters are included so later reports never print undefined values)
    @xmlJobQueue       = ();
    $totalJobCount     = 0;
    $finishedJobCount  = 0;
    $compoundWordCount = 0;
    $preCompWordCount  = 0;
    $postCompWordCount = 0;

    # Open the log file if file logging was requested
    if( $self->{ _writeLog } )
    {
        if( open( my $logHandle, '>:utf8', 'Xmltow2vLog.txt' ) )
        {
            $logHandle->autoflush( 1 );             # Auto-flushes writes to log
            $self->{ _fileHandle } = $logHandle;
        }
        else
        {
            # Do not keep an unusable handle around: report and disable file logging
            warn( "Word2vec::Xmltow2v::new - Unable to open 'Xmltow2vLog.txt' for writing: $! - file logging disabled\n" );
            $self->{ _writeLog }   = 0;
            $self->{ _fileHandle } = undef;
        }
    }

    # Declare XML parser
    # Quick Parse Method(s): Much Faster With Less Hardware Requirements and Accuracy
    # Default Parse Method : Much Slower With High RAM Requirements and Better Accuracy
    my %twigHandlers = ( $self->{ _quickParse } == 1 )
        ? (
            'DateCreated'   => sub { _QuickParseDateCreated( @_, $self ) },
            'Journal'       => sub { _QuickParseJournal( @_, $self ) },
            'Article'       => sub { _QuickParseArticle( @_, $self ) },
            'OtherAbstract' => sub { _QuickParseOtherAbstract( @_, $self ) },
          )
        : (
            'MedlineCitationSet' => sub { _ParseMedlineCitationSet( @_, $self ) },
          );

    $self->{ _twigHandler } = XML::Twig->new( twig_handlers => \%twigHandlers );

    bless $self, $class;

    $self->WriteLog( "New - Debug On" );
    $self->WriteLog( "New - QuickParse Enabled" ) if( $self->{ _quickParse } == 1 );

    # Parse the XML string immediately when one was passed to the constructor
    if( $self->{ _xmlStringToParse } eq "(null)" )
    {
        $self->WriteLog( "New - No XML String Argument To Parse" );
    }
    elsif( $self->_CheckForNullData ( $self->{ _xmlStringToParse } ) )
    {
        $self->WriteLog( "New - Error: XML String is null" );
    }
    else
    {
        $self->{ _twigHandler }->parse( $self->{ _xmlStringToParse } );
    }

    return $self;
}
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  | ###################################################################################### | 
| 213 |  |  |  |  |  |  | #    DESTROY | 
| 214 |  |  |  |  |  |  | ###################################################################################### | 
| 215 |  |  |  |  |  |  |  | 
# Object destructor: closes the log file handle stored in _fileHandle, if any.
sub DESTROY
{
    my $self      = shift;
    my $logHandle = $self->{ _fileHandle };

    # Nothing to release when file logging was never enabled
    close( $logHandle ) if( $logHandle );
}
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | ###################################################################################### | 
| 226 |  |  |  |  |  |  | #    Module Functions | 
| 227 |  |  |  |  |  |  | ###################################################################################### | 
| 228 |  |  |  |  |  |  |  | 
# Converts Medline XML into the text corpus format.
#
# Parameters:
#   $dir - Path to a single XML file or to a directory of .xml / .gz files.
#          Defaults to the object's working directory when undefined.
#
# Behaviour:
#   file      - The file is read and parsed in the calling thread.
#   directory - Every file whose name contains ".xml" or ".gz" is queued, then
#               GetNumOfThreads() worker threads (_ThreadedConvert) drain the
#               queue; a summary is printed/logged once all workers finish.
#
# Returns:
#   -1 when the date check fails, when both StoreTitle and StoreAbstract are
#      disabled, when the file could not be read, or when the directory could
#      not be opened.
#    0 otherwise (including when $dir is neither a file nor a directory).
sub ConvertMedlineXMLToW2V
{
    my ( $self, $dir ) = @_;
    $dir = $self->GetWorkingDir() if !defined ( $dir );

    # Check(s)
    if( $self->_DateCheck() == -1 )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2v - Error: Date Check Failed" );
        return -1;
    }

    if( $self->GetStoreTitle() == 0 && $self->GetStoreAbstract() == 0 )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - Error: StoreTitle and StoreAbstract Variables Set To 0 - No Data Will Be Extracted" );
        return -1;
    }

    # Check To See If Overwrite Existing File Option Is Enabled And Overwrite
    if( $self->GetOverwriteExistingFile() == 1 )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - Overwrite Existing File Option Enabled" );

        my $savePath = $self->GetSavePath();

        if( -e $savePath )
        {
            $self->WriteLog( "ConvertMedlineXMLToW2V - Existing File Found - Removing Existing File" );
            unlink( $savePath );
        }
    }

    my $isFileOrDir = $self->IsFileOrDirectory( $dir );

    # Process File In Working Directory
    if( $isFileOrDir eq "file" )
    {
        $self->SetXMLStringToParse( $self->_ReadXMLDataFromFile( $dir ) );
        return -1 if ( $self->GetXMLStringToParse() ) eq "(null)";

        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing XML File: $dir" );
        $self->_ParseXMLString( $self->GetXMLStringToParse() );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing Complete" );
    }
    # Process All Files In Directory
    elsif( $isFileOrDir eq "dir" )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - No File Specified/Using Directory Option" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Obtaining File(s) In Directory" );

        # Read File Name(s) From Specified Directory
        my $dirHandle;

        if( !opendir( $dirHandle, "$dir" ) )
        {
            $self->WriteLog( "ConvertMedlineXMLToW2V - Error: Can't open $dir: $!" );
            return -1;
        }

        # Queue plain XML files and gzip archives. A single combined test
        # guarantees each name is queued at most once (a name such as
        # "a.xml.b.gz" used to satisfy both separate tests and be queued twice).
        push( @xmlJobQueue, grep { index( $_, ".gz" ) != -1 || index( $_, ".xml" ) != -1 } readdir( $dirHandle ) );

        closedir $dirHandle;
        undef $dirHandle;

        # Set Total Job Count / restart the finished counter so a second
        # conversion on the same process does not inherit the previous total
        $totalJobCount    = @xmlJobQueue;
        $finishedJobCount = 0;

        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing $totalJobCount File(s)" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Starting Worker Thread(s) / Compiling Text Corpus" );

        # Start Thread(s), remembering exactly the workers created here
        my @workers = ();

        for my $workerNumber ( 1 .. $self->GetNumOfThreads() )
        {
            my $thread = threads->create( "_ThreadedConvert", $self, $dir );
            push( @workers, $thread ) if defined( $thread );
        }

        # Join the workers started above; threads owned by other code are left alone
        $_->join() for @workers;

        my @summary = (
            "Parsed $finishedJobCount of $totalJobCount Files",
            "Number Of Compound Words: $compoundWordCount",
            "Number Of Words (Before Compounding): $preCompWordCount",
            "Number Of Words (After Compounding): $postCompWordCount",
        );

        # Console summary is only printed when debug logging is off
        if( $self->GetDebugLog() == 0 )
        {
            print( "$_\n" ) for @summary;
        }

        $self->WriteLog( "ConvertMedlineXMLToW2V - $_" ) for @summary;
        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing Complete" );

        # Clean Up (method calls: the bare function form never received $self)
        $self->ClearTempStr();
        $self->ClearTextCorpusStr();
        $totalJobCount     = 0;
        $preCompWordCount  = 0;
        $compoundWordCount = 0;
        $postCompWordCount = 0;
    }
    else
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - Unknown Parameter Type: Not File Or Directory" );
    }

    return 0;
}
| 326 |  |  |  |  |  |  |  | 
# Worker-thread body: repeatedly takes one file name from the shared job
# queue, parses it and appends the resulting text corpus to the save path.
#
# Parameters:
#   $dir - Directory containing the queued file names.
#
# Returns:
#   1 when the queue was already empty at start-up (thread not needed),
#   0 once the queue has been drained.
sub _ThreadedConvert
{
    my ( $self, $dir ) = @_;

    my $tid = threads->tid();

    if( @xmlJobQueue == 0 )
    {
        $self->WriteLog( "_ThreadedConvert - Warning: Requested Thread $tid Not Needed/Threads Exceed Work Load - Terminating Thread" );
        return 1;
    }

    $self->WriteLog( "_ThreadedConvert - Starting Thread: $tid" );
    $self->WriteLog( "_ThreadedConvert - Thread $tid Parsing File(s) In Job Queue" );

    my $keepWorking = 1;

    while( $keepWorking == 1 )
    {
        my $file;

        # Prevent Other Threads From Reading Shared Job Queue (Array) At The Same Time
        {
            lock( $queueLock );

            # Take the next file name off the front of the queue. shift() keeps
            # the array dense, so no scan for deleted slots is needed.
            $file = shift( @xmlJobQueue );

            # Increment Parsed File Counter
            $finishedJobCount++ if defined( $file );

            # Exit The Main Loop If The Last Element Was Taken
            $keepWorking = 0 if ( @xmlJobQueue == 0 );
        }

        # Another thread emptied the queue first
        next if !defined( $file );

        print( "Thread $tid: Parsing $file\n" ) if ( !$self->GetDebugLog() );
        $self->WriteLog( "_ThreadedConvert - Thread $tid: Processing File: $file" );
        $self->SetXMLStringToParse( $self->_ReadXMLDataFromFile( "$dir/$file" ) );
        $self->WriteLog( "_ThreadedConvert - Thread $tid: Parsing XML Data" );
        $self->_ParseXMLString( $self->GetXMLStringToParse() );
        $self->WriteLog( "_ThreadedConvert - Thread $tid: Parsed $file" );
        print( "Thread $tid: Parsed $file\n" ) if ( !$self->GetDebugLog() );

        # Flush this file's text to disk and start the next file with an empty buffer
        $self->_SaveTextCorpusToFile( $self->GetSavePath(), 1 );
        $self->ClearTextCorpusStr();
    }

    $self->WriteLog( "_ThreadedConvert - Thread $tid Finished - Terminating" );

    return 0;
}
| 388 |  |  |  |  |  |  |  | 
# Feeds an XML string to the object's twig parser.
#
# Parameters:
#   $string - XML text to parse (undef is treated as an empty string).
#
# Returns:
#   -1 when the parse requirements are not met or the null-data check fires,
#    0 after the string has been handed to the parser.
sub _ParseXMLString
{
    my ( $self, $string ) = @_;
    $string = "" if !defined ( $string );

    # Bail out early when there is nothing to parse or no parser available
    return -1 if ( $self->_CheckParseRequirements( $string ) == -1 );

    if( $self->_CheckForNullData( $string ) )
    {
        $self->WriteLog( "_ParseXMLString - Cannot Parse (null) string" );
        return -1;
    }

    $self->{ _twigHandler }->parse( $string );
    $self->WriteLog( "_ParseXMLString: Released PubmedArticle from memory" );

    # Report how many entries have been parsed so far
    $self->WriteLog( "_ParseXMLString: Parsed " . $self->GetParsedCount()  . " entries" );

    return 0;
}
| 418 |  |  |  |  |  |  |  | 
# Verifies that parsing can proceed.
#
# Parameters:
#   $string - XML text about to be parsed (undef counts as empty).
#
# Returns:
#   -1 when the string is empty or the twig handler equals 0 (unset),
#    0 when both requirements are met.
sub _CheckParseRequirements
{
    my ( $self, $string ) = @_;
    my $xmlData = defined ( $string ) ? $string : "";

    if( $xmlData eq "" )
    {
        $self->WriteLog( "_CheckParseRequirements - Error: Nothing To Parse" );
        return -1;
    }

    if( $self->GetTwigHandler() == 0 )
    {
        $self->WriteLog( "_CheckParseRequirements - Error: Unable To Parse XML Data/TwigHandler = (null)" );
        return -1;
    }

    return 0;
}
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | # Checks to see if Medline XML data in memory is a null string | 
# Reports whether the given XML data is the "(null)" placeholder.
#
# Parameters:
#   $temp - String to inspect.
#
# Returns:
#   1 when $temp is undefined or begins with "(null)", 0 otherwise.
#
# Note: the position returned by index() is compared directly. Assigning the
# result of "index( ... ) != -1" to a variable stores the boolean, not the
# position, which made the previous check unable to ever return 1.
sub _CheckForNullData
{
    my ( $self, $temp ) = @_;
    my $nullStr = "(null)";

    # No data at all is treated the same as the placeholder
    return 1 if !defined ( $temp );

    # Return True only when the placeholder is at the very start of the string
    return 1 if ( index( $temp, $nullStr ) == 0 );

    # Return False
    return 0;
}
| 453 |  |  |  |  |  |  |  | 
| 454 |  |  |  |  |  |  | # Removes the XML Version string prior to parsing the XML string | 
# Strips the XML declaration and DOCTYPE lines from an XML string, in place.
#
# Parameters:
#   $temp - Reference to the XML string; the referenced string is replaced
#           with the filtered text.
#
# Every line containing the XML declaration marker or "!DOCTYPE" is dropped
# as a whole; each remaining line is re-emitted followed by a newline.
#
# Returns the filtered string, or nothing when $temp is not a reference.
sub _RemoveXMLVersion
{
    my ( $self, $temp ) = @_;

    # Nothing to rewrite without a reference to the caller's string
    return if ( !defined ( $temp ) || !ref ( $temp ) );

    # Checking For XML Version
    # NOTE(review): this literal was truncated in the listing this block was
    # recovered from; '<?xml version' is the reconstructed marker -- confirm.
    my $xmlVersion = '<?xml version';
    my $docType    = '!DOCTYPE';

    my @keptLines = grep { index( $_, $xmlVersion ) == -1 && index( $_, $docType ) == -1 }
                    split( /\n/, ${$temp} );

    ${$temp} = join( "", map { $_ . "\n" } @keptLines );
}
| 476 |  |  |  |  |  |  |  | 
# Twig handler for the 'MedlineCitationSet' element (default parse mode).
#
# Parameters:
#   $twigSelf - The XML::Twig object invoking the handler (unused here).
#   $root     - The matched element; each of its children is one citation.
#   $self     - The Word2vec::Xmltow2v object (appended by the closure in new).
#
# For every child: parse it, append its text to the text corpus when its date
# lies inside the configured range (compoundified and lower-cased first when
# that option is enabled), clear the temporary placeholders and release the
# child element.
sub _ParseMedlineCitationSet
{
    my ( $twigSelf, $root, $self ) = @_;

    # Date-range test for the article currently held in the temp placeholders
    my $dateInRange = sub
    {
        return $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1;
    };

    for my $citation ( $root->children() )
    {
        # Parse XML Data
        my $wasParsed = $self->_ParseMedlineArticle( $citation );

        if( $self->GetCompoundifyText() == 1 && $dateInRange->() )
        {
            # Compoundify String, Then Append Article Data To Text Corpus
            my $compounded = $self->CompoundifyString( lc( $self->GetTempStr() ) );
            $self->AppendStrToTextCorpus( $compounded );
        }
        elsif( $dateInRange->() )
        {
            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $self->GetTempStr() );
        }

        # Clear string placeholders
        $self->ClearTempStr();
        $self->ClearTempDate();

        # Increment Parsed Counter
        $self->{ _parsedCount }++ if ( $wasParsed == 1 );

        # Release the stored XML section from memory (not fully tested)
        $citation->purge() if defined( $citation );
    }

    # Release the stored XML section from memory (not fully tested)
    $root->purge();
    $self->WriteLog( "_ParseMedlineCitationSet: Released PubmedArticleSet group from memory" );
}
| 521 |  |  |  |  |  |  |  | 
# Dispatches each child of one citation element to the matching parser:
#   "Article"       -> _ParseArticle
#   "DateCreated"   -> _ParseDateCreated (result stored with SetTempDate)
#   "OtherAbstract" -> _ParseOtherAbstract
# Any other tag is only written to the log. Every child is purged after it has
# been handled.
#
# Parameters:
#   $self           - Object providing the parser and logging methods.
#   $medlineArticle - Element whose children are dispatched.
#
# Returns: always 1.
sub _ParseMedlineArticle
{
    my ( $self, $medlineArticle ) = @_;

    for my $article ( $medlineArticle->children() )
    {
        # Resolve the tag once instead of once per comparison
        my $tag = $article->tag();

        if( $tag eq "Article" )
        {
            $self->_ParseArticle( $article );
        }
        elsif( $tag eq "DateCreated" )
        {
            $self->SetTempDate( $self->_ParseDateCreated( $article ) );
        }
        elsif( $tag eq "OtherAbstract" )
        {
            $self->_ParseOtherAbstract( $article );
        }
        else
        {
            $self->WriteLog( "_ParseMedlineArticle - (New Data Found) - Tag: " . $tag . ", Field: " . $article->field() );
        }

        # Release article from memory
        $article->purge();
    }

    return 1;
}
| 554 |  |  |  |  |  |  |  | 
# Builds a "month/day/year" string from the "Month", "Day" and "Year" children
# of a date element.
#
# Parameters:
#   $self    - Object providing WriteLog.
#   $article - Element whose children carry the date parts.
#
# Returns: the string "$month/$day/$year". A part that is missing or empty is
# replaced by "00" (day, month) or "0000" (year).
sub _ParseDateCreated
{
    my ( $self, $article ) = @_;

    my $month = "";
    my $day   = "";
    my $year  = "";

    for my $date ( $article->children() )
    {
        my $tag = $date->tag();

        $day   = $date->field() if ( $tag eq "Day" );
        $month = $date->field() if ( $tag eq "Month" );
        $year  = $date->field() if ( $tag eq "Year" );
    }

    # Check(s)
    # The parts start out as empty strings, so a plain defined() test can
    # never trigger; treat an empty part as missing too.
    $day   = "00"   if !defined( $day )   || $day   eq "";
    $month = "00"   if !defined( $month ) || $month eq "";
    $year  = "0000" if !defined( $year )  || $year  eq "";

    $self->WriteLog( "_ParseDateCreated - Month: $month, Day: $day, Year: $year " );

    return "$month/$day/$year";
}
| 581 |  |  |  |  |  |  |  | 
# Processes the children of an article element:
#   "Journal"      -> handed to _ParseJournal
#   "ArticleTitle" -> text stored when GetStoreTitle() == 1
#   "Abstract"     -> text stored when GetStoreAbstract() == 1
# Text is passed through Text::Unidecode::unidecode and chomped before it is
# stored/logged. Any other tag is only written to the log.
#
# Parameters:
#   $self    - Object providing the store flags, temp string and log.
#   $article - Element whose children are inspected.
sub _ParseArticle
{
    my ( $self, $article ) = @_;

    foreach my $child ( $article->children() )
    {
        my $tagName = $child->tag();

        # Journal data has its own parser
        if( $tagName eq "Journal" )
        {
            $self->_ParseJournal( $child );
            next;
        }

        # Neither title nor abstract: report it and move on
        if( $tagName ne "ArticleTitle" && $tagName ne "Abstract" )
        {
            $self->WriteLog( "_ParseArticle - (New Tag Found) - Tag: " . $tagName . ", Field: " . $child->field() );
            next;
        }

        my $text = Text::Unidecode::unidecode( $child->field() );
        chomp( $text );

        # Store String (titles and abstracts are controlled by separate flags)
        my $storeFlag = ( $tagName eq "ArticleTitle" ) ? $self->GetStoreTitle() : $self->GetStoreAbstract();
        $self->AppendToTempStr( $text ) if ( $storeFlag == 1 );

        $self->WriteLog( "_ParseArticle - Tag: " . $tagName . ", Field: " . $text );
    }
}
| 620 |  |  |  |  |  |  |  | 
# Scans the children of a journal element. For every "Title" child the text is
# passed through Text::Unidecode::unidecode, chomped, appended to the temp
# string when GetStoreTitle() == 1, and logged. Any other child is only
# reported through the log.
#
# Parameters:
#   $self        - Object providing GetStoreTitle/AppendToTempStr/WriteLog.
#   $journalRoot - Element whose children are inspected.
sub _ParseJournal
{
    my ( $self, $journalRoot ) = @_;

    foreach my $node ( $journalRoot->children() )
    {
        # Anything that is not a title is only logged
        if( $node->tag() ne "Title" )
        {
            $self->WriteLog( "_ParseJournal - (New Tag Found) - Tag: " . $node->tag() . ", Field: " . $node->field() );
            next;
        }

        my $title = Text::Unidecode::unidecode( $node->field() );
        chomp( $title );

        # Store String
        if( $self->GetStoreTitle() == 1 )
        {
            $self->AppendToTempStr( $title );
        }

        $self->WriteLog( "_ParseJournal - Tag: " . $node->tag() . ", Field: " . $title );
    }
}
| 645 |  |  |  |  |  |  |  | 
# Scans the children of an "other abstract" element. Each "AbstractText" child
# has its text passed through Text::Unidecode::unidecode and chomped; the
# result is appended to the temp string when GetStoreAbstract() == 1 and is
# always logged. Children with any other tag are only logged.
#
# Parameters:
#   $self         - Object providing GetStoreAbstract/AppendToTempStr/WriteLog.
#   $abstractRoot - Element whose children are inspected.
sub _ParseOtherAbstract
{
    my ( $self, $abstractRoot ) = @_;

    my @sections = $abstractRoot->children();

    while( @sections )
    {
        my $section = shift( @sections );
        my $isText  = ( $section->tag() eq "AbstractText" );

        if( !$isText )
        {
            $self->WriteLog( "_ParseOtherAbstract - (New Tag Found) - Tag: " . $section->tag() . ", Field: " . $section->field() );
        }
        else
        {
            my $abstractText = Text::Unidecode::unidecode( $section->field() );
            chomp( $abstractText );

            # Store String
            $self->AppendToTempStr( $abstractText ) if ( $self->GetStoreAbstract() == 1 );

            $self->WriteLog( "_ParseOtherAbstract - Tag: " . $section->tag() . ", Field: " . $abstractText );
        }
    }
}
| 670 |  |  |  |  |  |  |  | 
# Reads the "Month", "Day" and "Year" children of a date element and stores
# the result as "month/day/year" through SetTempDate. The previously stored
# temp date is cleared first and the element is purged afterwards.
#
# Parameters:
#   $twigSelf - First argument supplied by the caller; not used here
#               (presumably the XML parser object - TODO confirm).
#   $article  - Element whose children carry the date parts.
#   $self     - Object providing ClearTempDate/SetTempDate/WriteLog.
#
# A part that is missing or empty is replaced by "00" (day, month) or "0000"
# (year).
sub _QuickParseDateCreated
{
    my ( $twigSelf, $article, $self ) = @_;

    my $month = "";
    my $day   = "";
    my $year  = "";

    # Clear Old Date
    $self->ClearTempDate();

    for my $date ( $article->children() )
    {
        my $tag = $date->tag();

        $day   = $date->field() if ( $tag eq "Day" );
        $month = $date->field() if ( $tag eq "Month" );
        $year  = $date->field() if ( $tag eq "Year" );
    }

    # Check(s)
    # The parts start out as empty strings, so a plain defined() test can
    # never trigger; treat an empty part as missing too.
    $day   = "00"   if !defined( $day )   || $day   eq "";
    $month = "00"   if !defined( $month ) || $month eq "";
    $year  = "0000" if !defined( $year )  || $year  eq "";

    $self->WriteLog( "_QuickParseDateCreated - Month: $month, Day: $day, Year: $year " );

    $self->SetTempDate( "$month/$day/$year" );

    # Free Memory
    $article->purge();
}
| 703 |  |  |  |  |  |  |  | 
# Collects the "Title" children of a journal element into the temp string
# (when GetStoreTitle() == 1), then flushes the temp string into the text
# corpus if the current temp date lies inside the configured range. The text
# is lower-cased and compoundified first when GetCompoundifyText() == 1.
# Finally the temp string is cleared and the element purged.
#
# Parameters:
#   $twigSelf    - First argument supplied by the caller; not used here
#                  (presumably the XML parser object - TODO confirm).
#   $journalRoot - Element whose children are inspected.
#   $self        - Object owning the temp string, options and text corpus.
sub _QuickParseJournal
{
    my ( $twigSelf, $journalRoot, $self ) = @_;

    for my $journalChild ( $journalRoot->children() )
    {
        my $tag = $journalChild->tag();

        if( $tag eq "Title" )
        {
            my $tempStr = Text::Unidecode::unidecode( $journalChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreTitle() == 1 );

            $self->WriteLog( "_QuickParseJournal - Tag: " . $tag . ", Field: " . $tempStr );
        }
        else
        {
            $self->WriteLog( "_QuickParseJournal - (New Tag Found) - Tag: " . $tag . ", Field: " . $journalChild->field() );
        }
    }

    # Evaluate The Date Filter Only Once; Both Storage Paths Need The Same Answer
    if( $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1 )
    {
        if( $self->GetCompoundifyText() == 1 )
        {
            # Compoundify String If Option Is Enabled
            my $tempStr = $self->CompoundifyString( lc( $self->GetTempStr() ) );

            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $tempStr );
        }
        else
        {
            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $self->GetTempStr() );
        }
    }

    # Clear string placeholders
    $self->ClearTempStr();

    # Free Memory
    $journalRoot->purge();
}
| 748 |  |  |  |  |  |  |  | 
# Collects the "ArticleTitle" and "Abstract" children of an article element
# into the temp string (subject to GetStoreTitle() / GetStoreAbstract()), then
# flushes the temp string into the text corpus if the current temp date lies
# inside the configured range. The text is lower-cased and compoundified first
# when GetCompoundifyText() == 1. Finally the temp string is cleared and the
# element purged.
#
# Parameters:
#   $twigSelf - First argument supplied by the caller; not used here
#               (presumably the XML parser object - TODO confirm).
#   $article  - Element whose children are inspected.
#   $self     - Object owning the temp string, options and text corpus.
sub _QuickParseArticle
{
    my ( $twigSelf, $article, $self ) = @_;

    for my $articleChild ( $article->children() )
    {
        my $tag = $articleChild->tag();

        if( $tag eq "ArticleTitle" )
        {
            my $tempStr = Text::Unidecode::unidecode( $articleChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreTitle() == 1 );

            $self->WriteLog( "_QuickParseArticle - Tag: " . $tag . ", Field: " . $tempStr );
        }
        elsif( $tag eq "Abstract" )
        {
            my $tempStr = Text::Unidecode::unidecode( $articleChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreAbstract() == 1 );

            $self->WriteLog( "_QuickParseArticle - Tag: " . $tag . ", Field: " . $tempStr );
        }
        else
        {
            $self->WriteLog( "_QuickParseArticle - (New Tag Found) - Tag: " . $tag . ", Field: " . $articleChild->field() );
        }
    }

    # Evaluate The Date Filter Only Once; Both Storage Paths Need The Same Answer
    if( $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1 )
    {
        if( $self->GetCompoundifyText() == 1 )
        {
            # Compoundify String If Option Is Enabled
            my $tempStr = $self->CompoundifyString( lc( $self->GetTempStr() ) );

            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $tempStr );
        }
        else
        {
            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $self->GetTempStr() );
        }
    }

    # Clear string placeholders
    $self->ClearTempStr();

    # Free Memory
    $article->purge();
}
| 803 |  |  |  |  |  |  |  | 
# Collects the "AbstractText" children of an "other abstract" element into the
# temp string (when GetStoreAbstract() == 1), then flushes the temp string
# into the text corpus if the current temp date lies inside the configured
# range. The text is lower-cased and compoundified first when
# GetCompoundifyText() == 1. Finally the temp string is cleared and the
# element purged.
#
# Parameters:
#   $twigSelf     - First argument supplied by the caller; not used here
#                   (presumably the XML parser object - TODO confirm).
#   $abstractRoot - Element whose children are inspected.
#   $self         - Object owning the temp string, options and text corpus.
sub _QuickParseOtherAbstract
{
    my ( $twigSelf, $abstractRoot, $self ) = @_;

    for my $abstractChild ( $abstractRoot->children() )
    {
        my $tag = $abstractChild->tag();

        if( $tag eq "AbstractText" )
        {
            my $tempStr = Text::Unidecode::unidecode( $abstractChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreAbstract() == 1 );

            $self->WriteLog( "_QuickParseOtherAbstract - Tag: " . $tag . ", Field: " . $tempStr );
        }
        else
        {
            $self->WriteLog( "_QuickParseOtherAbstract - (New Tag Found) - Tag: " . $tag . ", Field: " . $abstractChild->field() );
        }
    }

    # Evaluate The Date Filter Only Once; Both Storage Paths Need The Same Answer
    if( $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1 )
    {
        if( $self->GetCompoundifyText() == 1 )
        {
            # Compoundify String If Option Is Enabled
            my $tempStr = $self->CompoundifyString( lc( $self->GetTempStr() ) );

            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $tempStr );
        }
        else
        {
            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $self->GetTempStr() );
        }
    }

    # Clear string placeholders
    $self->ClearTempStr();

    # Free Memory
    $abstractRoot->purge();
}
| 848 |  |  |  |  |  |  |  | 
# Builds the compound word search tree from the compound word array held by
# $self: the tree object returned by GetCompoundWordBST() is asked to create
# a tree over the whole array, the result is installed as its root node and
# the array is cleared afterwards.
#
# Parameters:
#   $self - Object providing the compound word array, the tree and the log.
#
# Returns: 0 on success, -1 when the compound word array is empty.
sub CreateCompoundWordBST
{
    my ( $self ) = @_;

    $self->WriteLog( "CreateCompoundWordBST - Creating Binary Search Tree From Compound Word Array" );

    my $tree      = $self->GetCompoundWordBST();
    my @wordList  = $self->GetCompoundWordAry();
    my $wordCount = scalar( @wordList );

    # Check(s)
    if( $wordCount == 0 )
    {
        $self->WriteLog( "CreateCompoundWordBST - Error: Cannot Create BST / Compound Word Array Is Empty - Have You Read The Compound Word File To Memory?" );
        return -1;
    }

    # Build The Tree Over The Full Index Range And Install Its Root
    my $treeRoot = $tree->CreateBST( \@wordList, 0, $wordCount - 1, undef );
    $tree->SetRootNode( $treeRoot );

    # Clean-Up
    $self->ClearCompoundWordAry();

    $self->WriteLog( "CreateCompoundWordBST - Compound Word Binary Search Tree Created" );

    return 0;
}
| 873 |  |  |  |  |  |  |  | 
# Rewrites a whitespace separated string so that every compound word found by
# _CompoundifySearch is emitted as a single token joined with "_"; all other
# words are copied unchanged. Every emitted token is followed by one space.
#
# Parameters:
#   $self - Object providing GetMaxCompoundWordLength, _CompoundifySearch
#           and WriteLog.
#   $str  - Text to compoundify.
#
# Returns: the rewritten string, or "(null)" when $str is undefined.
#
# Side effects: increments the file-level counters $compoundWordCount (once
# per compound word found) and $postCompWordCount (once per emitted token).
sub CompoundifyString
{
    my ( $self, $str ) = @_;

    return "(null)" if !defined ( $str );

    $self->WriteLog( "CompoundifyString - Compoundifying String - $str" );

    my @strAry = split( ' ', $str );
    $str = "";

    my $finalIndex            = $#strAry;
    my $maxCompoundWordLength = $self->GetMaxCompoundWordLength();

    # C-style loop on purpose: the index is advanced inside the body whenever
    # a compound word spanning several words has been consumed.
    for( my $i = 0; $i < @strAry; $i++ )
    {
        # Window Of Candidate Words Starting At $i. Clamp It To The Last Valid
        # Index; Slicing Past The End Would Append An Undefined Word To The
        # Window And Make The Search Miss A Compound Word That Ends The Text.
        my $lastIndex = $i + $maxCompoundWordLength;
        $lastIndex = $finalIndex if ( $lastIndex > $finalIndex );
        my @tempAry = @strAry[$i..$lastIndex];

        my $node = $self->_CompoundifySearch( \@tempAry, undef, $strAry[$i], 0 );
        undef( @tempAry );

        # Compound Word(s) Found
        if( defined( $node ) )
        {
            # Split Compound Word Data And Set Next Index After Located Compound Word(s)
            my @nodeDataAry = split( ' ', $node->data );
            $i += @nodeDataAry - 1;

            # Add Compound Words To The Return String
            $str .= join( '_', @nodeDataAry ) . " ";
            undef( @nodeDataAry );

            # Increment Compound Word Counter
            $compoundWordCount++;
        }
        # No Compound Word(s) Found
        else
        {
            # Add Single Word At Array Index To Return String
            $str .= $strAry[$i] . " ";
        }

        # Increment Word Counter
        $postCompWordCount++;
    }

    $self->WriteLog( "CompoundifyString - Compounded String - $str" );

    return $str;
}
| 932 |  |  |  |  |  |  |  | 
# Recursive helper for CompoundifyString. Starting with $searchStr, it keeps
# appending the next word of the window while the tree's "contains" search
# still succeeds, remembering the last exact match in $oldNode, and finally
# returns the node that represents the compound word found at the start of the
# window.
#
# Parameters:
#   $self      - Object providing GetCompoundWordBST and this method.
#   $strAryRef - Reference to the window of words being examined.
#   $oldNode   - Exact match remembered from a previous step (or undef).
#   $searchStr - Phrase built so far (words joined by single spaces).
#   $index     - Index within the window of the last word in $searchStr.
#
# Returns: a tree node whose data is the compound word found, or undef when
# there is none or a required argument is missing.
sub _CompoundifySearch
{
    my ( $self, $strAryRef, $oldNode, $searchStr, $index ) = @_;

    # Checks(s)
    for my $required ( $strAryRef, $searchStr, $index )
    {
        return undef if !defined( $required );
    }

    my @windowWords = @{ $strAryRef };
    my $windowSize  = scalar( @windowWords );
    my $tree        = $self->GetCompoundWordBST();

    my $matchNode = $tree->BSTContainsSearch( $tree->GetRootNode(), $searchStr );

    if( defined( $matchNode ) && $index < $windowSize )
    {
        $index++;

        # Make Sure Returned Node Data Is Equal With Search String Or Return Old Node
        $matchNode = $tree->BSTExactSearch( $tree->GetRootNode(), $searchStr );
        $matchNode = $oldNode if !defined( $matchNode );

        # More Words Available: Extend The Phrase And Keep Searching
        if( $index < $windowSize )
        {
            $searchStr .= ( " " . $windowWords[$index] );
            return $self->_CompoundifySearch( $strAryRef, $matchNode, $searchStr, $index );
        }
    }

    # Post Check(s)
    $matchNode = undef if defined( $matchNode ) && ( $matchNode->data ne $searchStr );

    # The Remembered Node Is Only Valid When Its Words Are A Leading Part
    # (Or All) Of The Phrase Searched Last
    if( defined( $oldNode ) )
    {
        my @phraseWords = split( ' ', $searchStr );
        my @nodeWords   = split( ' ', $oldNode->data );

        if( @phraseWords > @nodeWords )
        {
            my $phraseHead = join( ' ', @phraseWords[ 0 .. $#nodeWords ] );
            my $nodeText   = join( ' ', @nodeWords );
            $oldNode = undef if $phraseHead ne $nodeText;
        }
        elsif( @phraseWords == @nodeWords )
        {
            $oldNode = undef if $oldNode->data ne $searchStr;
        }
        else
        {
            $oldNode = undef;
        }
    }

    return $matchNode if defined( $matchNode );

    # Bug Fix: If Search Word Found At First Array Index And Second Word Not Found.
    #          Prevent Invalid Data From Being Returned.
    return undef if $index == 1;

    return $oldNode;
}
| 997 |  |  |  |  |  |  |  | 
# Reads a compound word list (one entry per line) into memory. Each line is
# chomped, cleaned with RemoveSpecialCharactersFromString and collected; the
# sorted list is then handed to SetCompoundWordAry and the compoundify option
# is switched on (list not empty) or off (list empty).
#
# Parameters:
#   $self                         - Object providing the setters and the log.
#   $fileDir                      - Path of the compound word file.
#   $autoSetMaxCompoundWordLength - When defined, the max compound word length
#                                   is reset to 0 and raised to the largest
#                                   word count found on any line.
#
# Returns: 0 on success; -1 when the path is undefined, does not exist, cannot
# be opened, or when the max compound word length exceeds 100.
sub ReadCompoundWordDataFromFile
{
    my ( $self, $fileDir, $autoSetMaxCompoundWordLength ) = @_;

    if( !defined ( $fileDir ) )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Directory Not Defined" );
        return -1;
    }

    if( !( -e "$fileDir" ) )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Directory/File Does Not Exist" );
        return -1;
    }

    $self->WriteLog( "ReadCompoundWordDataFromFile - Reading Compound Word File: \"$fileDir\"" );

    # Open The Compound Word File; Fail Cleanly Instead Of Reading From An
    # Unopened Handle When The File Exists But Cannot Be Opened
    my $fileHandle;
    if( !open( $fileHandle, '<:encoding(UTF-8)', "$fileDir" ) )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Unable To Open File \"$fileDir\" - $!" );
        return -1;
    }

    # Prepare Max Compound Word Length
    $self->SetMaxCompoundWordLength( 0 ) if defined ( $autoSetMaxCompoundWordLength );

    my @dataAry = ();

    while( my $row = <$fileHandle> )
    {
        chomp( $row );
        $row = $self->RemoveSpecialCharactersFromString( $row );
        push( @dataAry, $row );

        # Find Max Compound Word Length
        my @words = split( ' ', $row );
        my $size  = @words;
        $self->SetMaxCompoundWordLength( $size ) if defined( $autoSetMaxCompoundWordLength ) && ( $self->GetMaxCompoundWordLength() < $size );
    }

    close( $fileHandle );

    if( $self->GetMaxCompoundWordLength() > 100 )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Compound Word Length > 100" );
        return -1;
    }

    $self->WriteLog( "ReadCompoundWordDataFromFile - Auto Set Max Compound Word Length To \"" . $self->GetMaxCompoundWordLength() . "\"") if defined ( $autoSetMaxCompoundWordLength );
    $self->WriteLog( "ReadCompoundWordDataFromFile - Reading Complete" );
    $self->WriteLog( "ReadCompoundWordDataFromFile - Sorting Compound Word List" );

    @dataAry = sort( @dataAry );
    $self->SetCompoundWordAry( \@dataAry );

    if( @dataAry > 0 )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Stored " . @dataAry . " Compound Words In Memory" );
        $self->WriteLog( "ReadCompoundWordDataFromFile - Detected Compound Word Array Data / Auto-Setting Compoundify Text = 1" );
        $self->SetCompoundifyText( 1 );
    }
    else
    {
        $self->WriteLog( "ReadCompoundwordDataFromFile - No Compound Word Array Data Detected / Auto-Setting Compoundify Text = 0" );
        $self->SetCompoundifyText( 0 );
    }

    $self->WriteLog( "ReadCompoundWordDataFromFile - Sorting Complete" );

    return 0;
}
| 1054 |  |  |  |  |  |  |  | 
# Writes every entry returned by GetCompoundWordAry() to $savePath, one entry
# per line, encoded as UTF-8.
#
# Parameters:
#   $self     - Object providing GetCompoundWordAry and WriteLog.
#   $savePath - Path of the file to create/overwrite.
#
# Returns: 0 on success; -1 when no path is given or the file cannot be
# created or completely written.
sub SaveCompoundWordListToFile
{
    my ( $self, $savePath ) = @_;

    if( !defined( $savePath ) )
    {
        $self->WriteLog( "SaveCompoundWordListToFile - Error: Save Path Not Specified" );
        return -1;
    }

    $self->WriteLog( "SaveCompoundWordListToFile - Saving Compound Word List To \"$savePath\"" );

    # Create File Handle; Report Failure Instead Of Printing To An Unopened Handle
    my $fileHandle;
    if( !open( $fileHandle, '>:encoding(UTF-8)', "$savePath" ) )
    {
        $self->WriteLog( "SaveCompoundWordListToFile - Error: Unable To Create File \"$savePath\" - $!" );
        return -1;
    }

    # Write Data To File
    for my $compoundWord ( $self->GetCompoundWordAry() )
    {
        print( $fileHandle "$compoundWord\n" );
    }

    # Buffered Write Errors Only Surface When The Handle Is Closed
    if( !close( $fileHandle ) )
    {
        $self->WriteLog( "SaveCompoundWordListToFile - Error: Unable To Finish Writing File \"$savePath\" - $!" );
        return -1;
    }

    undef( $fileHandle );

    $self->WriteLog( "SaveCompoundWordListToFile - Compound Word List Saved To \"$savePath\"" );

    return 0;
}
| 1080 |  |  |  |  |  |  |  | 
# Reads a UTF-8 text file into a single string.
#
# Each line is chomped and appended as " $row", so the result is the lines
# joined by single spaces with one leading space.
#
# Parameters:
#   $fileDir - Path of the file to read.
# Returns:
#   The joined text, or the string "(null)" when the path is undefined,
#   does not exist, or cannot be opened.
sub ReadTextFromFile
{
    my ( $self, $fileDir ) = @_;

    if( !defined ( $fileDir ) )
    {
        $self->WriteLog( "ReadTextFromFile - Error: Directory Not Defined" );
        return "(null)";
    }

    if( !( -e "$fileDir" ) )
    {
        $self->WriteLog( "ReadTextFromFile - Error: Directory/File Does Not Exist" );
        return "(null)";
    }

    # Read Data From File To Memory
    # Existence does not guarantee readability, so the open itself is checked.
    my $fileHandle;

    if( !open( $fileHandle, '<:encoding(UTF-8)', $fileDir ) )
    {
        $self->WriteLog( "ReadTextFromFile - Error: Unable To Open \"$fileDir\" - $!" );
        return "(null)";
    }

    my $str = "";

    while( my $row = <$fileHandle> )
    {
        chomp $row;
        $str .= " $row";
    }

    close( $fileHandle );

    $self->WriteLog( "ReadTextFromFile - Reading Complete" );

    return $str;
}
| 1108 |  |  |  |  |  |  |  | 
# Writes a string to a UTF-8 text file, replacing any existing file.
#
# Parameters:
#   $savePath - Destination file path.
#   $str      - Text to write; an undefined value is written as "".
# Returns:
#   0 on success, -1 when no path is given or the file cannot be
#   opened or closed.
sub SaveTextToFile
{
    my ( $self, $savePath, $str ) = @_;

    if( !defined( $savePath ) )
    {
        $self->WriteLog( "SaveTextToFile - Error: No Save Path Specified" );
        return -1;
    }

    $self->WriteLog( "SaveTextToFile - Saving Data To \"$savePath\"" );

    # Create file handle (this routine always overwrites; it has no append mode)
    my $fileHandle;

    if( !open( $fileHandle, '>:encoding(UTF-8)', $savePath ) )
    {
        $self->WriteLog( "SaveTextToFile - Error: Unable To Open \"$savePath\" For Writing - $!" );
        return -1;
    }

    # Write Data To File
    $str = "" if !defined( $str );
    print { $fileHandle } $str;

    # Buffered write errors only surface when the handle is closed
    if( !close( $fileHandle ) )
    {
        $self->WriteLog( "SaveTextToFile - Error: Unable To Finish Writing \"$savePath\" - $!" );
        return -1;
    }

    $self->WriteLog( "SaveTextToFile - File Saved To \"$savePath\"" );

    return 0;
}
| 1134 |  |  |  |  |  |  |  | 
# Loads the contents of an XML file into memory.
#
# Paths containing ".gz" are decompressed in memory with
# IO::Uncompress::Gunzip; any other path is read line by line as UTF-8,
# with every line terminated by "\n" in the result.
#
# Parameters:
#   $fileDir - Path of the XML (or gzipped XML) file.
# Returns:
#   The file data, or the string "(null)" when the path is undefined, does
#   not exist, or cannot be opened.
# Dies:
#   When gunzip reports a failure.
sub _ReadXMLDataFromFile
{
    my ( $self, $fileDir ) = @_;

    if( !defined ( $fileDir ) )
    {
        $self->WriteLog( "_ReadXMLDataFromFile - Error: Directory Not Defined" );
        return "(null)";
    }

    if( !( -e "$fileDir" ) )
    {
        $self->WriteLog( "_ReadXMLDataFromFile - Error: Directory/File Does Not Exist" );
        return "(null)";
    }

    my $data = "";

    # Extract XML File From GZip To Memory
    # NOTE(review): ".gz" is matched anywhere in the path, not only as the
    # file extension - confirm this is intended.
    if ( index( $fileDir, ".gz" ) != -1 )
    {
        # Include the library's own error text so the failure is diagnosable
        IO::Uncompress::Gunzip::gunzip( "$fileDir" => \$data )
            or die "gunzip failed: $IO::Uncompress::Gunzip::GunzipError\n";
    }
    # Read XML Data From File To Memory
    else
    {
        my $fileHandle;

        if( !open( $fileHandle, '<:encoding(UTF-8)', $fileDir ) )
        {
            $self->WriteLog( "_ReadXMLDataFromFile - Error: Unable To Open \"$fileDir\" - $!" );
            return "(null)";
        }

        while( my $row = <$fileHandle> )
        {
            chomp $row;
            $data .= "$row\n";
        }

        close( $fileHandle );
    }

    $self->WriteLog( "_ReadXMLDataFromFile - Reading Data Complete/Data Stored" );

    return $data;
}
| 1170 |  |  |  |  |  |  |  | 
# Writes the current text corpus string to a UTF-8 file while holding the
# shared write lock.
#
# Parameters:
#   $savePath     - Destination file path.
#   $appendToFile - 1 appends to the file; any other value overwrites it.
#                   When undefined, the value of GetOverwriteExistingFile()
#                   is used.
#                   NOTE(review): the default is taken from the "overwrite
#                   existing file" setting but is applied as the append
#                   flag - confirm the intended polarity.
# Returns:
#   1 on success, -1 when no path is given or the file cannot be opened.
sub _SaveTextCorpusToFile
{
    my ( $self, $savePath, $appendToFile ) = @_;

    # Prevent Other Threads From Writing At The Same Time
    {
        lock( $writeLock );

        if( !defined( $savePath ) )
        {
            $self->WriteLog( "_SaveTextCorpusToFile - Error: No Save Path Specified" );
            return -1;
        }

        # The accessor is named GetOverwriteExistingFile; the previous
        # spelling ("Exiting") named a method this class does not define.
        $appendToFile = $self->GetOverwriteExistingFile() if !defined ( $appendToFile );

        $self->WriteLog( "_SaveTextCorpusToFile - Saving Text Corpus To \"$savePath\"" );

        # Append only when $appendToFile == 1; every other value overwrites,
        # so a handle is always opened.
        my $openMode = ( $appendToFile == 1 ) ? '>>:encoding(UTF-8)' : '>:encoding(UTF-8)';

        # Create file handle
        my $fileHandle;

        if( !open( $fileHandle, $openMode, $savePath ) )
        {
            $self->WriteLog( "_SaveTextCorpusToFile - Error: Unable To Open \"$savePath\" For Writing - $!" );
            return -1;
        }

        # Write Data To File
        print { $fileHandle } $self->GetTextCorpusStr();

        close( $fileHandle );

        $self->WriteLog( "_SaveTextCorpusToFile - Text Corpus Saved To \"$savePath\"" );
    }

    return 1;
}
| 1206 |  |  |  |  |  |  |  | 
# Reports whether a "Month/Day/Year" date lies inside an inclusive range.
#
# Parameters:
#   $date      - Date to test.
#   $beginDate - Range start; GetBeginDate() is used when undefined.
#   $endDate   - Range end; GetEndDate() is used when undefined.
# Returns:
#   1 when $beginDate <= $date <= $endDate, otherwise 0. Also 0 when $date
#   is undefined, when any date does not split into three "/" fields, or
#   when any field is negative.
sub IsDateInSpecifiedRange
{
    my ( $self, $date, $beginDate, $endDate ) = @_;

    if( !defined ( $date ) )
    {
        $self->WriteLog( "Error: Date Not Specified To Check Against Date Range" );
        return 0;
    }

    # Both warnings are logged before either default is fetched for use
    my $useDefaultBegin = !defined ( $beginDate );
    my $useDefaultEnd   = !defined ( $endDate );

    $self->WriteLog( "Warning - BeginDate Parameter Not Specified - Using Default Value: " . $self->GetBeginDate() ) if $useDefaultBegin;
    $self->WriteLog( "Warning - EndDate Parameter Not Specified - Using Default Value: " . $self->GetEndDate() ) if $useDefaultEnd;
    $beginDate = $self->GetBeginDate() if $useDefaultBegin;
    $endDate   = $self->GetEndDate()   if $useDefaultEnd;

    my @targetParts = split( '/', $date );
    my @lowerParts  = split( '/', $beginDate );
    my @upperParts  = split( '/', $endDate );

    # Format check(s): the first malformed date, in this order, is reported
    for my $candidate ( [ $date, \@targetParts ], [ $beginDate, \@lowerParts ], [ $endDate, \@upperParts ] )
    {
        my ( $text, $parts ) = @{ $candidate };
        next if @{ $parts } == 3;

        $self->WriteLog( "Invalid Date Format - Requested Format: Month/Day/Year : Specified Format - $text" );
        return 0;
    }

    # Negative components are never in range
    my $negativeCount = grep { $_ < 0 } ( @targetParts, @lowerParts, @upperParts );
    return 0 if $negativeCount > 0;

    my ( $month,      $day,      $year      ) = @targetParts;
    my ( $lowerMonth, $lowerDay, $lowerYear ) = @lowerParts;
    my ( $upperMonth, $upperDay, $upperYear ) = @upperParts;

    # Compare year first, then month, then day
    my $isBeforeBegin = $year < $lowerYear
                     || ( $year == $lowerYear && $month < $lowerMonth )
                     || ( $year == $lowerYear && $month == $lowerMonth && $day < $lowerDay );

    my $isAfterEnd    = $year > $upperYear
                     || ( $year == $upperYear && $month > $upperMonth )
                     || ( $year == $upperYear && $month == $upperMonth && $day > $upperDay );

    return 0 if ( $isBeforeBegin || $isAfterEnd );

    return 1;
}
| 1265 |  |  |  |  |  |  |  | 
# Classifies a filesystem path.
#
# Parameters:
#   $path - Path to inspect.
# Returns:
#   "file" for a plain file, "dir" for a directory, and "unknown" when the
#   path is undefined, does not exist, or is some other kind of entry.
sub IsFileOrDirectory
{
    my ( $self, $path ) = @_;

    # Check(s)
    return "unknown" if !defined( $path );
    return "unknown" if !( -e $path );

    return "file" if ( -f $path );
    return "dir"  if ( -d $path );

    # Existing entries that are neither plain files nor directories used to
    # fall off the end of the sub and yield an implicit false value.
    return "unknown";
}
| 1277 |  |  |  |  |  |  |  | 
# Normalizes text: lowercases it, removes "'s", turns hyphens and every
# character other than a-z / CR / LF into spaces, rewrites line endings for
# the current OS, and trims/collapses whitespace.
#
# Parameters:
#   $str - Text to clean; an undefined value yields "".
# Returns:
#   The cleaned string.
sub RemoveSpecialCharactersFromString
{
    my ( $self, $str ) = @_;

    return "" if !defined( $str );

    $str = lc( $str );                                      # Convert all characters to lowercase
    $str =~ s/ +/ /g;                                       # Remove duplicate white spaces between words
    $str =~ s/'s//g;                                        # Remove "'s" characters (Apostrophe 's')
    $str =~ s/-/ /g;                                        # Replace all hyphen characters to spaces
    $str =~ tr/a-z\015\012/ /cs;                            # Replace all characters except 'a' to 'z' and new-line characters with a single space

    # Convert String Line Ending Suitable To The Target
    # Default to LF: with an empty default, every OS not listed below
    # (e.g. "darwin", "freebsd") had its line breaks deleted, joining the
    # last word of one line to the first word of the next.
    my $lineEnding = "\012";
    my $os = $self->GetOSType();

    $lineEnding = "\015\012" if ( $os eq "MSWin32" );
    $lineEnding = "\012"     if ( $os eq "linux" );
    $lineEnding = "\015"     if ( $os eq "MacOS" );

    $str =~ s/(\015\012|\012|\015)/$lineEnding/g;

    # Removes Spaces At Both Ends Of String And More Than Once Space In-Between Ends
    # NOTE(review): \s also matches CR and LF, so this pass drops a
    # whitespace character that directly precedes another one (including the
    # CR of a CR/LF pair) and strips trailing line breaks.
    $str =~ s/^\s+|\s(?=\s)|\s+$//g;

    return $str;
}
| 1303 |  |  |  |  |  |  |  | 
# Returns the result of File::Type's checktype_filename() for $filePath.
#
# Parameters:
#   $filePath - Path of the file to inspect.
sub GetFileType
{
    my $self     = shift;
    my $filePath = shift;

    # A fresh checker is created per call and released before returning
    my $typeChecker  = File::Type->new();
    my $detectedType = $typeChecker->checktype_filename( $filePath );
    undef( $typeChecker );

    return $detectedType;
}
| 1314 |  |  |  |  |  |  |  | 
# Validates the configured begin/end dates and normalizes their format.
#
# Dates may be written "Month/Day/Year" or "Month-Day-Year". A date given
# with "-" is stored back in "/" form through SetBeginDate()/SetEndDate().
# The placeholder begin date 0/0/0 is validated as 1/1/0 and the
# placeholder end date 99/99/9999 as 12/31/9999.
#
# Returns:
#   0 when both dates are well formed and begin <= end, otherwise -1 (the
#   reason is written to the log).
sub _DateCheck
{
    my ( $self ) = @_;

    my $beginDate = $self->GetBeginDate();
    my $endDate   = $self->GetEndDate();

    # Check(s)
    if( !defined( $beginDate ) )
    {
        $self->WriteLog( "_DateCheck - Error: Begin Date Not Defined" );
        return -1;
    }

    if( !defined( $endDate ) )
    {
        $self->WriteLog( "_DateCheck - Error: End Date Not Defined" );
        return -1;
    }

    # Parse Begin Date ("/" takes precedence when both delimiters occur)
    my $delimiter = "";
    $delimiter = "-" if index( $beginDate, "-" ) != -1;
    $delimiter = "/" if index( $beginDate, "/" ) != -1;

    if( $delimiter eq "" )
    {
        $self->WriteLog( "_DateCheck - Error: Begin Date Improper Format" );
        return -1;
    }

    my @bDateAry = split( /\Q$delimiter\E/, $beginDate );

    # Validate the field count and that every field is numeric BEFORE the
    # fields are compared: non-numeric text numifies to 0 and would
    # otherwise be mistaken for the 0/0/0 placeholder.
    if( @bDateAry != 3 || scalar( grep { $_ !~ /^\s*\d+\s*$/ } @bDateAry ) > 0 )
    {
        $self->WriteLog( "_DateCheck - Error: Begin Date Not Specified In \"Month/Day/Year\" or \"Month-Day-Year\" Format" );
        return -1;
    }

    # Check For Default Begin Date And Adjust Accordingly
    if( $bDateAry[0] == 0 && $bDateAry[1] == 0 && $bDateAry[2] == 0 )
    {
        @bDateAry = ( 1, 1, 0 );
    }

    # Set Date In Proper Format
    if( $delimiter eq "-" )
    {
        $beginDate = join( '/', @bDateAry );
        $self->SetBeginDate( $beginDate );
    }

    # Parse End Date
    $delimiter = "";
    $delimiter = "-" if index( $endDate, "-" ) != -1;
    $delimiter = "/" if index( $endDate, "/" ) != -1;

    if( $delimiter eq "" )
    {
        $self->WriteLog( "_DateCheck - Error: End Date Improper Format" );
        return -1;
    }

    my @eDateAry = split( /\Q$delimiter\E/, $endDate );

    if( @eDateAry != 3 || scalar( grep { $_ !~ /^\s*\d+\s*$/ } @eDateAry ) > 0 )
    {
        $self->WriteLog( "_DateCheck - Error: End Date Not Specified In \"Month/Day/Year\" or \"Month-Day-Year\" Format" );
        return -1;
    }

    # Check For Default End Date And Adjust Accordingly
    if( $eDateAry[0] == 99 && $eDateAry[1] == 99 && $eDateAry[2] == 9999 )
    {
        @eDateAry = ( 12, 31, 9999 );
    }

    # Set Date In Proper Format
    if( $delimiter eq "-" )
    {
        $endDate = join( '/', @eDateAry );
        $self->SetEndDate( $endDate );
    }

    # Basic Checks
    $self->WriteLog( "_DateCheck - Error: Incorrect Begin Date Month Value - Expected Value: 1-12 / Specified Value: " . $bDateAry[0] ) if ( $bDateAry[0] < 1 || $bDateAry[0] > 12 );
    $self->WriteLog( "_DateCheck - Error: Incorrect End Date Month Value - Expected Value: 1-12 / Specified Value: " . $eDateAry[0] ) if ( $eDateAry[0] < 1 || $eDateAry[0] > 12 );
    return -1 if ( $bDateAry[0] < 1 || $bDateAry[0] > 12 ) || ( $eDateAry[0] < 1 || $eDateAry[0] > 12 );

    $self->WriteLog( "_DateCheck - Error: Incorrect Begin Date Day Value - Expected Value: 1-31 / Specified Value: " . $bDateAry[1] ) if ( $bDateAry[1] < 1 || $bDateAry[1] > 31 );
    $self->WriteLog( "_DateCheck - Error: Incorrect End Date Day Value - Expected Value: 1-31 / Specified Value: " . $eDateAry[1] ) if ( $eDateAry[1] < 1 || $eDateAry[1] > 31 );
    return -1 if ( $bDateAry[1] < 1 || $bDateAry[1] > 31 ) || ( $eDateAry[1] < 1 || $eDateAry[1] > 31 );

    $self->WriteLog( "_DateCheck - Error: Incorrect Begin Date Year Value - Expected Value: 0-9999 / Specified Value: " . $bDateAry[2] ) if ( $bDateAry[2] < 0 || $bDateAry[2] > 9999 );
    $self->WriteLog( "_DateCheck - Error: Incorrect End Date Year Value - Expected Value: 0-9999 / Specified Value: " . $eDateAry[2] ) if ( $eDateAry[2] < 0 || $eDateAry[2] > 9999 );
    return -1 if ( $bDateAry[2] < 0 || $bDateAry[2] > 9999 ) || ( $eDateAry[2] < 0 || $eDateAry[2] > 9999 );

    # Advanced Checks: the begin date must not be later than the end date
    if( $bDateAry[2] > $eDateAry[2] )
    {
        $self->WriteLog( "_DateCheck - Error: Begin Date Year > End Date Year" );
        return -1;
    }

    if( $bDateAry[2] == $eDateAry[2] && $bDateAry[0] > $eDateAry[0] )
    {
        $self->WriteLog( "_DateCheck - Error: Years Equal, Begin Date Month > End Date Month" );
        return -1;
    }

    if( $bDateAry[2] == $eDateAry[2] && $bDateAry[0] == $eDateAry[0] && $bDateAry[1] > $eDateAry[1] )
    {
        $self->WriteLog( "_DateCheck - Error: Years And Months Equal, Begin Date Day > End Date Day" );
        return -1;
    }

    return 0;
}
| 1409 |  |  |  |  |  |  |  | 
# Returns the operating system name Perl reports in $^O.
sub GetOSType
{
    my $self   = shift;
    my $osName = $^O;

    return $osName;
}
| 1415 |  |  |  |  |  |  |  | 
| 1416 |  |  |  |  |  |  |  | 
| 1417 |  |  |  |  |  |  | ###################################################################################### | 
| 1418 |  |  |  |  |  |  | #    Accessors | 
| 1419 |  |  |  |  |  |  | ###################################################################################### | 
| 1420 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _debugLog value. When it is undefined, 0 is
# stored on the object and returned.
sub GetDebugLog
{
    my $self = shift;

    unless( defined( $self->{ _debugLog } ) )
    {
        $self->{ _debugLog } = 0;
    }

    return $self->{ _debugLog };
}
| 1427 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _writeLog value. When it is undefined, 0 is
# stored on the object and returned.
sub GetWriteLog
{
    my $self = shift;

    unless( defined( $self->{ _writeLog } ) )
    {
        $self->{ _writeLog } = 0;
    }

    return $self->{ _writeLog };
}
| 1434 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _storeTitle value. When it is undefined, 1
# is stored on the object and returned.
sub GetStoreTitle
{
    my $self = shift;

    unless( defined( $self->{ _storeTitle } ) )
    {
        $self->{ _storeTitle } = 1;
    }

    return $self->{ _storeTitle };
}
| 1441 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _storeAbstract value. When it is undefined,
# 1 is stored on the object and returned.
sub GetStoreAbstract
{
    my $self = shift;

    unless( defined( $self->{ _storeAbstract } ) )
    {
        $self->{ _storeAbstract } = 1;
    }

    return $self->{ _storeAbstract };
}
| 1448 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _quickParse value. When it is undefined, 0
# is stored on the object and returned.
sub GetQuickParse
{
    my $self = shift;

    unless( defined( $self->{ _quickParse } ) )
    {
        $self->{ _quickParse } = 0;
    }

    return $self->{ _quickParse };
}
| 1455 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _compoundifyText value. When it is
# undefined, 0 is stored on the object and returned.
sub GetCompoundifyText
{
    my $self = shift;

    unless( defined( $self->{ _compoundifyText } ) )
    {
        $self->{ _compoundifyText } = 0;
    }

    return $self->{ _compoundifyText };
}
| 1462 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _numOfThreads value. When it is undefined,
# the result of Sys::CpuAffinity::getNumCpus() is stored and returned.
sub GetNumOfThreads
{
    my $self = shift;

    unless( defined( $self->{ _numOfThreads } ) )
    {
        $self->{ _numOfThreads } = Sys::CpuAffinity::getNumCpus();
    }

    return $self->{ _numOfThreads };
}
| 1469 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _workingDir value. When it is undefined,
# the result of Cwd::getcwd() is stored on the object and returned.
sub GetWorkingDir
{
    my $self = shift;

    unless( defined( $self->{ _workingDir } ) )
    {
        $self->{ _workingDir } = Cwd::getcwd();
    }

    return $self->{ _workingDir };
}
| 1476 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _savePath value. When it is undefined, the
# string "(null)" is stored on the object and returned.
sub GetSavePath
{
    my $self = shift;

    unless( defined( $self->{ _savePath } ) )
    {
        $self->{ _savePath } = "(null)";
    }

    return $self->{ _savePath };
}
| 1483 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _beginDate value. When it is undefined, the
# string "00/00/0000" is stored on the object and returned.
sub GetBeginDate
{
    my $self = shift;

    unless( defined( $self->{ _beginDate } ) )
    {
        $self->{ _beginDate } = "00/00/0000";
    }

    return $self->{ _beginDate };
}
| 1490 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _endDate value. When it is undefined, the
# string "99/99/9999" is stored on the object and returned.
sub GetEndDate
{
    my $self = shift;

    unless( defined( $self->{ _endDate } ) )
    {
        $self->{ _endDate } = "99/99/9999";
    }

    return $self->{ _endDate };
}
| 1497 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _xmlStringToParse value. When it is
# undefined, the string "(null)" is stored on the object and returned.
sub GetXMLStringToParse
{
    my $self = shift;

    unless( defined( $self->{ _xmlStringToParse } ) )
    {
        $self->{ _xmlStringToParse } = "(null)";
    }

    return $self->{ _xmlStringToParse };
}
| 1504 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _textCorpusStr value. When it is undefined,
# an empty string is stored on the object and returned.
sub GetTextCorpusStr
{
    my $self = shift;

    unless( defined( $self->{ _textCorpusStr } ) )
    {
        $self->{ _textCorpusStr } = "";
    }

    return $self->{ _textCorpusStr };
}
| 1511 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _fileHandle value. An undefined value stays
# undefined; the assignment only ensures the key exists on the object.
sub GetFileHandle
{
    my $self = shift;

    unless( defined( $self->{ _fileHandle } ) )
    {
        $self->{ _fileHandle } = undef;
    }

    return $self->{ _fileHandle };
}
| 1518 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _twigHandler value. When it is undefined,
# the string "(null)" is stored on the object and returned.
sub GetTwigHandler
{
    my $self = shift;

    unless( defined( $self->{ _twigHandler } ) )
    {
        $self->{ _twigHandler } = "(null)";
    }

    return $self->{ _twigHandler };
}
| 1525 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _parsedCount value. When it is undefined, 0
# is stored on the object and returned.
sub GetParsedCount
{
    my $self = shift;

    unless( defined( $self->{ _parsedCount } ) )
    {
        $self->{ _parsedCount } = 0;
    }

    return $self->{ _parsedCount };
}
| 1532 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _tempStr value. When it is undefined, an
# empty string is stored on the object and returned.
sub GetTempStr
{
    my $self = shift;

    unless( defined( $self->{ _tempStr } ) )
    {
        $self->{ _tempStr } = "";
    }

    return $self->{ _tempStr };
}
| 1539 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _tempDate value. When it is undefined, an
# empty string is stored on the object and returned.
sub GetTempDate
{
    my $self = shift;

    unless( defined( $self->{ _tempDate } ) )
    {
        $self->{ _tempDate } = "";
    }

    return $self->{ _tempDate };
}
| 1546 |  |  |  |  |  |  |  | 
# Accessor: returns the compound word list as a flat list (a copy of the
# elements of the array reference stored in _compoundWordAry).
#
# When nothing has been stored yet, an empty array reference is stored first
# so the dereference below is always valid. Assigning "()" to the hash
# element, as before, left it undefined, and dereferencing undef as an array
# is a fatal error under "use strict".
sub GetCompoundWordAry
{
    my ( $self ) = @_;
    $self->{ _compoundWordAry } = [] if !defined ( $self->{ _compoundWordAry } );
    return @{ $self->{ _compoundWordAry } };
}
| 1553 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _compoundWordBST value. When it is
# undefined, a new Word2vec::Bst object is created, stored and returned.
sub GetCompoundWordBST
{
    my $self = shift;

    unless( defined( $self->{ _compoundWordBST } ) )
    {
        $self->{ _compoundWordBST } = Word2vec::Bst->new();
    }

    return $self->{ _compoundWordBST };
}
| 1560 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _maxCompoundWordLength value. When it is
# undefined, 20 is stored on the object and returned.
sub GetMaxCompoundWordLength
{
    my $self = shift;

    unless( defined( $self->{ _maxCompoundWordLength } ) )
    {
        $self->{ _maxCompoundWordLength } = 20;
    }

    return $self->{ _maxCompoundWordLength };
}
| 1567 |  |  |  |  |  |  |  | 
# Accessor: returns the object's _overwriteExistingFile value. When it is
# undefined, 0 is stored on the object and returned.
sub GetOverwriteExistingFile
{
    my $self = shift;

    unless( defined( $self->{ _overwriteExistingFile } ) )
    {
        $self->{ _overwriteExistingFile } = 0;
    }

    return $self->{ _overwriteExistingFile };
}
| 1574 |  |  |  |  |  |  |  | 
| 1575 |  |  |  |  |  |  |  | 
| 1576 |  |  |  |  |  |  | ###################################################################################### | 
| 1577 |  |  |  |  |  |  | #    Mutators | 
| 1578 |  |  |  |  |  |  | ###################################################################################### | 
| 1579 |  |  |  |  |  |  |  | 
# Mutator: stores $value in the object's _storeTitle field and returns the
# stored value.
sub SetStoreTitle
{
    my $self  = shift;
    my $value = shift;

    $self->{ _storeTitle } = $value;

    return $self->{ _storeTitle };
}
| 1585 |  |  |  |  |  |  |  | 
# Mutator: stores $value in the object's _storeAbstract field and returns
# the stored value.
sub SetStoreAbstract
{
    my $self  = shift;
    my $value = shift;

    $self->{ _storeAbstract } = $value;

    return $self->{ _storeAbstract };
}
| 1591 |  |  |  |  |  |  |  | 
# Mutator: stores $dir in the object's _workingDir field and returns the
# stored value.
sub SetWorkingDir
{
    my $self = shift;
    my $dir  = shift;

    $self->{ _workingDir } = $dir;

    return $self->{ _workingDir };
}
| 1597 |  |  |  |  |  |  |  | 
# Mutator: stores $dir in the object's _savePath field and returns the
# stored value.
sub SetSavePath
{
    my $self = shift;
    my $dir  = shift;

    $self->{ _savePath } = $dir;

    return $self->{ _savePath };
}
| 1603 |  |  |  |  |  |  |  | 
# Mutator: stores $value in the object's _quickParse field and returns the
# stored value.
sub SetQuickParse
{
    my $self  = shift;
    my $value = shift;

    $self->{ _quickParse } = $value;

    return $self->{ _quickParse };
}
| 1609 |  |  |  |  |  |  |  | 
# Mutator: stores $value in the object's _compoundifyText field and returns
# the stored value.
sub SetCompoundifyText
{
    my $self  = shift;
    my $value = shift;

    $self->{ _compoundifyText } = $value;

    return $self->{ _compoundifyText };
}
| 1615 |  |  |  |  |  |  |  | 
# Mutator: stores $value in the object's _numOfThreads field and returns the
# stored value. A negative $value is logged as a warning and replaced with
# the result of Sys::CpuAffinity::getNumCpus().
sub SetNumOfThreads
{
    my ( $self, $value ) = @_;

    # Check
    if( $value < 0 )
    {
        $self->WriteLog( "SetNumOfThreads - Warning: Number Of Threads Value < 0 / Setting Default Value" );
        $value = Sys::CpuAffinity::getNumCpus();
    }

    $self->{ _numOfThreads } = $value;

    return $self->{ _numOfThreads };
}
| 1626 |  |  |  |  |  |  |  | 
# Mutator: stores $str in the object's _beginDate field and returns the
# stored value.
sub SetBeginDate
{
    my $self = shift;
    my $str  = shift;

    $self->{ _beginDate } = $str;

    return $self->{ _beginDate };
}
| 1632 |  |  |  |  |  |  |  | 
| 1633 |  |  |  |  |  |  | sub SetEndDate | 
| 1634 |  |  |  |  |  |  | { | 
| 1635 |  |  |  |  |  |  | my ( $self, $str ) = @_; | 
| 1636 |  |  |  |  |  |  | return $self->{ _endDate } = $str; | 
| 1637 |  |  |  |  |  |  | } | 
| 1638 |  |  |  |  |  |  |  | 
| 1639 |  |  |  |  |  |  | sub SetXMLStringToParse | 
| 1640 |  |  |  |  |  |  | { | 
| 1641 |  |  |  |  |  |  | my ( $self, $str ) = @_; | 
| 1642 |  |  |  |  |  |  | return $self->{ _xmlStringToParse } = $str; | 
| 1643 |  |  |  |  |  |  | } | 
| 1644 |  |  |  |  |  |  |  | 
| 1645 |  |  |  |  |  |  | sub SetTextCorpusStr | 
| 1646 |  |  |  |  |  |  | { | 
| 1647 |  |  |  |  |  |  | my ( $self, $str ) = @_; | 
| 1648 |  |  |  |  |  |  | return $self->{ _textCorpusStr } = $str; | 
| 1649 |  |  |  |  |  |  | } | 
| 1650 |  |  |  |  |  |  |  | 
| 1651 |  |  |  |  |  |  | sub AppendStrToTextCorpus | 
| 1652 |  |  |  |  |  |  | { | 
| 1653 |  |  |  |  |  |  | my ( $self, $str ) = @_; | 
| 1654 |  |  |  |  |  |  |  | 
| 1655 |  |  |  |  |  |  | return if ( $str eq "" || !defined( $str ) ); | 
| 1656 |  |  |  |  |  |  |  | 
| 1657 |  |  |  |  |  |  | # Prevent Other Threads From Appending Data At The Same Time | 
| 1658 |  |  |  |  |  |  | { | 
| 1659 |  |  |  |  |  |  | lock( $appendLock ); | 
| 1660 |  |  |  |  |  |  |  | 
| 1661 |  |  |  |  |  |  | # Removes Spaces At Both Ends Of String And More Than One Space In-Between Ends | 
| 1662 |  |  |  |  |  |  | $str =~ s/^\s+|\s(?=\s)|\s+$//g; | 
| 1663 |  |  |  |  |  |  |  | 
| 1664 |  |  |  |  |  |  | # Append string to text corpus | 
| 1665 |  |  |  |  |  |  | $self->{ _textCorpusStr } .= "$str "; | 
| 1666 |  |  |  |  |  |  | } | 
| 1667 |  |  |  |  |  |  | } | 
| 1668 |  |  |  |  |  |  |  | 
| 1669 |  |  |  |  |  |  | sub ClearTextCorpusStr | 
| 1670 |  |  |  |  |  |  | { | 
| 1671 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 1672 |  |  |  |  |  |  | return $self->{ _textCorpusStr } = ""; | 
| 1673 |  |  |  |  |  |  | } | 
| 1674 |  |  |  |  |  |  |  | 
| 1675 |  |  |  |  |  |  | sub SetTempStr | 
| 1676 |  |  |  |  |  |  | { | 
| 1677 |  |  |  |  |  |  | my ( $self, $str ) = @_; | 
| 1678 |  |  |  |  |  |  |  | 
| 1679 |  |  |  |  |  |  | # Remove Special Characters And Transliterate String To Plain ASCII (Fixes Wide Character Bug) | 
| 1680 |  |  |  |  |  |  | $str = $self->RemoveSpecialCharactersFromString( $str ); | 
| 1681 |  |  |  |  |  |  | $str = Text::Unidecode::unidecode( $str ); | 
| 1682 |  |  |  |  |  |  |  | 
| 1683 |  |  |  |  |  |  | return $self->{ _tempStr } = $str; | 
| 1684 |  |  |  |  |  |  | } | 
| 1685 |  |  |  |  |  |  |  | 
| 1686 |  |  |  |  |  |  | sub AppendToTempStr | 
| 1687 |  |  |  |  |  |  | { | 
| 1688 |  |  |  |  |  |  | my ( $self, $str ) = @_; | 
| 1689 |  |  |  |  |  |  |  | 
| 1690 |  |  |  |  |  |  | # Remove Special Characters And Transliterate String To Plain ASCII (Fixes Wide Character Bug) | 
| 1691 |  |  |  |  |  |  | $str = $self->RemoveSpecialCharactersFromString( $str ); | 
| 1692 |  |  |  |  |  |  | $str = Text::Unidecode::unidecode( $str ); | 
| 1693 |  |  |  |  |  |  |  | 
| 1694 |  |  |  |  |  |  | # Removes Spaces At Both Ends Of String And More Than One Space In-Between Ends | 
| 1695 |  |  |  |  |  |  | $str =~ s/^\s+|\s(?=\s)|\s+$//g; | 
| 1696 |  |  |  |  |  |  |  | 
| 1697 |  |  |  |  |  |  | # Increment Word Counter | 
| 1698 |  |  |  |  |  |  | my @words = split( ' ', $str ); | 
| 1699 |  |  |  |  |  |  | $preCompWordCount += scalar( @words ); | 
| 1700 |  |  |  |  |  |  | undef( @words ); | 
| 1701 |  |  |  |  |  |  |  | 
| 1702 |  |  |  |  |  |  | # Append string to temp string | 
| 1703 |  |  |  |  |  |  | return $self->{ _tempStr } .= "$str "; | 
| 1704 |  |  |  |  |  |  | } | 
| 1705 |  |  |  |  |  |  |  | 
| 1706 |  |  |  |  |  |  | sub ClearTempStr | 
| 1707 |  |  |  |  |  |  | { | 
| 1708 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 1709 |  |  |  |  |  |  | return $self->{ _tempStr } = ""; | 
| 1710 |  |  |  |  |  |  | } | 
| 1711 |  |  |  |  |  |  |  | 
| 1712 |  |  |  |  |  |  | sub SetTempDate | 
| 1713 |  |  |  |  |  |  | { | 
| 1714 |  |  |  |  |  |  | my ( $self, $str ) = @_; | 
| 1715 |  |  |  |  |  |  | return $self->{ _tempDate } = $str; | 
| 1716 |  |  |  |  |  |  | } | 
| 1717 |  |  |  |  |  |  |  | 
| 1718 |  |  |  |  |  |  | sub ClearTempDate | 
| 1719 |  |  |  |  |  |  | { | 
| 1720 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 1721 |  |  |  |  |  |  | return $self->{ _tempDate } = ""; | 
| 1722 |  |  |  |  |  |  | } | 
| 1723 |  |  |  |  |  |  |  | 
| 1724 |  |  |  |  |  |  | sub SetCompoundWordAry | 
| 1725 |  |  |  |  |  |  | { | 
| 1726 |  |  |  |  |  |  | my ( $self, $aryRef ) = @_; | 
| 1727 |  |  |  |  |  |  | $self->WriteLog( "Warning: Setting CompoundWordArray when array is already defined - Clearing Previous Array" ) if ( @{ $self->{ _compoundWordAry } } > 0 ); | 
| 1728 |  |  |  |  |  |  | undef( $self->{ _compoundWordAry } ) if ( @{ $self->{ _compoundWordAry } } > 0 ); | 
| 1729 |  |  |  |  |  |  | return @{ $self->{ _compoundWordAry } } = @{ $aryRef }; | 
| 1730 |  |  |  |  |  |  | } | 
| 1731 |  |  |  |  |  |  |  | 
| 1732 |  |  |  |  |  |  | sub ClearCompoundWordAry | 
| 1733 |  |  |  |  |  |  | { | 
| 1734 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 1735 |  |  |  |  |  |  | undef( $self->{ _compoundWordAry } ); | 
| 1736 |  |  |  |  |  |  | return @{ $self->{ _compoundWordAry } } = (); | 
| 1737 |  |  |  |  |  |  | } | 
| 1738 |  |  |  |  |  |  |  | 
| 1739 |  |  |  |  |  |  | sub SetCompoundWordBST | 
| 1740 |  |  |  |  |  |  | { | 
| 1741 |  |  |  |  |  |  | my ( $self, $bst ) = @_; | 
| 1742 |  |  |  |  |  |  | $self->WriteLog( "Warning: Setting CompoundWordBST when BST is already defined - Clearing Previous BST" ) if defined ( $self->{ _compoundWordBST } ); | 
| 1743 |  |  |  |  |  |  | $self->{ _compoundWordBST }->DESTROY() if defined( $self->{ _compoundWordBST } ); | 
| 1744 |  |  |  |  |  |  | undef( $self->{ _compoundWordBST } ) if defined ( $self->{ _compoundWordBST } ); | 
| 1745 |  |  |  |  |  |  | return $self->{ _compoundWordBST } = $bst; | 
| 1746 |  |  |  |  |  |  | } | 
| 1747 |  |  |  |  |  |  |  | 
| 1748 |  |  |  |  |  |  | sub ClearCompoundWordBST | 
| 1749 |  |  |  |  |  |  | { | 
| 1750 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 1751 |  |  |  |  |  |  | undef( $self->{ _compoundWordBST } ); | 
| 1752 |  |  |  |  |  |  | return $self->{ _compoundWordBST }; | 
| 1753 |  |  |  |  |  |  | } | 
| 1754 |  |  |  |  |  |  |  | 
| 1755 |  |  |  |  |  |  | sub SetMaxCompoundWordLength | 
| 1756 |  |  |  |  |  |  | { | 
| 1757 |  |  |  |  |  |  | my ( $self, $value ) = @_; | 
| 1758 |  |  |  |  |  |  | return $self->{ _maxCompoundWordLength } = $value; | 
| 1759 |  |  |  |  |  |  | } | 
| 1760 |  |  |  |  |  |  |  | 
| 1761 |  |  |  |  |  |  | sub SetOverwriteExistingFile | 
| 1762 |  |  |  |  |  |  | { | 
| 1763 |  |  |  |  |  |  | my ( $self, $value ) = @_; | 
| 1764 |  |  |  |  |  |  | return $self->{ _overwriteExistingFile } = $value; | 
| 1765 |  |  |  |  |  |  | } | 
| 1766 |  |  |  |  |  |  |  | 
| 1767 |  |  |  |  |  |  |  | 
| 1768 |  |  |  |  |  |  | ###################################################################################### | 
| 1769 |  |  |  |  |  |  | #    Debug Functions | 
| 1770 |  |  |  |  |  |  | ###################################################################################### | 
| 1771 |  |  |  |  |  |  |  | 
| 1772 |  |  |  |  |  |  | sub GetTime | 
| 1773 |  |  |  |  |  |  | { | 
| 1774 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 1775 |  |  |  |  |  |  | my( $sec, $min, $hour ) = localtime(); | 
| 1776 |  |  |  |  |  |  |  | 
| 1777 |  |  |  |  |  |  | $hour = "0$hour" if( $hour < 10 ); | 
| 1778 |  |  |  |  |  |  | $min = "0$min"   if( $min < 10 ); | 
| 1779 |  |  |  |  |  |  | $sec = "0$sec"   if( $sec < 10 ); | 
| 1780 |  |  |  |  |  |  |  | 
| 1781 |  |  |  |  |  |  | return "$hour:$min:$sec"; | 
| 1782 |  |  |  |  |  |  | } | 
| 1783 |  |  |  |  |  |  |  | 
| 1784 |  |  |  |  |  |  | sub GetDate | 
| 1785 |  |  |  |  |  |  | { | 
| 1786 |  |  |  |  |  |  | my ( $self ) = @_; | 
| 1787 |  |  |  |  |  |  | my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime(); | 
| 1788 |  |  |  |  |  |  |  | 
| 1789 |  |  |  |  |  |  | $mon += 1; | 
| 1790 |  |  |  |  |  |  | $year += 1900; | 
| 1791 |  |  |  |  |  |  |  | 
| 1792 |  |  |  |  |  |  | return "$mon/$mday/$year"; | 
| 1793 |  |  |  |  |  |  | } | 
| 1794 |  |  |  |  |  |  |  | 
| 1795 |  |  |  |  |  |  | sub WriteLog | 
| 1796 |  |  |  |  |  |  | { | 
| 1797 |  |  |  |  |  |  | my ( $self ) = shift; | 
| 1798 |  |  |  |  |  |  | my $string = shift; | 
| 1799 |  |  |  |  |  |  | my $printNewLine = shift; | 
| 1800 |  |  |  |  |  |  |  | 
| 1801 |  |  |  |  |  |  | return if !defined ( $string ); | 
| 1802 |  |  |  |  |  |  | $printNewLine = 1 if !defined ( $printNewLine ); | 
| 1803 |  |  |  |  |  |  |  | 
| 1804 |  |  |  |  |  |  | # Prevent Other Threads From Writing At The Same Time | 
| 1805 |  |  |  |  |  |  | lock( $debugLock ); | 
| 1806 |  |  |  |  |  |  |  | 
| 1807 |  |  |  |  |  |  | if( $self->GetDebugLog() ) | 
| 1808 |  |  |  |  |  |  | { | 
| 1809 |  |  |  |  |  |  | if( ref ( $self ) ne "Word2vec::Xmltow2v" ) | 
| 1810 |  |  |  |  |  |  | { | 
| 1811 |  |  |  |  |  |  | print( GetDate() . " " . GetTime() . " - xmltow2v: Cannot Call WriteLog() From Outside Module!\n" ); | 
| 1812 |  |  |  |  |  |  | return; | 
| 1813 |  |  |  |  |  |  | } | 
| 1814 |  |  |  |  |  |  |  | 
| 1815 |  |  |  |  |  |  | $string = "" if !defined ( $string ); | 
| 1816 |  |  |  |  |  |  | print GetDate() . " " . GetTime() . " - xmltow2v::$string"; | 
| 1817 |  |  |  |  |  |  | print "\n" if( $printNewLine != 0 ); | 
| 1818 |  |  |  |  |  |  | } | 
| 1819 |  |  |  |  |  |  |  | 
| 1820 |  |  |  |  |  |  | if( $self->GetWriteLog() ) | 
| 1821 |  |  |  |  |  |  | { | 
| 1822 |  |  |  |  |  |  | if( ref ( $self ) ne "Word2vec::Xmltow2v" ) | 
| 1823 |  |  |  |  |  |  | { | 
| 1824 |  |  |  |  |  |  | print( GetDate() . " " . GetTime() . " - xmltow2v: Cannot Call WriteLog() From Outside Module!\n" ); | 
| 1825 |  |  |  |  |  |  | return; | 
| 1826 |  |  |  |  |  |  | } | 
| 1827 |  |  |  |  |  |  |  | 
| 1828 |  |  |  |  |  |  | my $fileHandle = $self->GetFileHandle(); | 
| 1829 |  |  |  |  |  |  |  | 
| 1830 |  |  |  |  |  |  | if( defined( $fileHandle ) ) | 
| 1831 |  |  |  |  |  |  | { | 
| 1832 |  |  |  |  |  |  | print( $fileHandle GetDate() . " " . GetTime() . " - xmltow2v::$string" ); | 
| 1833 |  |  |  |  |  |  | print( $fileHandle "\n" ) if( $printNewLine != 0 ); | 
| 1834 |  |  |  |  |  |  | } | 
| 1835 |  |  |  |  |  |  | } | 
| 1836 |  |  |  |  |  |  | } | 
| 1837 |  |  |  |  |  |  |  | 
| 1838 |  |  |  |  |  |  | #################### All Modules Are To Output "1"(True) at EOF ###################### | 
| 1839 |  |  |  |  |  |  | 1; | 
| 1840 |  |  |  |  |  |  |  | 
| 1841 |  |  |  |  |  |  |  | 
| 1842 |  |  |  |  |  |  | =head1 NAME | 
| 1843 |  |  |  |  |  |  |  | 
| 1844 |  |  |  |  |  |  | Word2vec::Xmltow2v - Medline XML-To-W2V Module. | 
| 1845 |  |  |  |  |  |  |  | 
| 1846 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 1847 |  |  |  |  |  |  |  | 
| 1848 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 1849 |  |  |  |  |  |  |  | 
| 1850 |  |  |  |  |  |  | # Parameters: Debug Output = True, Write Log = False, StoreTitle = True, StoreAbstract = True, Quick Parse = True, CompoundifyText = True, Use Multi-Threading (2 Threads) | 
| 1851 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new( 1, 0, 1, 1, 1, 1, 2 );      # Note: Specifying no parameters implies default settings. | 
| 1852 |  |  |  |  |  |  | $xmlconv->SetWorkingDir( "Medline/XML/Directory/Here" ); | 
| 1853 |  |  |  |  |  |  | $xmlconv->SetSavePath( "textcorpus.txt" ); | 
| 1854 |  |  |  |  |  |  | $xmlconv->SetStoreTitle( 1 ); | 
| 1855 |  |  |  |  |  |  | $xmlconv->SetStoreAbstract( 1 ); | 
| 1856 |  |  |  |  |  |  | $xmlconv->SetBeginDate( "01/01/2004" ); | 
| 1857 |  |  |  |  |  |  | $xmlconv->SetEndDate( "08/13/2016" ); | 
| 1858 |  |  |  |  |  |  | $xmlconv->SetOverwriteExistingFile( 1 ); | 
| 1859 |  |  |  |  |  |  |  | 
| 1860 |  |  |  |  |  |  | # If Compound Word File Exists, Store It In Memory And Create Compound Word Binary Search Tree | 
| 1861 |  |  |  |  |  |  | $xmlconv->ReadCompoundWordDataFromFile( "compoundword.txt", 1 ); | 
| 1862 |  |  |  |  |  |  | $xmlconv->CreateCompoundWordBST(); | 
| 1863 |  |  |  |  |  |  |  | 
| 1864 |  |  |  |  |  |  | # Parse XML Files or Directory Of Files | 
| 1865 |  |  |  |  |  |  | $xmlconv->ConvertMedlineXMLToW2V( "/xmlDirectory/" ); | 
| 1866 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 1867 |  |  |  |  |  |  |  | 
| 1868 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 1869 |  |  |  |  |  |  |  | 
| 1870 |  |  |  |  |  |  | Word2vec::Xmltow2v is an XML-to-text module which converts Medline XML article title | 
| 1871 |  |  |  |  |  |  | and abstract data, given a date range, into a plain text corpus for use | 
| 1872 |  |  |  |  |  |  | with Word2vec::Interface. It also "compoundifies" during text corpus compilation | 
| 1873 |  |  |  |  |  |  | given a compound word file. | 
| 1874 |  |  |  |  |  |  |  | 
| 1875 |  |  |  |  |  |  | =head2 Main Functions | 
| 1876 |  |  |  |  |  |  |  | 
| 1877 |  |  |  |  |  |  | =head3 new | 
| 1878 |  |  |  |  |  |  |  | 
| 1879 |  |  |  |  |  |  | Description: | 
| 1880 |  |  |  |  |  |  |  | 
| 1881 |  |  |  |  |  |  | Returns a new 'Word2vec::Xmltow2v' module object. | 
| 1882 |  |  |  |  |  |  |  | 
| 1883 |  |  |  |  |  |  | Note: Specifying no parameters implies default options. | 
| 1884 |  |  |  |  |  |  |  | 
| 1885 |  |  |  |  |  |  | Default Parameters: | 
| 1886 |  |  |  |  |  |  | debugLog                    = 0 | 
| 1887 |  |  |  |  |  |  | writeLog                    = 0 | 
| 1888 |  |  |  |  |  |  | storeTitle                  = 1 | 
| 1889 |  |  |  |  |  |  | storeAbstract               = 1 | 
| 1890 |  |  |  |  |  |  | quickParse                  = 0 | 
| 1891 |  |  |  |  |  |  | compoundifyText             = 0 | 
| 1892 |  |  |  |  |  |  | numOfThreads                = Number of CPUs/CPU cores (1 thread per core/CPU) | 
| 1893 |  |  |  |  |  |  | workingDir                  = Current Directory | 
| 1894 |  |  |  |  |  |  | savePath                    = Current Directory | 
| 1895 |  |  |  |  |  |  | beginDate                   = "00/00/0000" | 
| 1896 |  |  |  |  |  |  | endDate                     = "99/99/9999" | 
| 1897 |  |  |  |  |  |  | xmlStringToParse            = "(null)" | 
| 1898 |  |  |  |  |  |  | textCorpusString            = "" | 
| 1899 |  |  |  |  |  |  | twigHandler                 = 0 | 
| 1900 |  |  |  |  |  |  | parsedCount                 = 0 | 
| 1901 |  |  |  |  |  |  | tempDate                    = "" | 
| 1902 |  |  |  |  |  |  | tempStr                     = "" | 
| 1903 |  |  |  |  |  |  | outputFileName              = "textcorpus.txt" | 
| 1904 |  |  |  |  |  |  | compoundWordAry             = () | 
| 1905 |  |  |  |  |  |  | compoundWordBST             = Word2vec::Bst->new() | 
| 1906 |  |  |  |  |  |  | maxCompoundWordLength       = 0 | 
| 1907 |  |  |  |  |  |  | overwriteExistingFile       = 0 | 
| 1908 |  |  |  |  |  |  |  | 
| 1909 |  |  |  |  |  |  | Input: | 
| 1910 |  |  |  |  |  |  |  | 
| 1911 |  |  |  |  |  |  | $debugLog                    -> Instructs module to print debug statements to the console. (1 = True / 0 = False) | 
| 1912 |  |  |  |  |  |  | $writeLog                    -> Instructs module to print debug statements to a log file. (1 = True / 0 = False) | 
| 1913 |  |  |  |  |  |  | $storeTitle                  -> Instructs module to store Medline article titles during text corpus compilation. (1 = True / 0 = False) | 
| 1914 |  |  |  |  |  |  | $storeAbstract               -> Instructs module to store Medline article abstracts during text corpus compilation. (1 = True / 0 = False) | 
| 1915 |  |  |  |  |  |  | $quickParse                  -> Instructs module to utilize quick XML parsing Functions for known Medline article title and abstract tags. (1 = True / 0 = False) | 
| 1916 |  |  |  |  |  |  | $compoundifyText             -> Instructs module to compoundify text on the fly given a compound word file. This is automatically set | 
| 1917 |  |  |  |  |  |  | when reading the compound word file to memory regardless of user setting. (1 = True / 0 = False) | 
| 1918 |  |  |  |  |  |  | $numOfThreads                -> Specifies the number of worker threads which parse Medline XML files simultaneously to create the text corpus. | 
| 1919 |  |  |  |  |  |  | This speeds up text corpus generation by the number of physical cores present on a given machine. (Positive integer value) | 
| 1920 |  |  |  |  |  |  | i.e. Using four threads on an Intel i7 machine makes text corpus generation roughly four times faster than running single-threaded. | 
| 1921 |  |  |  |  |  |  | $workingDir                  -> Specifies the current working directory. (String) | 
| 1922 |  |  |  |  |  |  | $savePath                    -> Specifies the save path for text corpus generation. (String) | 
| 1923 |  |  |  |  |  |  | $beginDate                   -> Specifies the beginning date range for Medline article text corpus composition. (Format: XX/XX/XXXX) | 
| 1924 |  |  |  |  |  |  | $endDate                     -> Specifies the ending date range for Medline article text corpus composition. (Format: XX/XX/XXXX) | 
| 1925 |  |  |  |  |  |  | $xmlStringToParse            -> Storage location for the current Medline XML file in memory. (String) | 
| 1926 |  |  |  |  |  |  | $textCorpusString            -> Temporary storage location for text corpus generation in memory. (String) | 
| 1927 |  |  |  |  |  |  | $twigHandler                 -> XML::Twig object location. | 
| 1928 |  |  |  |  |  |  | $parsedCount                 -> Number of parsed Medline articles during text corpus generation. | 
| 1929 |  |  |  |  |  |  | $tempDate                    -> Temporary storage location for current Medline article date during text corpus compilation. | 
| 1930 |  |  |  |  |  |  | $tempStr                     -> Temporary storage location for current Medline article title/abstract during text corpus compilation. | 
| 1931 |  |  |  |  |  |  | $outputFileName              -> Output file path/name. | 
| 1932 |  |  |  |  |  |  | $compoundWordAry             -> Storage location for compound words, used to compoundify text. (Array) <- Deprecated | 
| 1933 |  |  |  |  |  |  | $compoundWordBST             -> Storage location for compound words, used to compoundify text. (Binary Search Tree) <- Supersedes '$compoundWordAry' | 
| 1934 |  |  |  |  |  |  | $maxCompoundWordLength       -> Maximum number of words able to be compoundified in one phrase. ie "six_sea_snakes_were_sailing" = 5 compoundified words. | 
| 1935 |  |  |  |  |  |  | The compounding algorithm will attempt to compoundify no more than this set value, even though the compound word list could | 
| 1936 |  |  |  |  |  |  | possibly contain larger compounded phrases. | 
| 1937 |  |  |  |  |  |  | $overwriteExistingFile       -> Instructs the module to either overwrite any existing text corpus files or append to the existing file. | 
| 1938 |  |  |  |  |  |  |  | 
| 1939 |  |  |  |  |  |  | Note: It is not recommended to specify all new() parameters, as it has not been thoroughly tested. Maximum recommended parameters to be specified include: | 
| 1940 |  |  |  |  |  |  | "debugLog, writeLog, storeTitle, storeAbstract, quickParse, compoundifyText, numOfThreads, workingDir, savePath, beginDate, endDate" | 
| 1941 |  |  |  |  |  |  |  | 
| 1942 |  |  |  |  |  |  | Output: | 
| 1943 |  |  |  |  |  |  |  | 
| 1944 |  |  |  |  |  |  | Word2vec::Xmltow2v object. | 
| 1945 |  |  |  |  |  |  |  | 
| 1946 |  |  |  |  |  |  | Example: | 
| 1947 |  |  |  |  |  |  |  | 
| 1948 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 1949 |  |  |  |  |  |  |  | 
| 1950 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new();  # Note: Specifying no parameters implies default settings as listed above. | 
| 1951 |  |  |  |  |  |  |  | 
| 1952 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 1953 |  |  |  |  |  |  |  | 
| 1954 |  |  |  |  |  |  | # Or | 
| 1955 |  |  |  |  |  |  |  | 
| 1956 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 1957 |  |  |  |  |  |  |  | 
| 1958 |  |  |  |  |  |  | # Parameters: Debug Output = True, Write Log = False, StoreTitle = True, StoreAbstract = True, Quick Parse = True, CompoundifyText = True, Use Multi-Threading (2 Threads) | 
| 1959 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new( 1, 0, 1, 1, 1, 1, 2 ); | 
| 1960 |  |  |  |  |  |  |  | 
| 1961 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 1962 |  |  |  |  |  |  |  | 
| 1963 |  |  |  |  |  |  | =head3 DESTROY | 
| 1964 |  |  |  |  |  |  |  | 
| 1965 |  |  |  |  |  |  | Description: | 
| 1966 |  |  |  |  |  |  |  | 
| 1967 |  |  |  |  |  |  | Removes module objects and variables from memory. | 
| 1968 |  |  |  |  |  |  |  | 
| 1969 |  |  |  |  |  |  | Input: | 
| 1970 |  |  |  |  |  |  |  | 
| 1971 |  |  |  |  |  |  | None | 
| 1972 |  |  |  |  |  |  |  | 
| 1973 |  |  |  |  |  |  | Output: | 
| 1974 |  |  |  |  |  |  |  | 
| 1975 |  |  |  |  |  |  | None | 
| 1976 |  |  |  |  |  |  |  | 
| 1977 |  |  |  |  |  |  | Example: | 
| 1978 |  |  |  |  |  |  |  | 
| 1979 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 1980 |  |  |  |  |  |  |  | 
| 1981 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 1982 |  |  |  |  |  |  |  | 
| 1983 |  |  |  |  |  |  | $xmlconv->DESTROY(); | 
| 1984 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 1985 |  |  |  |  |  |  |  | 
| 1986 |  |  |  |  |  |  | =head3 ConvertMedlineXMLToW2V | 
| 1987 |  |  |  |  |  |  |  | 
| 1988 |  |  |  |  |  |  | Description: | 
| 1989 |  |  |  |  |  |  |  | 
| 1990 |  |  |  |  |  |  | Parses specified parameter Medline XML file or directory of files, creating a text corpus. Returns 0 if successful or -1 if an error occurs. | 
| 1991 |  |  |  |  |  |  |  | 
| 1992 |  |  |  |  |  |  | Note: Supports plain Medline XML or gzipped XML files. | 
| 1993 |  |  |  |  |  |  |  | 
| 1994 |  |  |  |  |  |  | Input: | 
| 1995 |  |  |  |  |  |  |  | 
| 1996 |  |  |  |  |  |  | $filePath -> XML file path to parse. (This can be a single file or directory of XML/XML.gz files). | 
| 1997 |  |  |  |  |  |  |  | 
| 1998 |  |  |  |  |  |  | Output: | 
| 1999 |  |  |  |  |  |  |  | 
| 2000 |  |  |  |  |  |  | $value    -> '0' = Successful / '-1' = Un-Successful | 
| 2001 |  |  |  |  |  |  |  | 
| 2002 |  |  |  |  |  |  | Example: | 
| 2003 |  |  |  |  |  |  |  | 
| 2004 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2005 |  |  |  |  |  |  |  | 
| 2006 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new();      # Note: Specifying no parameters implies default settings | 
| 2007 |  |  |  |  |  |  | $xmlconv->SetSavePath( "testCorpus.txt" ); | 
| 2008 |  |  |  |  |  |  | $xmlconv->SetStoreTitle( 1 ); | 
| 2009 |  |  |  |  |  |  | $xmlconv->SetStoreAbstract( 1 ); | 
| 2010 |  |  |  |  |  |  | $xmlconv->SetBeginDate( "01/01/2004" ); | 
| 2011 |  |  |  |  |  |  | $xmlconv->SetEndDate( "08/13/2016" ); | 
| 2012 |  |  |  |  |  |  | $xmlconv->SetOverwriteExistingFile( 1 ); | 
| 2013 |  |  |  |  |  |  | $xmlconv->ConvertMedlineXMLToW2V( "/xmlDirectory/" ); | 
| 2014 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2015 |  |  |  |  |  |  |  | 
| 2016 |  |  |  |  |  |  |  | 
| 2017 |  |  |  |  |  |  | =head3 _ThreadedConvert | 
| 2018 |  |  |  |  |  |  |  | 
| 2019 |  |  |  |  |  |  | Description: | 
| 2020 |  |  |  |  |  |  |  | 
| 2021 |  |  |  |  |  |  | Multi-Threaded Medline XML to text corpus conversion function. | 
| 2022 |  |  |  |  |  |  |  | 
| 2023 |  |  |  |  |  |  | Input: | 
| 2024 |  |  |  |  |  |  |  | 
| 2025 |  |  |  |  |  |  | $directory -> File directory or directory of files to parse. | 
| 2026 |  |  |  |  |  |  |  | 
| 2027 |  |  |  |  |  |  | Output: | 
| 2028 |  |  |  |  |  |  |  | 
| 2029 |  |  |  |  |  |  | $value     -> '0' = Successful / '-1' = Un-successful | 
| 2030 |  |  |  |  |  |  |  | 
| 2031 |  |  |  |  |  |  | Example: | 
| 2032 |  |  |  |  |  |  |  | 
| 2033 |  |  |  |  |  |  | Warning: This is a private function called by 'ConvertMedlineXMLToW2V()'. It should not be called outside of xmltow2v module. | 
| 2034 |  |  |  |  |  |  |  | 
| 2035 |  |  |  |  |  |  | =head3 _ParseXMLString | 
| 2036 |  |  |  |  |  |  |  | 
| 2037 |  |  |  |  |  |  | Description: | 
| 2038 |  |  |  |  |  |  |  | 
| 2039 |  |  |  |  |  |  | Parses passed string parameter for Medline XML article title and abstract data and appends found data to the text corpus. | 
| 2040 |  |  |  |  |  |  |  | 
| 2041 |  |  |  |  |  |  | Input: | 
| 2042 |  |  |  |  |  |  |  | 
| 2043 |  |  |  |  |  |  | $string -> Medline XML string data to parse. | 
| 2044 |  |  |  |  |  |  |  | 
| 2045 |  |  |  |  |  |  | Output: | 
| 2046 |  |  |  |  |  |  |  | 
| 2047 |  |  |  |  |  |  | None | 
| 2048 |  |  |  |  |  |  |  | 
| 2049 |  |  |  |  |  |  | Example: | 
| 2050 |  |  |  |  |  |  |  | 
| 2051 |  |  |  |  |  |  | Warning: This is a private function called by "ConvertMedlineXMLToW2V()" and "_ThreadedConvert()". It should not be called outside of xmltow2v module. | 
| 2052 |  |  |  |  |  |  |  | 
| 2053 |  |  |  |  |  |  | =head3 _CheckParseRequirements | 
| 2054 |  |  |  |  |  |  |  | 
| 2055 |  |  |  |  |  |  | Description: | 
| 2056 |  |  |  |  |  |  |  | 
| 2057 |  |  |  |  |  |  | Checks passed string parameter to see if it contains relevant data and XML::Twig handler is initialized. | 
| 2058 |  |  |  |  |  |  |  | 
| 2059 |  |  |  |  |  |  | Input: | 
| 2060 |  |  |  |  |  |  |  | 
| 2061 |  |  |  |  |  |  | $string -> String data to check | 
| 2062 |  |  |  |  |  |  |  | 
| 2063 |  |  |  |  |  |  | Output: | 
| 2064 |  |  |  |  |  |  |  | 
| 2065 |  |  |  |  |  |  | $value  -> '0' = Successful / '-1' = Un-successful | 
| 2066 |  |  |  |  |  |  |  | 
| 2067 |  |  |  |  |  |  | Example: | 
| 2068 |  |  |  |  |  |  |  | 
| 2069 |  |  |  |  |  |  | Warning: This is a private function called by "_ParseXMLString()". It should not be called outside of xmltow2v module. | 
| 2070 |  |  |  |  |  |  |  | 
| 2071 |  |  |  |  |  |  | =head3 _CheckForNullData | 
| 2072 |  |  |  |  |  |  |  | 
| 2073 |  |  |  |  |  |  | Description: | 
| 2074 |  |  |  |  |  |  |  | 
| 2075 |  |  |  |  |  |  | Checks passed string parameter for "(null)" string. | 
| 2076 |  |  |  |  |  |  |  | 
| 2077 |  |  |  |  |  |  | Input: | 
| 2078 |  |  |  |  |  |  |  | 
| 2079 |  |  |  |  |  |  | $string -> String data to be checked. | 
| 2080 |  |  |  |  |  |  |  | 
| 2081 |  |  |  |  |  |  | Output: | 
| 2082 |  |  |  |  |  |  |  | 
| 2083 |  |  |  |  |  |  | $value  -> '1' = True/Null data or '0' = False/Valid data | 
| 2084 |  |  |  |  |  |  |  | 
| 2085 |  |  |  |  |  |  | Example: | 
| 2086 |  |  |  |  |  |  |  | 
| 2087 |  |  |  |  |  |  | Warning: This is a private function called by "new()" and "_ParseXMLString()". It should not be called outside of xmltow2v module. | 
| 2088 |  |  |  |  |  |  |  | 
| 2089 |  |  |  |  |  |  | =head3 _RemoveXMLVersion | 
| 2090 |  |  |  |  |  |  |  | 
| 2091 |  |  |  |  |  |  | Description: | 
| 2092 |  |  |  |  |  |  |  | 
| 2093 |  |  |  |  |  |  | Removes the XML Version string prior to parsing the XML string data. (Deprecated) | 
| 2094 |  |  |  |  |  |  |  | 
| 2095 |  |  |  |  |  |  | Input: | 
| 2096 |  |  |  |  |  |  |  | 
| 2097 |  |  |  |  |  |  | $string -> Medline XML string data | 
| 2098 |  |  |  |  |  |  |  | 
| 2099 |  |  |  |  |  |  | Output: | 
| 2100 |  |  |  |  |  |  |  | 
| 2101 |  |  |  |  |  |  | None | 
| 2102 |  |  |  |  |  |  |  | 
| 2103 |  |  |  |  |  |  | Example: | 
| 2104 |  |  |  |  |  |  |  | 
| 2105 |  |  |  |  |  |  | Warning: This is a private function called by "new()" and "_ParseXMLString()". It should not be called outside of xmltow2v module. | 
| 2106 |  |  |  |  |  |  |  | 
| 2107 |  |  |  |  |  |  | =head3 _ParseMedlineCitationSet | 
| 2108 |  |  |  |  |  |  |  | 
| 2109 |  |  |  |  |  |  | Description: | 
| 2110 |  |  |  |  |  |  |  | 
| 2111 |  |  |  |  |  |  | Parses 'MedlineCitationSet' tag data in Medline XML file. | 
| 2112 |  |  |  |  |  |  |  | 
| 2113 |  |  |  |  |  |  | Input: | 
| 2114 |  |  |  |  |  |  |  | 
| 2115 |  |  |  |  |  |  | $twigHandler -> XML::Twig handler | 
| 2116 |  |  |  |  |  |  | $root        -> Beginning of XML directory to parse. ( Directory in Medline XML string data ) | 
| 2117 |  |  |  |  |  |  |  | 
| 2118 |  |  |  |  |  |  | Output: | 
| 2119 |  |  |  |  |  |  |  | 
| 2120 |  |  |  |  |  |  | None | 
| 2121 |  |  |  |  |  |  |  | 
| 2122 |  |  |  |  |  |  | Example: | 
| 2123 |  |  |  |  |  |  |  | 
| 2124 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2125 |  |  |  |  |  |  |  | 
| 2126 |  |  |  |  |  |  | =head3 _ParseMedlineArticle | 
| 2127 |  |  |  |  |  |  |  | 
| 2128 |  |  |  |  |  |  | Description: | 
| 2129 |  |  |  |  |  |  |  | 
| 2130 |  |  |  |  |  |  | Parses 'MedlineArticle' tag data in Medline XML file. | 
| 2131 |  |  |  |  |  |  |  | 
| 2132 |  |  |  |  |  |  | Input: | 
| 2133 |  |  |  |  |  |  |  | 
| 2134 |  |  |  |  |  |  | $medlineArticle -> Current Medline article directory in XML data (XML::Twig directory) | 
| 2135 |  |  |  |  |  |  |  | 
| 2136 |  |  |  |  |  |  | Output: | 
| 2137 |  |  |  |  |  |  |  | 
| 2138 |  |  |  |  |  |  | $value          -> '1' = Finished parsing Medline article. | 
| 2139 |  |  |  |  |  |  |  | 
| 2140 |  |  |  |  |  |  | Example: | 
| 2141 |  |  |  |  |  |  |  | 
| 2142 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2143 |  |  |  |  |  |  |  | 
| 2144 |  |  |  |  |  |  | =head3 _ParseDateCreated | 
| 2145 |  |  |  |  |  |  |  | 
| 2146 |  |  |  |  |  |  | Description: | 
| 2147 |  |  |  |  |  |  |  | 
| 2148 |  |  |  |  |  |  | Parses 'DateCreated' tag data in Medline XML file. | 
| 2149 |  |  |  |  |  |  |  | 
| 2150 |  |  |  |  |  |  | Input: | 
| 2151 |  |  |  |  |  |  |  | 
| 2152 |  |  |  |  |  |  | $article -> Current Medline article in XML data (XML::Twig directory) | 
| 2153 |  |  |  |  |  |  |  | 
| 2154 |  |  |  |  |  |  | Output: | 
| 2155 |  |  |  |  |  |  |  | 
| 2156 |  |  |  |  |  |  | $date    -> 'XX/XX/XXXX' (Month/Day/Year) | 
| 2157 |  |  |  |  |  |  |  | 
| 2158 |  |  |  |  |  |  | Example: | 
| 2159 |  |  |  |  |  |  |  | 
| 2160 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2161 |  |  |  |  |  |  |  | 
| 2162 |  |  |  |  |  |  | =head3 _ParseArticle | 
| 2163 |  |  |  |  |  |  |  | 
| 2164 |  |  |  |  |  |  | Description: | 
| 2165 |  |  |  |  |  |  |  | 
| 2166 |  |  |  |  |  |  | Parses 'Article' tag data in Medline XML file. Fetches 'ArticleTitle', 'Journal' and 'Abstract' XML tags. | 
| 2167 |  |  |  |  |  |  |  | 
| 2168 |  |  |  |  |  |  | Input: | 
| 2169 |  |  |  |  |  |  |  | 
| 2170 |  |  |  |  |  |  | $article -> Current Medline article in XML data (XML::Twig directory) | 
| 2171 |  |  |  |  |  |  |  | 
| 2172 |  |  |  |  |  |  | Output: | 
| 2173 |  |  |  |  |  |  |  | 
| 2174 |  |  |  |  |  |  | None | 
| 2175 |  |  |  |  |  |  |  | 
| 2176 |  |  |  |  |  |  | Example: | 
| 2177 |  |  |  |  |  |  |  | 
| 2178 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2179 |  |  |  |  |  |  |  | 
| 2180 |  |  |  |  |  |  | =head3 _ParseJournal | 
| 2181 |  |  |  |  |  |  |  | 
| 2182 |  |  |  |  |  |  | Description: | 
| 2183 |  |  |  |  |  |  |  | 
| 2184 |  |  |  |  |  |  | Parses 'Journal' tag data in Medline XML file. Fetches 'Title' XML tag. | 
| 2185 |  |  |  |  |  |  |  | 
| 2186 |  |  |  |  |  |  | Input: | 
| 2187 |  |  |  |  |  |  |  | 
| 2188 |  |  |  |  |  |  | $journalRoot -> Current Medline journal directory in XML data (XML::Twig directory) | 
| 2189 |  |  |  |  |  |  |  | 
| 2190 |  |  |  |  |  |  | Output: | 
| 2191 |  |  |  |  |  |  |  | 
| 2192 |  |  |  |  |  |  | None | 
| 2193 |  |  |  |  |  |  |  | 
| 2194 |  |  |  |  |  |  | Example: | 
| 2195 |  |  |  |  |  |  |  | 
| 2196 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2197 |  |  |  |  |  |  |  | 
| 2198 |  |  |  |  |  |  | =head3 _ParseOtherAbstract | 
| 2199 |  |  |  |  |  |  |  | 
| 2200 |  |  |  |  |  |  | Description: | 
| 2201 |  |  |  |  |  |  |  | 
| 2202 |  |  |  |  |  |  | Parses 'Abstract' tag data in Medline XML file. Fetches 'AbstractText' XML tag. | 
| 2203 |  |  |  |  |  |  |  | 
| 2204 |  |  |  |  |  |  | Input: | 
| 2205 |  |  |  |  |  |  |  | 
| 2206 |  |  |  |  |  |  | $abstractRoot -> Current Medline abstract directory in XML data (XML::Twig directory) | 
| 2207 |  |  |  |  |  |  |  | 
| 2208 |  |  |  |  |  |  | Output: | 
| 2209 |  |  |  |  |  |  |  | 
| 2210 |  |  |  |  |  |  | None | 
| 2211 |  |  |  |  |  |  |  | 
| 2212 |  |  |  |  |  |  | Example: | 
| 2213 |  |  |  |  |  |  |  | 
| 2214 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2215 |  |  |  |  |  |  |  | 
| 2216 |  |  |  |  |  |  | =head3 _QuickParseDateCreated | 
| 2217 |  |  |  |  |  |  |  | 
| 2218 |  |  |  |  |  |  | Description: | 
| 2219 |  |  |  |  |  |  |  | 
| 2220 |  |  |  |  |  |  | Parses 'DateCreated' tag data in Medline XML file. Used when 'QuickParse' member variable is enabled. Sets $tempDate member variable to parsed 'DateCreated' tag data. | 
| 2221 |  |  |  |  |  |  |  | 
| 2222 |  |  |  |  |  |  | Input: | 
| 2223 |  |  |  |  |  |  |  | 
| 2224 |  |  |  |  |  |  | $twigHandler -> 'XML::Twig' handler | 
| 2225 |  |  |  |  |  |  | $article     -> Current Medline article directory in XML data (XML::Twig directory) | 
| 2226 |  |  |  |  |  |  |  | 
| 2227 |  |  |  |  |  |  | Output: | 
| 2228 |  |  |  |  |  |  |  | 
| 2229 |  |  |  |  |  |  | None | 
| 2230 |  |  |  |  |  |  |  | 
| 2231 |  |  |  |  |  |  | Example: | 
| 2232 |  |  |  |  |  |  |  | 
| 2233 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2234 |  |  |  |  |  |  |  | 
| 2235 |  |  |  |  |  |  | =head3 _QuickParseJournal | 
| 2236 |  |  |  |  |  |  |  | 
| 2237 |  |  |  |  |  |  | Description: | 
| 2238 |  |  |  |  |  |  |  | 
| 2239 |  |  |  |  |  |  | Parses 'Journal' tag data in Medline XML file. Fetches 'Title' XML tag. Used when 'QuickParse' member variable is enabled. | 
| 2240 |  |  |  |  |  |  | Sets $tempStr to parsed data and stores in text corpus. | 
| 2241 |  |  |  |  |  |  |  | 
| 2242 |  |  |  |  |  |  | Input: | 
| 2243 |  |  |  |  |  |  |  | 
| 2244 |  |  |  |  |  |  | $twigHandler -> 'XML::Twig' handler. | 
| 2245 |  |  |  |  |  |  | $journalRoot -> Current Medline journal directory in XML data (XML::Twig directory) | 
| 2246 |  |  |  |  |  |  |  | 
| 2247 |  |  |  |  |  |  | Output: | 
| 2248 |  |  |  |  |  |  |  | 
| 2249 |  |  |  |  |  |  | None | 
| 2250 |  |  |  |  |  |  |  | 
| 2251 |  |  |  |  |  |  | Example: | 
| 2252 |  |  |  |  |  |  |  | 
| 2253 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2254 |  |  |  |  |  |  |  | 
| 2255 |  |  |  |  |  |  | =head3 _QuickParseArticle | 
| 2256 |  |  |  |  |  |  |  | 
| 2257 |  |  |  |  |  |  | Description: | 
| 2258 |  |  |  |  |  |  |  | 
| 2259 |  |  |  |  |  |  | Parses 'Article' tag data in Medline XML file. Fetches 'ArticleTitle' and 'Abstract' XML tags. Used when 'QuickParse' member variable is enabled. | 
| 2260 |  |  |  |  |  |  | Sets $tempStr to parsed data and stores in text corpus. | 
| 2261 |  |  |  |  |  |  |  | 
| 2262 |  |  |  |  |  |  | Input: | 
| 2263 |  |  |  |  |  |  |  | 
| 2264 |  |  |  |  |  |  | $twigHandler -> 'XML::Twig' handler. | 
| 2265 |  |  |  |  |  |  | $article     -> Current Medline article directory in XML data (XML::Twig directory) | 
| 2266 |  |  |  |  |  |  |  | 
| 2267 |  |  |  |  |  |  | Output: | 
| 2268 |  |  |  |  |  |  |  | 
| 2269 |  |  |  |  |  |  | None | 
| 2270 |  |  |  |  |  |  |  | 
| 2271 |  |  |  |  |  |  | Example: | 
| 2272 |  |  |  |  |  |  |  | 
| 2273 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2274 |  |  |  |  |  |  |  | 
| 2275 |  |  |  |  |  |  | =head3 _QuickParseOtherAbstract | 
| 2276 |  |  |  |  |  |  |  | 
| 2277 |  |  |  |  |  |  | Description: | 
| 2278 |  |  |  |  |  |  |  | 
| 2279 |  |  |  |  |  |  | Parses 'Abstract' tag data in Medline XML file. Fetches 'AbstractText' XML tag. Used when 'QuickParse' member variable is enabled. | 
| 2280 |  |  |  |  |  |  | Sets $tempStr to parsed data and stores in text corpus. | 
| 2281 |  |  |  |  |  |  |  | 
| 2282 |  |  |  |  |  |  | Input: | 
| 2283 |  |  |  |  |  |  |  | 
| 2284 |  |  |  |  |  |  | $twigHandler -> 'XML::Twig' handler. | 
| 2285 |  |  |  |  |  |  | $abstractRoot -> Current Medline abstract directory in XML data (XML::Twig directory) | 
| 2286 |  |  |  |  |  |  |  | 
| 2287 |  |  |  |  |  |  | Output: | 
| 2288 |  |  |  |  |  |  |  | 
| 2289 |  |  |  |  |  |  | None | 
| 2290 |  |  |  |  |  |  |  | 
| 2291 |  |  |  |  |  |  | Example: | 
| 2292 |  |  |  |  |  |  |  | 
| 2293 |  |  |  |  |  |  | Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. | 
| 2294 |  |  |  |  |  |  |  | 
| 2295 |  |  |  |  |  |  | =head3 CreateCompoundWordBST | 
| 2296 |  |  |  |  |  |  |  | 
| 2297 |  |  |  |  |  |  | Description: | 
| 2298 |  |  |  |  |  |  |  | 
| 2299 |  |  |  |  |  |  | Creates a binary search tree using compound word data in memory and stores root node. This also clears the compound word array afterwards. | 
| 2300 |  |  |  |  |  |  |  | 
| 2301 |  |  |  |  |  |  | Warning: Compound word file must be loaded into memory using ReadCompoundWordDataFromFile() prior to calling this method. This function | 
| 2302 |  |  |  |  |  |  | will also delete the compound word array upon completion as it will no longer be necessary. | 
| 2303 |  |  |  |  |  |  |  | 
| 2304 |  |  |  |  |  |  | Input: | 
| 2305 |  |  |  |  |  |  |  | 
| 2306 |  |  |  |  |  |  | None | 
| 2307 |  |  |  |  |  |  |  | 
| 2308 |  |  |  |  |  |  | Output: | 
| 2309 |  |  |  |  |  |  |  | 
| 2310 |  |  |  |  |  |  | $value -> '0' = Successful / '-1' = Un-successful | 
| 2311 |  |  |  |  |  |  |  | 
| 2312 |  |  |  |  |  |  | Example: | 
| 2313 |  |  |  |  |  |  |  | 
| 2314 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2315 |  |  |  |  |  |  |  | 
| 2316 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2317 |  |  |  |  |  |  | $xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt" ); | 
| 2318 |  |  |  |  |  |  | $xmlconv->CreateCompoundWordBST(); | 
| 2319 |  |  |  |  |  |  |  | 
| 2320 |  |  |  |  |  |  | =head3 CompoundifyString | 
| 2321 |  |  |  |  |  |  |  | 
| 2322 |  |  |  |  |  |  | Description: | 
| 2323 |  |  |  |  |  |  |  | 
| 2324 |  |  |  |  |  |  | Compoundifies string parameter based on compound word data in memory using the compound word binary search tree. | 
| 2325 |  |  |  |  |  |  |  | 
| 2326 |  |  |  |  |  |  | Warning: Compound word file must be loaded into memory using ReadCompoundWordDataFromFile() prior to calling this method. | 
| 2327 |  |  |  |  |  |  |  | 
| 2328 |  |  |  |  |  |  | Input: | 
| 2329 |  |  |  |  |  |  |  | 
| 2330 |  |  |  |  |  |  | $string -> String to compoundify | 
| 2331 |  |  |  |  |  |  |  | 
| 2332 |  |  |  |  |  |  | Output: | 
| 2333 |  |  |  |  |  |  |  | 
| 2334 |  |  |  |  |  |  | $string -> Compounded string or "(null)" if string parameter is not defined. | 
| 2335 |  |  |  |  |  |  |  | 
| 2336 |  |  |  |  |  |  | Example: | 
| 2337 |  |  |  |  |  |  |  | 
| 2338 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2339 |  |  |  |  |  |  |  | 
| 2340 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2341 |  |  |  |  |  |  | $xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt" ); | 
| 2342 |  |  |  |  |  |  | $xmlconv->CreateCompoundWordBST(); | 
| 2343 |  |  |  |  |  |  | my $compoundedString = $xmlconv->CompoundifyString( "String to compoundify" ); | 
| 2344 |  |  |  |  |  |  | print( "Compounded String: $compoundedString\n" ); | 
| 2345 |  |  |  |  |  |  |  | 
| 2346 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2347 |  |  |  |  |  |  |  | 
| 2348 |  |  |  |  |  |  | =head3 _CompoundifySearch | 
| 2349 |  |  |  |  |  |  |  | 
| 2350 |  |  |  |  |  |  | Description: | 
| 2351 |  |  |  |  |  |  |  | 
| 2352 |  |  |  |  |  |  | Recursive method used by CompoundifyString() to fetch compound word data in binary search tree. | 
| 2353 |  |  |  |  |  |  |  | 
| 2354 |  |  |  |  |  |  | Warning: This function requires specific parameters and should not be called outside of CompoundifyString() method. | 
| 2355 |  |  |  |  |  |  |  | 
| 2356 |  |  |  |  |  |  | Input: | 
| 2357 |  |  |  |  |  |  |  | 
| 2358 |  |  |  |  |  |  | $stringArrayRef -> Array reference containing string data | 
| 2359 |  |  |  |  |  |  | $oldNode        -> Last 'Word2vec::Node' in which a data match was found | 
| 2360 |  |  |  |  |  |  | $searchStr      -> Search phrase | 
| 2361 |  |  |  |  |  |  | $index          -> Current string array index | 
| 2362 |  |  |  |  |  |  |  | 
| 2363 |  |  |  |  |  |  | Output: | 
| 2364 |  |  |  |  |  |  |  | 
| 2365 |  |  |  |  |  |  | Word2vec::Node  -> Last node containing positive search phrase match | 
| 2366 |  |  |  |  |  |  |  | 
| 2367 |  |  |  |  |  |  | Example: | 
| 2368 |  |  |  |  |  |  |  | 
| 2369 |  |  |  |  |  |  | Warning: This is a private function and is called by 'CompoundifyString()'. It should not be called outside of xmltow2v module. | 
| 2370 |  |  |  |  |  |  |  | 
| 2371 |  |  |  |  |  |  | =head3 ReadCompoundWordDataFromFile | 
| 2372 |  |  |  |  |  |  |  | 
| 2373 |  |  |  |  |  |  | Description: | 
| 2374 |  |  |  |  |  |  |  | 
| 2375 |  |  |  |  |  |  | Reads compound word file and stores in memory. $autoSetMaxCompWordLength parameter is not required to be set. This | 
| 2376 |  |  |  |  |  |  | parameter instructs the method to auto set the maximum compound word length dependent on the longest compound word found. | 
| 2377 |  |  |  |  |  |  |  | 
| 2378 |  |  |  |  |  |  | Note: $autoSetMaxCompWordLength options: defined = True and Undefined = False. | 
| 2379 |  |  |  |  |  |  |  | 
| 2380 |  |  |  |  |  |  | Input: | 
| 2381 |  |  |  |  |  |  |  | 
| 2382 |  |  |  |  |  |  | $filePath                 -> Compound word file path | 
| 2383 |  |  |  |  |  |  | $autoSetMaxCompWordLength -> Optional flag (defined = True / undefined = False). When defined, the maximum compound word length the compoundify algorithm will permit is set automatically to the longest compound word found. | 
| 2384 |  |  |  |  |  |  |  | 
| 2385 |  |  |  |  |  |  | Note: Calling this method with $autoSetMaxCompWordLength defined will automatically set the maxCompoundWordLength variable to the longest compound phrase. | 
| 2386 |  |  |  |  |  |  |  | 
| 2387 |  |  |  |  |  |  | Output: | 
| 2388 |  |  |  |  |  |  |  | 
| 2389 |  |  |  |  |  |  | $value                    -> '0' = Successful / '-1' = Un-successful | 
| 2390 |  |  |  |  |  |  |  | 
| 2391 |  |  |  |  |  |  | Example: | 
| 2392 |  |  |  |  |  |  |  | 
| 2393 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2394 |  |  |  |  |  |  |  | 
| 2395 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2396 |  |  |  |  |  |  | $xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt", 1 ); | 
| 2397 |  |  |  |  |  |  |  | 
| 2398 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2399 |  |  |  |  |  |  |  | 
| 2400 |  |  |  |  |  |  | =head3 SaveCompoundWordListToFile | 
| 2401 |  |  |  |  |  |  |  | 
| 2402 |  |  |  |  |  |  | Description: | 
| 2403 |  |  |  |  |  |  |  | 
| 2404 |  |  |  |  |  |  | Saves compound word data in memory to a specified file location. | 
| 2405 |  |  |  |  |  |  |  | 
| 2406 |  |  |  |  |  |  | Input: | 
| 2407 |  |  |  |  |  |  |  | 
| 2408 |  |  |  |  |  |  | $savePath -> Path to save compound word list to file. | 
| 2409 |  |  |  |  |  |  |  | 
| 2410 |  |  |  |  |  |  | Output: | 
| 2411 |  |  |  |  |  |  |  | 
| 2412 |  |  |  |  |  |  | $value    -> '0' = Successful / '-1' = Un-successful | 
| 2413 |  |  |  |  |  |  |  | 
| 2414 |  |  |  |  |  |  | Example: | 
| 2415 |  |  |  |  |  |  |  | 
| 2416 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2417 |  |  |  |  |  |  |  | 
| 2418 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2419 |  |  |  |  |  |  | $xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt" ); | 
| 2420 |  |  |  |  |  |  | $xmlconv->SaveCompoundWordListToFile( "samples/newcompoundword.txt" ); | 
| 2421 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2422 |  |  |  |  |  |  |  | 
| 2423 |  |  |  |  |  |  | =head3 ReadTextFromFile | 
| 2424 |  |  |  |  |  |  |  | 
| 2425 |  |  |  |  |  |  | Description: | 
| 2426 |  |  |  |  |  |  |  | 
| 2427 |  |  |  |  |  |  | Reads a plain text file with utf8 encoding into memory. Returns string data if successful and "(null)" if unsuccessful. | 
| 2428 |  |  |  |  |  |  |  | 
| 2429 |  |  |  |  |  |  | Input: | 
| 2430 |  |  |  |  |  |  |  | 
| 2431 |  |  |  |  |  |  | $filePath -> Text file to read into memory | 
| 2432 |  |  |  |  |  |  |  | 
| 2433 |  |  |  |  |  |  | Output: | 
| 2434 |  |  |  |  |  |  |  | 
| 2435 |  |  |  |  |  |  | $string   -> String data if successful or "(null)" if un-successful. | 
| 2436 |  |  |  |  |  |  |  | 
| 2437 |  |  |  |  |  |  | Example: | 
| 2438 |  |  |  |  |  |  |  | 
| 2439 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2440 |  |  |  |  |  |  |  | 
| 2441 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2442 |  |  |  |  |  |  | my $textData = $xmlconv->ReadTextFromFile( "samples/textcorpus.txt" ); | 
| 2443 |  |  |  |  |  |  | print( "Text Data: $textData\n" ); | 
| 2444 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2445 |  |  |  |  |  |  |  | 
| 2446 |  |  |  |  |  |  | =head3 SaveTextToFile | 
| 2447 |  |  |  |  |  |  |  | 
| 2448 |  |  |  |  |  |  | Description: | 
| 2449 |  |  |  |  |  |  |  | 
| 2450 |  |  |  |  |  |  | Saves a plain text file with utf8 encoding in a specified location. | 
| 2451 |  |  |  |  |  |  |  | 
| 2452 |  |  |  |  |  |  | Input: | 
| 2453 |  |  |  |  |  |  |  | 
| 2454 |  |  |  |  |  |  | $savePath -> Path to save string data. | 
| 2455 |  |  |  |  |  |  | $string   -> String to save | 
| 2456 |  |  |  |  |  |  |  | 
| 2457 |  |  |  |  |  |  | Output: | 
| 2458 |  |  |  |  |  |  |  | 
| 2459 |  |  |  |  |  |  | $value    -> '0' = Successful / '-1' = Un-successful | 
| 2460 |  |  |  |  |  |  |  | 
| 2461 |  |  |  |  |  |  | Example: | 
| 2462 |  |  |  |  |  |  |  | 
| 2463 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2464 |  |  |  |  |  |  |  | 
| 2465 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2466 |  |  |  |  |  |  | my $result = $xmlconv->SaveTextToFile( "text.txt", "Hello world!" ); | 
| 2467 |  |  |  |  |  |  |  | 
| 2468 |  |  |  |  |  |  | print( "File saved\n" ) if $result == 0; | 
| 2469 |  |  |  |  |  |  | print( "File unable to save\n" ) if $result == -1; | 
| 2470 |  |  |  |  |  |  |  | 
| 2471 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2472 |  |  |  |  |  |  |  | 
| 2473 |  |  |  |  |  |  | =head3 _ReadXMLDataFromFile | 
| 2474 |  |  |  |  |  |  |  | 
| 2475 |  |  |  |  |  |  | Description: | 
| 2476 |  |  |  |  |  |  |  | 
| 2477 |  |  |  |  |  |  | Reads an XML file from a specified location. Returns string in memory if successful and "(null)" if unsuccessful. | 
| 2478 |  |  |  |  |  |  |  | 
| 2479 |  |  |  |  |  |  | Input: | 
| 2480 |  |  |  |  |  |  |  | 
| 2481 |  |  |  |  |  |  | $filePath -> File to read given path | 
| 2482 |  |  |  |  |  |  |  | 
| 2483 |  |  |  |  |  |  | Output: | 
| 2484 |  |  |  |  |  |  |  | 
| 2485 |  |  |  |  |  |  | $value    -> '0' = Successful / '-1' = Un-successful | 
| 2486 |  |  |  |  |  |  |  | 
| 2487 |  |  |  |  |  |  | Example: | 
| 2488 |  |  |  |  |  |  |  | 
| 2489 |  |  |  |  |  |  | Warning: This is a private function and is called by XML::Twig parsing functions. It should not be called outside of xmltow2v module. | 
| 2490 |  |  |  |  |  |  |  | 
| 2491 |  |  |  |  |  |  | =head3 _SaveTextCorpusToFile | 
| 2492 |  |  |  |  |  |  |  | 
| 2493 |  |  |  |  |  |  | Description: | 
| 2494 |  |  |  |  |  |  |  | 
| 2495 |  |  |  |  |  |  | Saves text corpus data to specified file path. This method will append to any existing file if $appendToFile parameter | 
| 2496 |  |  |  |  |  |  | is defined or "overwrite" option is disabled. Enabling "overwrite" option will overwrite any existing files. | 
| 2497 |  |  |  |  |  |  |  | 
| 2498 |  |  |  |  |  |  | Input: | 
| 2499 |  |  |  |  |  |  |  | 
| 2500 |  |  |  |  |  |  | $savePath     -> Path to save the text corpus | 
| 2501 |  |  |  |  |  |  | $appendToFile -> Specifies whether the module will overwrite any existing data or append to existing text corpus data. | 
| 2502 |  |  |  |  |  |  |  | 
| 2503 |  |  |  |  |  |  | Note: Leaving this variable undefined will fetch the "Overwrite" member variable and set the value to this parameter. | 
| 2504 |  |  |  |  |  |  |  | 
| 2505 |  |  |  |  |  |  | Output: | 
| 2506 |  |  |  |  |  |  |  | 
| 2507 |  |  |  |  |  |  | $value        -> '0' = Successful / '-1' = Un-successful | 
| 2508 |  |  |  |  |  |  |  | 
| 2509 |  |  |  |  |  |  | Example: | 
| 2510 |  |  |  |  |  |  |  | 
| 2511 |  |  |  |  |  |  | Warning: This is a private function and is called by XML::Twig parsing functions. It should not be called outside of xmltow2v module. | 
| 2512 |  |  |  |  |  |  |  | 
| 2513 |  |  |  |  |  |  | =head3 IsDateInSpecifiedRange | 
| 2514 |  |  |  |  |  |  |  | 
| 2515 |  |  |  |  |  |  | Description: | 
| 2516 |  |  |  |  |  |  |  | 
| 2517 |  |  |  |  |  |  | Checks to see if $date is within $beginDate and $endDate range. Returns 1 if true and 0 if false. | 
| 2518 |  |  |  |  |  |  |  | 
| 2519 |  |  |  |  |  |  | Note: Date Format: XX/XX/XXXX (Month/Day/Year) | 
| 2520 |  |  |  |  |  |  |  | 
| 2521 |  |  |  |  |  |  | Input: | 
| 2522 |  |  |  |  |  |  |  | 
| 2523 |  |  |  |  |  |  | $date      -> Date to check against minimum and maximum date range. (String) | 
| 2524 |  |  |  |  |  |  | $beginDate -> Minimum date range (String) | 
| 2525 |  |  |  |  |  |  | $endDate   -> Maximum date range (String) | 
| 2526 |  |  |  |  |  |  |  | 
| 2527 |  |  |  |  |  |  | Output: | 
| 2528 |  |  |  |  |  |  |  | 
| 2529 |  |  |  |  |  |  | $value     -> '1' = True/Date is within specified range Or '0' = False/Date is not within specified range. | 
| 2530 |  |  |  |  |  |  |  | 
| 2531 |  |  |  |  |  |  | Example: | 
| 2532 |  |  |  |  |  |  |  | 
| 2533 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2534 |  |  |  |  |  |  |  | 
| 2535 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2536 |  |  |  |  |  |  | print( "Is \"01/01/2004\" within the date range: \"02/21/1985\" to \"08/13/2016\"?\n" ); | 
| 2537 |  |  |  |  |  |  | print( "Yes\n" ) if $xmlconv->IsDateInSpecifiedRange( "01/01/2004", "02/21/1985", "08/13/2016" ) == 1; | 
| 2538 |  |  |  |  |  |  | print( "No\n" ) if $xmlconv->IsDateInSpecifiedRange( "01/01/2004", "02/21/1985", "08/13/2016" ) == 0; | 
| 2539 |  |  |  |  |  |  |  | 
| 2540 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2541 |  |  |  |  |  |  |  | 
| 2542 |  |  |  |  |  |  | =head3 IsFileOrDirectory | 
| 2543 |  |  |  |  |  |  |  | 
| 2544 |  |  |  |  |  |  | Description: | 
| 2545 |  |  |  |  |  |  |  | 
| 2546 |  |  |  |  |  |  | Checks to see if specified path is a file or directory. | 
| 2547 |  |  |  |  |  |  |  | 
| 2548 |  |  |  |  |  |  | Input: | 
| 2549 |  |  |  |  |  |  |  | 
| 2550 |  |  |  |  |  |  | $path   -> File or directory path. (String) | 
| 2551 |  |  |  |  |  |  |  | 
| 2552 |  |  |  |  |  |  | Output: | 
| 2553 |  |  |  |  |  |  |  | 
| 2554 |  |  |  |  |  |  | $string -> Returns: "file" = file, "dir" = directory and "unknown" if the path is not a file or directory (undefined). | 
| 2555 |  |  |  |  |  |  |  | 
| 2556 |  |  |  |  |  |  | Example: | 
| 2557 |  |  |  |  |  |  |  | 
| 2558 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2559 |  |  |  |  |  |  |  | 
| 2560 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2561 |  |  |  |  |  |  | my $path = "path/to/a/directory"; | 
| 2562 |  |  |  |  |  |  |  | 
| 2563 |  |  |  |  |  |  | print( "Is \"$path\" a file or directory? " . $xmlconv->IsFileOrDirectory( $path ) . "\n" ); | 
| 2564 |  |  |  |  |  |  |  | 
| 2565 |  |  |  |  |  |  | $path = "path/to/a/file.file"; | 
| 2566 |  |  |  |  |  |  |  | 
| 2567 |  |  |  |  |  |  | print( "Is \"$path\" a file or directory? " . $xmlconv->IsFileOrDirectory( $path ) . "\n" ); | 
| 2568 |  |  |  |  |  |  |  | 
| 2569 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2570 |  |  |  |  |  |  |  | 
| 2571 |  |  |  |  |  |  | =head3 RemoveSpecialCharactersFromString | 
| 2572 |  |  |  |  |  |  |  | 
| 2573 |  |  |  |  |  |  | Description: | 
| 2574 |  |  |  |  |  |  |  | 
| 2575 |  |  |  |  |  |  | Removes special characters from string parameter, removes extra spaces and converts text to lowercase. | 
| 2576 |  |  |  |  |  |  |  | 
| 2577 |  |  |  |  |  |  | Note: This method is called when parsing and compiling Medline title/abstract data. | 
| 2578 |  |  |  |  |  |  |  | 
| 2579 |  |  |  |  |  |  | Input: | 
| 2580 |  |  |  |  |  |  |  | 
| 2581 |  |  |  |  |  |  | $string -> String passed to remove special characters from and convert to lowercase. | 
| 2582 |  |  |  |  |  |  |  | 
| 2583 |  |  |  |  |  |  | Output: | 
| 2584 |  |  |  |  |  |  |  | 
| 2585 |  |  |  |  |  |  | $string -> String with all special characters removed and converted to lowercase. | 
| 2586 |  |  |  |  |  |  |  | 
| 2587 |  |  |  |  |  |  | Example: | 
| 2588 |  |  |  |  |  |  |  | 
| 2589 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2590 |  |  |  |  |  |  |  | 
| 2591 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2592 |  |  |  |  |  |  |  | 
| 2593 |  |  |  |  |  |  | my $str = "Heart Attack is$ an!@ also KNOWN as an Acute MYOCARDIAL inFARCTion!"; | 
| 2594 |  |  |  |  |  |  |  | 
| 2595 |  |  |  |  |  |  | print( "Original String: $str\n" ); | 
| 2596 |  |  |  |  |  |  |  | 
| 2597 |  |  |  |  |  |  | $str = $xmlconv->RemoveSpecialCharactersFromString( $str ); | 
| 2598 |  |  |  |  |  |  |  | 
| 2599 |  |  |  |  |  |  | print( "Modified String: $str\n" ); | 
| 2600 |  |  |  |  |  |  |  | 
| 2601 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2602 |  |  |  |  |  |  |  | 
| 2603 |  |  |  |  |  |  | =head3 GetFileType | 
| 2604 |  |  |  |  |  |  |  | 
| 2605 |  |  |  |  |  |  | Description: | 
| 2606 |  |  |  |  |  |  |  | 
| 2607 |  |  |  |  |  |  | Returns file data type (string). | 
| 2608 |  |  |  |  |  |  |  | 
| 2609 |  |  |  |  |  |  | Input: | 
| 2610 |  |  |  |  |  |  |  | 
| 2611 |  |  |  |  |  |  | $filePath -> File to check located at file path | 
| 2612 |  |  |  |  |  |  |  | 
| 2613 |  |  |  |  |  |  | Output: | 
| 2614 |  |  |  |  |  |  |  | 
| 2615 |  |  |  |  |  |  | $string   -> File type | 
| 2616 |  |  |  |  |  |  |  | 
| 2617 |  |  |  |  |  |  | Example: | 
| 2618 |  |  |  |  |  |  |  | 
| 2619 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2620 |  |  |  |  |  |  |  | 
| 2621 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2622 |  |  |  |  |  |  | my $fileType = $xmlconv->GetFileType( "samples/textcorpus.txt" ); | 
| 2623 |  |  |  |  |  |  |  | 
| 2624 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2625 |  |  |  |  |  |  |  | 
| 2626 |  |  |  |  |  |  | =head3 _DateCheck | 
| 2627 |  |  |  |  |  |  |  | 
| 2628 |  |  |  |  |  |  | Description: | 
| 2629 |  |  |  |  |  |  |  | 
| 2630 |  |  |  |  |  |  | Checks specified begin and end date strings for formatting and logic errors. | 
| 2631 |  |  |  |  |  |  |  | 
| 2632 |  |  |  |  |  |  | Input: | 
| 2633 |  |  |  |  |  |  |  | 
| 2634 |  |  |  |  |  |  | None | 
| 2635 |  |  |  |  |  |  |  | 
| 2636 |  |  |  |  |  |  | Output: | 
| 2637 |  |  |  |  |  |  |  | 
| 2638 |  |  |  |  |  |  | $value   -> "0" = Passed Checks / "-1" = Failed Checks | 
| 2639 |  |  |  |  |  |  |  | 
| 2640 |  |  |  |  |  |  | Example: | 
| 2641 |  |  |  |  |  |  |  | 
| 2642 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2643 |  |  |  |  |  |  |  | 
| 2644 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2645 |  |  |  |  |  |  | print "Passed Date Checks\n" if ( $xmlconv->_DateCheck() == 0 ); | 
| 2646 |  |  |  |  |  |  | print "Failed Date Checks\n" if ( $xmlconv->_DateCheck() == -1 ); | 
| 2647 |  |  |  |  |  |  |  | 
| 2648 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2649 |  |  |  |  |  |  |  | 
| 2650 |  |  |  |  |  |  | =head2 Accessor Functions | 
| 2651 |  |  |  |  |  |  |  | 
| 2652 |  |  |  |  |  |  | =head3 GetDebugLog | 
| 2653 |  |  |  |  |  |  |  | 
| 2654 |  |  |  |  |  |  | Description: | 
| 2655 |  |  |  |  |  |  |  | 
| 2656 |  |  |  |  |  |  | Returns the _debugLog member variable set during Word2vec::Xmltow2v object initialization of new function. | 
| 2657 |  |  |  |  |  |  |  | 
| 2658 |  |  |  |  |  |  | Input: | 
| 2659 |  |  |  |  |  |  |  | 
| 2660 |  |  |  |  |  |  | None | 
| 2661 |  |  |  |  |  |  |  | 
| 2662 |  |  |  |  |  |  | Output: | 
| 2663 |  |  |  |  |  |  |  | 
| 2664 |  |  |  |  |  |  | $value -> '0' = False, '1' = True | 
| 2665 |  |  |  |  |  |  |  | 
| 2666 |  |  |  |  |  |  | Example: | 
| 2667 |  |  |  |  |  |  |  | 
| 2668 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2669 |  |  |  |  |  |  |  | 
| 2670 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2671 |  |  |  |  |  |  | my $debugLog = $xmlconv->GetDebugLog(); | 
| 2672 |  |  |  |  |  |  |  | 
| 2673 |  |  |  |  |  |  | print( "Debug Logging Enabled\n" ) if $debugLog == 1; | 
| 2674 |  |  |  |  |  |  | print( "Debug Logging Disabled\n" ) if $debugLog == 0; | 
| 2675 |  |  |  |  |  |  |  | 
| 2676 |  |  |  |  |  |  |  | 
| 2677 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2678 |  |  |  |  |  |  |  | 
| 2679 |  |  |  |  |  |  | =head3 GetWriteLog | 
| 2680 |  |  |  |  |  |  |  | 
| 2681 |  |  |  |  |  |  | Description: | 
| 2682 |  |  |  |  |  |  |  | 
| 2683 |  |  |  |  |  |  | Returns the _writeLog member variable set during Word2vec::Xmltow2v object initialization of new function. | 
| 2684 |  |  |  |  |  |  |  | 
| 2685 |  |  |  |  |  |  | Input: | 
| 2686 |  |  |  |  |  |  |  | 
| 2687 |  |  |  |  |  |  | None | 
| 2688 |  |  |  |  |  |  |  | 
| 2689 |  |  |  |  |  |  | Output: | 
| 2690 |  |  |  |  |  |  |  | 
| 2691 |  |  |  |  |  |  | $value -> '0' = False, '1' = True | 
| 2692 |  |  |  |  |  |  |  | 
| 2693 |  |  |  |  |  |  | Example: | 
| 2694 |  |  |  |  |  |  |  | 
| 2695 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2696 |  |  |  |  |  |  |  | 
| 2697 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2698 |  |  |  |  |  |  | my $writeLog = $xmlconv->GetWriteLog(); | 
| 2699 |  |  |  |  |  |  |  | 
| 2700 |  |  |  |  |  |  | print( "Write Logging Enabled\n" ) if $writeLog == 1; | 
| 2701 |  |  |  |  |  |  | print( "Write Logging Disabled\n" ) if $writeLog == 0; | 
| 2702 |  |  |  |  |  |  |  | 
| 2703 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2704 |  |  |  |  |  |  |  | 
| 2705 |  |  |  |  |  |  | =head3 GetStoreTitle | 
| 2706 |  |  |  |  |  |  |  | 
| 2707 |  |  |  |  |  |  | Description: | 
| 2708 |  |  |  |  |  |  |  | 
| 2709 |  |  |  |  |  |  | Returns the _storeTitle member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2710 |  |  |  |  |  |  |  | 
| 2711 |  |  |  |  |  |  | Input: | 
| 2712 |  |  |  |  |  |  |  | 
| 2713 |  |  |  |  |  |  | None | 
| 2714 |  |  |  |  |  |  |  | 
| 2715 |  |  |  |  |  |  | Output: | 
| 2716 |  |  |  |  |  |  |  | 
| 2717 |  |  |  |  |  |  | $value -> '1' = True / '0' = False | 
| 2718 |  |  |  |  |  |  |  | 
| 2719 |  |  |  |  |  |  | Example: | 
| 2720 |  |  |  |  |  |  |  | 
| 2721 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2722 |  |  |  |  |  |  |  | 
| 2723 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2724 |  |  |  |  |  |  | my $storeTitle = $xmlconv->GetStoreTitle(); | 
| 2725 |  |  |  |  |  |  |  | 
| 2726 |  |  |  |  |  |  | print( "Store Title Option: Enabled\n" ) if $storeTitle == 1; | 
| 2727 |  |  |  |  |  |  | print( "Store Title Option: Disabled\n" ) if $storeTitle == 0; | 
| 2728 |  |  |  |  |  |  |  | 
| 2729 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2730 |  |  |  |  |  |  |  | 
| 2731 |  |  |  |  |  |  | =head3 GetStoreAbstract | 
| 2732 |  |  |  |  |  |  |  | 
| 2733 |  |  |  |  |  |  | Description: | 
| 2734 |  |  |  |  |  |  |  | 
| 2735 |  |  |  |  |  |  | Returns the _storeAbstract member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2736 |  |  |  |  |  |  |  | 
| 2737 |  |  |  |  |  |  | Input: | 
| 2738 |  |  |  |  |  |  |  | 
| 2739 |  |  |  |  |  |  | None | 
| 2740 |  |  |  |  |  |  |  | 
| 2741 |  |  |  |  |  |  | Output: | 
| 2742 |  |  |  |  |  |  |  | 
| 2743 |  |  |  |  |  |  | $value -> '1' = True / '0' = False | 
| 2744 |  |  |  |  |  |  |  | 
| 2745 |  |  |  |  |  |  | Example: | 
| 2746 |  |  |  |  |  |  |  | 
| 2747 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2748 |  |  |  |  |  |  |  | 
| 2749 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2750 |  |  |  |  |  |  | my $storeAbstract = $xmlconv->GetStoreAbstract(); | 
| 2751 |  |  |  |  |  |  |  | 
| 2752 |  |  |  |  |  |  | print( "Store Abstract Option: Enabled\n" ) if $storeAbstract == 1; | 
| 2753 |  |  |  |  |  |  | print( "Store Abstract Option: Disabled\n" ) if $storeAbstract == 0; | 
| 2754 |  |  |  |  |  |  |  | 
| 2755 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2756 |  |  |  |  |  |  |  | 
| 2757 |  |  |  |  |  |  | =head3 GetQuickParse | 
| 2758 |  |  |  |  |  |  |  | 
| 2759 |  |  |  |  |  |  | Description: | 
| 2760 |  |  |  |  |  |  |  | 
| 2761 |  |  |  |  |  |  | Returns the _quickParse member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2762 |  |  |  |  |  |  |  | 
| 2763 |  |  |  |  |  |  | Input: | 
| 2764 |  |  |  |  |  |  |  | 
| 2765 |  |  |  |  |  |  | None | 
| 2766 |  |  |  |  |  |  |  | 
| 2767 |  |  |  |  |  |  | Output: | 
| 2768 |  |  |  |  |  |  |  | 
| 2769 |  |  |  |  |  |  | $value -> '1' = True / '0' = False | 
| 2770 |  |  |  |  |  |  |  | 
| 2771 |  |  |  |  |  |  | Example: | 
| 2772 |  |  |  |  |  |  |  | 
| 2773 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2774 |  |  |  |  |  |  |  | 
| 2775 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2776 |  |  |  |  |  |  | my $quickParse = $xmlconv->GetQuickParse(); | 
| 2777 |  |  |  |  |  |  |  | 
| 2778 |  |  |  |  |  |  | print( "Quick Parse Option: Enabled\n" ) if $quickParse == 1; | 
| 2779 |  |  |  |  |  |  | print( "Quick Parse Option: Disabled\n" ) if $quickParse == 0; | 
| 2780 |  |  |  |  |  |  |  | 
| 2781 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2782 |  |  |  |  |  |  |  | 
| 2783 |  |  |  |  |  |  | =head3 GetCompoundifyText | 
| 2784 |  |  |  |  |  |  |  | 
| 2785 |  |  |  |  |  |  | Description: | 
| 2786 |  |  |  |  |  |  |  | 
| 2787 |  |  |  |  |  |  | Returns the _compoundifyText member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2788 |  |  |  |  |  |  |  | 
| 2789 |  |  |  |  |  |  | Input: | 
| 2790 |  |  |  |  |  |  |  | 
| 2791 |  |  |  |  |  |  | None | 
| 2792 |  |  |  |  |  |  |  | 
| 2793 |  |  |  |  |  |  | Output: | 
| 2794 |  |  |  |  |  |  |  | 
| 2795 |  |  |  |  |  |  | $value -> '1' = True / '0' = False | 
| 2796 |  |  |  |  |  |  |  | 
| 2797 |  |  |  |  |  |  | Example: | 
| 2798 |  |  |  |  |  |  |  | 
| 2799 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2800 |  |  |  |  |  |  |  | 
| 2801 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2802 |  |  |  |  |  |  | my $compoundify = $xmlconv->GetCompoundifyText(); | 
| 2803 |  |  |  |  |  |  |  | 
| 2804 |  |  |  |  |  |  | print( "Compoundify Text Option: Enabled\n" ) if $compoundify == 1; | 
| 2805 |  |  |  |  |  |  | print( "Compoundify Text Option: Disabled\n" ) if $compoundify == 0; | 
| 2806 |  |  |  |  |  |  |  | 
| 2807 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2808 |  |  |  |  |  |  |  | 
| 2809 |  |  |  |  |  |  | =head3 GetNumOfThreads | 
| 2810 |  |  |  |  |  |  |  | 
| 2811 |  |  |  |  |  |  | Description: | 
| 2812 |  |  |  |  |  |  |  | 
| 2813 |  |  |  |  |  |  | Returns the _numOfThreads member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2814 |  |  |  |  |  |  |  | 
| 2815 |  |  |  |  |  |  | Input: | 
| 2816 |  |  |  |  |  |  |  | 
| 2817 |  |  |  |  |  |  | None | 
| 2818 |  |  |  |  |  |  |  | 
| 2819 |  |  |  |  |  |  | Output: | 
| 2820 |  |  |  |  |  |  |  | 
| 2821 |  |  |  |  |  |  | $value -> Number of threads | 
| 2822 |  |  |  |  |  |  |  | 
| 2823 |  |  |  |  |  |  | Example: | 
| 2824 |  |  |  |  |  |  |  | 
| 2825 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2826 |  |  |  |  |  |  |  | 
| 2827 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2828 |  |  |  |  |  |  | my $numOfThreads = $xmlconv->GetNumOfThreads(); | 
| 2829 |  |  |  |  |  |  |  | 
| 2830 |  |  |  |  |  |  | print( "Number of threads: $numOfThreads\n" ); | 
| 2831 |  |  |  |  |  |  |  | 
| 2832 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2833 |  |  |  |  |  |  |  | 
| 2834 |  |  |  |  |  |  | =head3 GetWorkingDir | 
| 2835 |  |  |  |  |  |  |  | 
| 2836 |  |  |  |  |  |  | Description: | 
| 2837 |  |  |  |  |  |  |  | 
| 2838 |  |  |  |  |  |  | Returns the _workingDir member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2839 |  |  |  |  |  |  |  | 
| 2840 |  |  |  |  |  |  | Input: | 
| 2841 |  |  |  |  |  |  |  | 
| 2842 |  |  |  |  |  |  | None | 
| 2843 |  |  |  |  |  |  |  | 
| 2844 |  |  |  |  |  |  | Output: | 
| 2845 |  |  |  |  |  |  |  | 
| 2846 |  |  |  |  |  |  | $string -> Working directory string | 
| 2847 |  |  |  |  |  |  |  | 
| 2848 |  |  |  |  |  |  | Example: | 
| 2849 |  |  |  |  |  |  |  | 
| 2850 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2851 |  |  |  |  |  |  |  | 
| 2852 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2853 |  |  |  |  |  |  | my $workingDirectory = $xmlconv->GetWorkingDir(); | 
| 2854 |  |  |  |  |  |  |  | 
| 2855 |  |  |  |  |  |  | print( "Working Directory: $workingDirectory\n" ); | 
| 2856 |  |  |  |  |  |  |  | 
| 2857 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2858 |  |  |  |  |  |  |  | 
| 2859 |  |  |  |  |  |  | =head3 GetSavePath | 
| 2860 |  |  |  |  |  |  |  | 
| 2861 |  |  |  |  |  |  | Description: | 
| 2862 |  |  |  |  |  |  |  | 
| 2863 |  |  |  |  |  |  | Returns the _saveDir member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2864 |  |  |  |  |  |  |  | 
| 2865 |  |  |  |  |  |  | Input: | 
| 2866 |  |  |  |  |  |  |  | 
| 2867 |  |  |  |  |  |  | None | 
| 2868 |  |  |  |  |  |  |  | 
| 2869 |  |  |  |  |  |  | Output: | 
| 2870 |  |  |  |  |  |  |  | 
| 2871 |  |  |  |  |  |  | $string -> Save directory string | 
| 2872 |  |  |  |  |  |  |  | 
| 2873 |  |  |  |  |  |  | Example: | 
| 2874 |  |  |  |  |  |  |  | 
| 2875 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2876 |  |  |  |  |  |  |  | 
| 2877 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2878 |  |  |  |  |  |  | my $savePath = $xmlconv->GetSavePath(); | 
| 2879 |  |  |  |  |  |  |  | 
| 2880 |  |  |  |  |  |  | print( "Save Directory: $savePath\n" ); | 
| 2881 |  |  |  |  |  |  |  | 
| 2882 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2883 |  |  |  |  |  |  |  | 
| 2884 |  |  |  |  |  |  | =head3 GetBeginDate | 
| 2885 |  |  |  |  |  |  |  | 
| 2886 |  |  |  |  |  |  | Description: | 
| 2887 |  |  |  |  |  |  |  | 
| 2888 |  |  |  |  |  |  | Returns the _beginDate member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2889 |  |  |  |  |  |  |  | 
| 2890 |  |  |  |  |  |  | Input: | 
| 2891 |  |  |  |  |  |  |  | 
| 2892 |  |  |  |  |  |  | None | 
| 2893 |  |  |  |  |  |  |  | 
| 2894 |  |  |  |  |  |  | Output: | 
| 2895 |  |  |  |  |  |  |  | 
| 2896 |  |  |  |  |  |  | $date -> Beginning date range - Format: XX/XX/XXXX (Mon/Day/Year) | 
| 2897 |  |  |  |  |  |  |  | 
| 2898 |  |  |  |  |  |  | Example: | 
| 2899 |  |  |  |  |  |  |  | 
| 2900 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2901 |  |  |  |  |  |  |  | 
| 2902 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2903 |  |  |  |  |  |  | my $date = $xmlconv->GetBeginDate(); | 
| 2904 |  |  |  |  |  |  |  | 
| 2905 |  |  |  |  |  |  | print( "Date: $date\n" ); | 
| 2906 |  |  |  |  |  |  |  | 
| 2907 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2908 |  |  |  |  |  |  |  | 
| 2909 |  |  |  |  |  |  | =head3 GetEndDate | 
| 2910 |  |  |  |  |  |  |  | 
| 2911 |  |  |  |  |  |  | Description: | 
| 2912 |  |  |  |  |  |  |  | 
| 2913 |  |  |  |  |  |  | Returns the _endDate member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2914 |  |  |  |  |  |  |  | 
| 2915 |  |  |  |  |  |  | Input: | 
| 2916 |  |  |  |  |  |  |  | 
| 2917 |  |  |  |  |  |  | None | 
| 2918 |  |  |  |  |  |  |  | 
| 2919 |  |  |  |  |  |  | Output: | 
| 2920 |  |  |  |  |  |  |  | 
| 2921 |  |  |  |  |  |  | $date -> End date range - Format: XX/XX/XXXX (Mon/Day/Year). | 
| 2922 |  |  |  |  |  |  |  | 
| 2923 |  |  |  |  |  |  | Example: | 
| 2924 |  |  |  |  |  |  |  | 
| 2925 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2926 |  |  |  |  |  |  |  | 
| 2927 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2928 |  |  |  |  |  |  | my $date = $xmlconv->GetEndDate(); | 
| 2929 |  |  |  |  |  |  |  | 
| 2930 |  |  |  |  |  |  | print( "Date: $date\n" ); | 
| 2931 |  |  |  |  |  |  |  | 
| 2932 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2933 |  |  |  |  |  |  |  | 
| 2934 |  |  |  |  |  |  | =head3 GetXMLStringToParse | 
| 2935 |  |  |  |  |  |  |  | 
| 2936 |  |  |  |  |  |  | Returns the XML data (string) to be parsed. | 
| 2937 |  |  |  |  |  |  |  | 
| 2938 |  |  |  |  |  |  | Description: | 
| 2939 |  |  |  |  |  |  |  | 
| 2940 |  |  |  |  |  |  | Returns the _xmlStringToParse member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2941 |  |  |  |  |  |  |  | 
| 2942 |  |  |  |  |  |  | Input: | 
| 2943 |  |  |  |  |  |  |  | 
| 2944 |  |  |  |  |  |  | None | 
| 2945 |  |  |  |  |  |  |  | 
| 2946 |  |  |  |  |  |  | Output: | 
| 2947 |  |  |  |  |  |  |  | 
| 2948 |  |  |  |  |  |  | $string -> Medline XML data string | 
| 2949 |  |  |  |  |  |  |  | 
| 2950 |  |  |  |  |  |  | Example: | 
| 2951 |  |  |  |  |  |  |  | 
| 2952 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2953 |  |  |  |  |  |  |  | 
| 2954 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2955 |  |  |  |  |  |  | my $xmlStr = $xmlconv->GetXMLStringToParse(); | 
| 2956 |  |  |  |  |  |  |  | 
| 2957 |  |  |  |  |  |  | print( "XML String: $xmlStr\n" ); | 
| 2958 |  |  |  |  |  |  |  | 
| 2959 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2960 |  |  |  |  |  |  |  | 
| 2961 |  |  |  |  |  |  | =head3 GetTextCorpusStr | 
| 2962 |  |  |  |  |  |  |  | 
| 2963 |  |  |  |  |  |  | Description: | 
| 2964 |  |  |  |  |  |  |  | 
| 2965 |  |  |  |  |  |  | Returns the _textCorpusStr member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2966 |  |  |  |  |  |  |  | 
| 2967 |  |  |  |  |  |  | Input: | 
| 2968 |  |  |  |  |  |  |  | 
| 2969 |  |  |  |  |  |  | None | 
| 2970 |  |  |  |  |  |  |  | 
| 2971 |  |  |  |  |  |  | Output: | 
| 2972 |  |  |  |  |  |  |  | 
| 2973 |  |  |  |  |  |  | $string -> Text corpus string | 
| 2974 |  |  |  |  |  |  |  | 
| 2975 |  |  |  |  |  |  | Example: | 
| 2976 |  |  |  |  |  |  |  | 
| 2977 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 2978 |  |  |  |  |  |  |  | 
| 2979 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 2980 |  |  |  |  |  |  | my $str = $xmlconv->GetTextCorpusStr(); | 
| 2981 |  |  |  |  |  |  |  | 
| 2982 |  |  |  |  |  |  | print( "Text Corpus: $str\n" ); | 
| 2983 |  |  |  |  |  |  |  | 
| 2984 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 2985 |  |  |  |  |  |  |  | 
| 2986 |  |  |  |  |  |  | =head3 GetFileHandle | 
| 2987 |  |  |  |  |  |  |  | 
| 2988 |  |  |  |  |  |  | Description: | 
| 2989 |  |  |  |  |  |  |  | 
| 2990 |  |  |  |  |  |  | Returns the _fileHandle member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 2991 |  |  |  |  |  |  |  | 
| 2992 |  |  |  |  |  |  | Warning: This is a private function. File handle is used by WriteLog() method. Do not manipulate this file handle as errors can result. | 
| 2993 |  |  |  |  |  |  |  | 
| 2994 |  |  |  |  |  |  | Input: | 
| 2995 |  |  |  |  |  |  |  | 
| 2996 |  |  |  |  |  |  | None | 
| 2997 |  |  |  |  |  |  |  | 
| 2998 |  |  |  |  |  |  | Output: | 
| 2999 |  |  |  |  |  |  |  | 
| 3000 |  |  |  |  |  |  | $fileHandle -> Returns file handle for WriteLog() method. | 
| 3001 |  |  |  |  |  |  |  | 
| 3002 |  |  |  |  |  |  | Example: | 
| 3003 |  |  |  |  |  |  |  | 
| 3004 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3005 |  |  |  |  |  |  |  | 
| 3006 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3007 |  |  |  |  |  |  | my $fileHandle = $xmlconv->GetFileHandle(); | 
| 3008 |  |  |  |  |  |  |  | 
| 3009 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3010 |  |  |  |  |  |  |  | 
| 3011 |  |  |  |  |  |  | =head3 GetTwigHandler | 
| 3012 |  |  |  |  |  |  |  | 
| 3013 |  |  |  |  |  |  | Returns XML::Twig handler. | 
| 3014 |  |  |  |  |  |  |  | 
| 3015 |  |  |  |  |  |  | Description: | 
| 3016 |  |  |  |  |  |  |  | 
| 3017 |  |  |  |  |  |  | Returns the _twigHandler member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3018 |  |  |  |  |  |  |  | 
| 3019 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3020 |  |  |  |  |  |  |  | 
| 3021 |  |  |  |  |  |  | Input: | 
| 3022 |  |  |  |  |  |  |  | 
| 3023 |  |  |  |  |  |  | None | 
| 3024 |  |  |  |  |  |  |  | 
| 3025 |  |  |  |  |  |  | Output: | 
| 3026 |  |  |  |  |  |  |  | 
| 3027 |  |  |  |  |  |  | $twigHandler -> XML::Twig handler. | 
| 3028 |  |  |  |  |  |  |  | 
| 3029 |  |  |  |  |  |  | Example: | 
| 3030 |  |  |  |  |  |  |  | 
| 3031 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3032 |  |  |  |  |  |  |  | 
| 3033 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3034 |  |  |  |  |  |  | my $xmlHandler = $xmlconv->GetTwigHandler(); | 
| 3035 |  |  |  |  |  |  |  | 
| 3036 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3037 |  |  |  |  |  |  |  | 
| 3038 |  |  |  |  |  |  | =head3 GetParsedCount | 
| 3039 |  |  |  |  |  |  |  | 
| 3040 |  |  |  |  |  |  | Description: | 
| 3041 |  |  |  |  |  |  |  | 
| 3042 |  |  |  |  |  |  | Returns the _parsedCount member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3043 |  |  |  |  |  |  |  | 
| 3044 |  |  |  |  |  |  | Input: | 
| 3045 |  |  |  |  |  |  |  | 
| 3046 |  |  |  |  |  |  | None | 
| 3047 |  |  |  |  |  |  |  | 
| 3048 |  |  |  |  |  |  | Output: | 
| 3049 |  |  |  |  |  |  |  | 
| 3050 |  |  |  |  |  |  | $value -> Number of parsed Medline articles. | 
| 3051 |  |  |  |  |  |  |  | 
| 3052 |  |  |  |  |  |  | Example: | 
| 3053 |  |  |  |  |  |  |  | 
| 3054 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3055 |  |  |  |  |  |  |  | 
| 3056 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3057 |  |  |  |  |  |  | my $numOfParsed = $xmlconv->GetParsedCount(); | 
| 3058 |  |  |  |  |  |  |  | 
| 3059 |  |  |  |  |  |  | print( "Number of parsed Medline articles: $numOfParsed\n" ); | 
| 3060 |  |  |  |  |  |  |  | 
| 3061 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3062 |  |  |  |  |  |  |  | 
| 3063 |  |  |  |  |  |  | =head3 GetTempStr | 
| 3064 |  |  |  |  |  |  |  | 
| 3065 |  |  |  |  |  |  | Description: | 
| 3066 |  |  |  |  |  |  |  | 
| 3067 |  |  |  |  |  |  | Returns the _tempStr member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3068 |  |  |  |  |  |  |  | 
| 3069 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. Used by module as a temporary storage | 
| 3070 |  |  |  |  |  |  | location for parsed Medline 'Title' and 'Abstract' flag string data. | 
| 3071 |  |  |  |  |  |  |  | 
| 3072 |  |  |  |  |  |  | Input: | 
| 3073 |  |  |  |  |  |  |  | 
| 3074 |  |  |  |  |  |  | None | 
| 3075 |  |  |  |  |  |  |  | 
| 3076 |  |  |  |  |  |  | Output: | 
| 3077 |  |  |  |  |  |  |  | 
| 3078 |  |  |  |  |  |  | $string -> Temporary string storage location. | 
| 3079 |  |  |  |  |  |  |  | 
| 3080 |  |  |  |  |  |  | Example: | 
| 3081 |  |  |  |  |  |  |  | 
| 3082 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3083 |  |  |  |  |  |  |  | 
| 3084 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3085 |  |  |  |  |  |  | my $tempStr = $xmlconv->GetTempStr(); | 
| 3086 |  |  |  |  |  |  |  | 
| 3087 |  |  |  |  |  |  | print( "Temp String: $tempStr\n" ); | 
| 3088 |  |  |  |  |  |  |  | 
| 3089 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3090 |  |  |  |  |  |  |  | 
| 3091 |  |  |  |  |  |  | =head3 GetTempDate | 
| 3092 |  |  |  |  |  |  |  | 
| 3093 |  |  |  |  |  |  | Description: | 
| 3094 |  |  |  |  |  |  |  | 
| 3095 |  |  |  |  |  |  | Returns the _tempDate member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3096 |  |  |  |  |  |  | Used by module as a temporary storage location for parsed Medline 'DateCreated' flag string data. | 
| 3097 |  |  |  |  |  |  |  | 
| 3098 |  |  |  |  |  |  | Input: | 
| 3099 |  |  |  |  |  |  |  | 
| 3100 |  |  |  |  |  |  | None | 
| 3101 |  |  |  |  |  |  |  | 
| 3102 |  |  |  |  |  |  | Output: | 
| 3103 |  |  |  |  |  |  |  | 
| 3104 |  |  |  |  |  |  | $date -> Date string - Format: XX/XX/XXXX (Mon/Day/Year). | 
| 3105 |  |  |  |  |  |  |  | 
| 3106 |  |  |  |  |  |  | Example: | 
| 3107 |  |  |  |  |  |  |  | 
| 3108 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3109 |  |  |  |  |  |  |  | 
| 3110 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3111 |  |  |  |  |  |  | my $date = $xmlconv->GetTempDate(); | 
| 3112 |  |  |  |  |  |  |  | 
| 3113 |  |  |  |  |  |  | print( "Temp Date: $date\n" ); | 
| 3114 |  |  |  |  |  |  |  | 
| 3115 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3116 |  |  |  |  |  |  |  | 
| 3117 |  |  |  |  |  |  | =head3 GetCompoundWordAry | 
| 3118 |  |  |  |  |  |  |  | 
| 3119 |  |  |  |  |  |  | Description: | 
| 3120 |  |  |  |  |  |  |  | 
| 3121 |  |  |  |  |  |  | Returns the _compoundWordAry member array reference set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3122 |  |  |  |  |  |  |  | 
| 3123 |  |  |  |  |  |  | Warning: Compound word data must be loaded in memory first via ReadCompoundWordDataFromFile(). | 
| 3124 |  |  |  |  |  |  |  | 
| 3125 |  |  |  |  |  |  | Input: | 
| 3126 |  |  |  |  |  |  |  | 
| 3127 |  |  |  |  |  |  | None | 
| 3128 |  |  |  |  |  |  |  | 
| 3129 |  |  |  |  |  |  | Output: | 
| 3130 |  |  |  |  |  |  |  | 
| 3131 |  |  |  |  |  |  | $arrayReference -> Compound word array reference. | 
| 3132 |  |  |  |  |  |  |  | 
| 3133 |  |  |  |  |  |  | Example: | 
| 3134 |  |  |  |  |  |  |  | 
| 3135 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3136 |  |  |  |  |  |  |  | 
| 3137 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3138 |  |  |  |  |  |  | my $arrayReference = $xmlconv->GetCompoundWordAry(); | 
| 3139 |  |  |  |  |  |  | my @compoundWord = @{ $arrayReference }; | 
| 3140 |  |  |  |  |  |  |  | 
| 3141 |  |  |  |  |  |  | print( "Compound Word Array: @compoundWord\n" ); | 
| 3142 |  |  |  |  |  |  |  | 
| 3143 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3144 |  |  |  |  |  |  |  | 
| 3145 |  |  |  |  |  |  | =head3 GetCompoundWordBST | 
| 3146 |  |  |  |  |  |  |  | 
| 3147 |  |  |  |  |  |  | Description: | 
| 3148 |  |  |  |  |  |  |  | 
| 3149 |  |  |  |  |  |  | Returns the _compoundWordBST member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3150 |  |  |  |  |  |  |  | 
| 3151 |  |  |  |  |  |  | Input: | 
| 3152 |  |  |  |  |  |  |  | 
| 3153 |  |  |  |  |  |  | None | 
| 3154 |  |  |  |  |  |  |  | 
| 3155 |  |  |  |  |  |  | Output: | 
| 3156 |  |  |  |  |  |  |  | 
| 3157 |  |  |  |  |  |  | $bst -> Compound word binary search tree. | 
| 3158 |  |  |  |  |  |  |  | 
| 3159 |  |  |  |  |  |  | Example: | 
| 3160 |  |  |  |  |  |  |  | 
| 3161 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3162 |  |  |  |  |  |  |  | 
| 3163 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3164 |  |  |  |  |  |  | my $bst = $xmlconv->GetCompoundWordBST(); | 
| 3165 |  |  |  |  |  |  |  | 
| 3166 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3167 |  |  |  |  |  |  |  | 
| 3168 |  |  |  |  |  |  | =head3 GetMaxCompoundWordLength | 
| 3169 |  |  |  |  |  |  |  | 
| 3170 |  |  |  |  |  |  | Description: | 
| 3171 |  |  |  |  |  |  |  | 
| 3172 |  |  |  |  |  |  | Returns the _maxCompoundWordLength member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3173 |  |  |  |  |  |  |  | 
| 3174 |  |  |  |  |  |  | Note: If not defined, it is automatically set to and returns 20. | 
| 3175 |  |  |  |  |  |  |  | 
| 3176 |  |  |  |  |  |  | Input: | 
| 3177 |  |  |  |  |  |  |  | 
| 3178 |  |  |  |  |  |  | None | 
| 3179 |  |  |  |  |  |  |  | 
| 3180 |  |  |  |  |  |  | Output: | 
| 3181 |  |  |  |  |  |  |  | 
| 3182 |  |  |  |  |  |  | $value -> Maximum number of compound words in a given phrase. | 
| 3183 |  |  |  |  |  |  |  | 
| 3184 |  |  |  |  |  |  | Example: | 
| 3185 |  |  |  |  |  |  |  | 
| 3186 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3187 |  |  |  |  |  |  |  | 
| 3188 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3189 |  |  |  |  |  |  | my $compoundWordLength = $xmlconv->GetMaxCompoundWordLength(); | 
| 3190 |  |  |  |  |  |  |  | 
| 3191 |  |  |  |  |  |  | print( "Maximum Compound Word Length: $compoundWordLength\n" ); | 
| 3192 |  |  |  |  |  |  |  | 
| 3193 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3194 |  |  |  |  |  |  |  | 
| 3195 |  |  |  |  |  |  | =head3 GetOverwriteExistingFile | 
| 3196 |  |  |  |  |  |  |  | 
| 3197 |  |  |  |  |  |  | Description: | 
| 3198 |  |  |  |  |  |  |  | 
| 3199 |  |  |  |  |  |  | Returns the _overwriteExistingFile member variable set during Word2vec::Xmltow2v object instantiation of new function. | 
| 3200 |  |  |  |  |  |  | Enables overwriting of existing text corpus if set to '1' or appends to the existing text corpus if set to '0'. | 
| 3201 |  |  |  |  |  |  |  | 
| 3202 |  |  |  |  |  |  | Input: | 
| 3203 |  |  |  |  |  |  |  | 
| 3204 |  |  |  |  |  |  | None | 
| 3205 |  |  |  |  |  |  |  | 
| 3206 |  |  |  |  |  |  | Output: | 
| 3207 |  |  |  |  |  |  |  | 
| 3208 |  |  |  |  |  |  | $value -> '1' = Overwrite existing file / '0' = Append to existing file. | 
| 3209 |  |  |  |  |  |  |  | 
| 3210 |  |  |  |  |  |  | Example: | 
| 3211 |  |  |  |  |  |  |  | 
| 3212 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3213 |  |  |  |  |  |  |  | 
| 3214 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3215 |  |  |  |  |  |  | my $overwriteExistingFile = $xmlconv->GetOverwriteExistingFile(); | 
| 3216 |  |  |  |  |  |  |  | 
| 3217 |  |  |  |  |  |  | print( "Overwrite Existing File? YES\n" ) if ( $overwriteExistingFile == 1 ); | 
| 3218 |  |  |  |  |  |  | print( "Overwrite Existing File? NO\n" ) if ( $overwriteExistingFile == 0 ); | 
| 3219 |  |  |  |  |  |  |  | 
| 3220 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3221 |  |  |  |  |  |  |  | 
| 3222 |  |  |  |  |  |  | =head2 Mutator Functions | 
| 3223 |  |  |  |  |  |  |  | 
| 3224 |  |  |  |  |  |  | =head3 SetStoreTitle | 
| 3225 |  |  |  |  |  |  |  | 
| 3226 |  |  |  |  |  |  | Description: | 
| 3227 |  |  |  |  |  |  |  | 
| 3228 |  |  |  |  |  |  | Sets member variable to passed integer parameter. Instructs module to store article title if true or omit if false. | 
| 3229 |  |  |  |  |  |  |  | 
| 3230 |  |  |  |  |  |  | Input: | 
| 3231 |  |  |  |  |  |  |  | 
| 3232 |  |  |  |  |  |  | $value -> '1' = Store Titles / '0' = Omit Titles | 
| 3233 |  |  |  |  |  |  |  | 
| 3234 |  |  |  |  |  |  | Output: | 
| 3235 |  |  |  |  |  |  |  | 
| 3236 |  |  |  |  |  |  | None | 
| 3237 |  |  |  |  |  |  |  | 
| 3238 |  |  |  |  |  |  | Example: | 
| 3239 |  |  |  |  |  |  |  | 
| 3240 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3241 |  |  |  |  |  |  |  | 
| 3242 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3243 |  |  |  |  |  |  | $xmlconv->SetStoreTitle( 1 ); | 
| 3244 |  |  |  |  |  |  |  | 
| 3245 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3246 |  |  |  |  |  |  |  | 
| 3247 |  |  |  |  |  |  | =head3 SetStoreAbstract | 
| 3248 |  |  |  |  |  |  |  | 
| 3249 |  |  |  |  |  |  | Description: | 
| 3250 |  |  |  |  |  |  |  | 
| 3251 |  |  |  |  |  |  | Sets member variable to passed integer parameter. Instructs module to store article abstracts if true or omit if false. | 
| 3252 |  |  |  |  |  |  |  | 
| 3253 |  |  |  |  |  |  | Input: | 
| 3254 |  |  |  |  |  |  |  | 
| 3255 |  |  |  |  |  |  | $value -> '1' = Store Abstracts / '0' = Omit Abstracts | 
| 3256 |  |  |  |  |  |  |  | 
| 3257 |  |  |  |  |  |  | Output: | 
| 3258 |  |  |  |  |  |  |  | 
| 3259 |  |  |  |  |  |  | None | 
| 3260 |  |  |  |  |  |  |  | 
| 3261 |  |  |  |  |  |  | Example: | 
| 3262 |  |  |  |  |  |  |  | 
| 3263 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3264 |  |  |  |  |  |  |  | 
| 3265 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3266 |  |  |  |  |  |  | $xmlconv->SetStoreAbstract( 1 ); | 
| 3267 |  |  |  |  |  |  |  | 
| 3268 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3269 |  |  |  |  |  |  |  | 
| 3270 |  |  |  |  |  |  | =head3 SetWorkingDir | 
| 3271 |  |  |  |  |  |  |  | 
| 3272 |  |  |  |  |  |  | Description: | 
| 3273 |  |  |  |  |  |  |  | 
| 3274 |  |  |  |  |  |  | Sets member variable to passed string parameter. Represents the working directory. | 
| 3275 |  |  |  |  |  |  |  | 
| 3276 |  |  |  |  |  |  | Input: | 
| 3277 |  |  |  |  |  |  |  | 
| 3278 |  |  |  |  |  |  | $string -> Working directory string | 
| 3279 |  |  |  |  |  |  |  | 
| 3280 |  |  |  |  |  |  | Output: | 
| 3281 |  |  |  |  |  |  |  | 
| 3282 |  |  |  |  |  |  | None | 
| 3283 |  |  |  |  |  |  |  | 
| 3284 |  |  |  |  |  |  | Example: | 
| 3285 |  |  |  |  |  |  |  | 
| 3286 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3287 |  |  |  |  |  |  |  | 
| 3288 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3289 |  |  |  |  |  |  | $xmlconv->SetWorkingDir( "/samples/" ); | 
| 3290 |  |  |  |  |  |  |  | 
| 3291 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3292 |  |  |  |  |  |  |  | 
| 3293 |  |  |  |  |  |  | =head3 SetSavePath | 
| 3294 |  |  |  |  |  |  |  | 
| 3295 |  |  |  |  |  |  | Description: | 
| 3296 |  |  |  |  |  |  |  | 
| 3297 |  |  |  |  |  |  | Sets member variable to passed string parameter. Represents the text corpus save path. | 
| 3298 |  |  |  |  |  |  |  | 
| 3299 |  |  |  |  |  |  | Input: | 
| 3300 |  |  |  |  |  |  |  | 
| 3301 |  |  |  |  |  |  | $string -> Text corpus save path | 
| 3302 |  |  |  |  |  |  |  | 
| 3303 |  |  |  |  |  |  | Output: | 
| 3304 |  |  |  |  |  |  |  | 
| 3305 |  |  |  |  |  |  | None | 
| 3306 |  |  |  |  |  |  |  | 
| 3307 |  |  |  |  |  |  | Example: | 
| 3308 |  |  |  |  |  |  |  | 
| 3309 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3310 |  |  |  |  |  |  |  | 
| 3311 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3312 |  |  |  |  |  |  | $xmlconv->SetSavePath( "samples/textcorpus.txt" ); | 
| 3313 |  |  |  |  |  |  |  | 
| 3314 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3315 |  |  |  |  |  |  |  | 
| 3316 |  |  |  |  |  |  | =head3 SetQuickParse | 
| 3317 |  |  |  |  |  |  |  | 
| 3318 |  |  |  |  |  |  | Description: | 
| 3319 |  |  |  |  |  |  |  | 
| 3320 |  |  |  |  |  |  | Sets member variable to passed integer parameter. Instructs module to utilize quick parse | 
| 3321 |  |  |  |  |  |  | routines to speed up text corpus compilation. This method is somewhat less accurate due to its non-exhaustive nature. | 
| 3322 |  |  |  |  |  |  |  | 
| 3323 |  |  |  |  |  |  | Input: | 
| 3324 |  |  |  |  |  |  |  | 
| 3325 |  |  |  |  |  |  | $value -> '1' = Enable Quick Parse / '0' = Disable Quick Parse | 
| 3326 |  |  |  |  |  |  |  | 
| 3327 |  |  |  |  |  |  | Output: | 
| 3328 |  |  |  |  |  |  |  | 
| 3329 |  |  |  |  |  |  | None | 
| 3330 |  |  |  |  |  |  |  | 
| 3331 |  |  |  |  |  |  | Example: | 
| 3332 |  |  |  |  |  |  |  | 
| 3333 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3334 |  |  |  |  |  |  |  | 
| 3335 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3336 |  |  |  |  |  |  | $xmlconv->SetQuickParse( 1 ); | 
| 3337 |  |  |  |  |  |  |  | 
| 3338 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3339 |  |  |  |  |  |  |  | 
| 3340 |  |  |  |  |  |  | =head3 SetCompoundifyText | 
| 3341 |  |  |  |  |  |  |  | 
| 3342 |  |  |  |  |  |  | Description: | 
| 3343 |  |  |  |  |  |  |  | 
| 3344 |  |  |  |  |  |  | Sets member variable to passed integer parameter. Instructs module to utilize 'compoundify' option if true. | 
| 3345 |  |  |  |  |  |  |  | 
| 3346 |  |  |  |  |  |  | Warning: This requires compound word data to be loaded into memory with ReadCompoundWordDataFromFile() method prior | 
| 3347 |  |  |  |  |  |  | to executing text corpus compilation. | 
| 3348 |  |  |  |  |  |  |  | 
| 3349 |  |  |  |  |  |  | Input: | 
| 3350 |  |  |  |  |  |  |  | 
| 3351 |  |  |  |  |  |  | $value -> '1' = Compoundify text / '0' = Do not compoundify text | 
| 3352 |  |  |  |  |  |  |  | 
| 3353 |  |  |  |  |  |  | Output: | 
| 3354 |  |  |  |  |  |  |  | 
| 3355 |  |  |  |  |  |  | None | 
| 3356 |  |  |  |  |  |  |  | 
| 3357 |  |  |  |  |  |  | Example: | 
| 3358 |  |  |  |  |  |  |  | 
| 3359 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3360 |  |  |  |  |  |  |  | 
| 3361 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3362 |  |  |  |  |  |  | $xmlconv->SetCompoundifyText( 1 ); | 
| 3363 |  |  |  |  |  |  |  | 
| 3364 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3365 |  |  |  |  |  |  |  | 
| 3366 |  |  |  |  |  |  | =head3 SetNumOfThreads | 
| 3367 |  |  |  |  |  |  |  | 
| 3368 |  |  |  |  |  |  | Description: | 
| 3369 |  |  |  |  |  |  |  | 
| 3370 |  |  |  |  |  |  | Sets member variable to passed integer parameter. Sets the requested number of threads to parse Medline XML files | 
| 3371 |  |  |  |  |  |  | and compile the text corpus. | 
| 3372 |  |  |  |  |  |  |  | 
| 3373 |  |  |  |  |  |  | Input: | 
| 3374 |  |  |  |  |  |  |  | 
| 3375 |  |  |  |  |  |  | $value -> Integer (Positive value) | 
| 3376 |  |  |  |  |  |  |  | 
| 3377 |  |  |  |  |  |  | Output: | 
| 3378 |  |  |  |  |  |  |  | 
| 3379 |  |  |  |  |  |  | None | 
| 3380 |  |  |  |  |  |  |  | 
| 3381 |  |  |  |  |  |  | Example: | 
| 3382 |  |  |  |  |  |  |  | 
| 3383 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3384 |  |  |  |  |  |  |  | 
| 3385 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3386 |  |  |  |  |  |  | $xmlconv->SetNumOfThreads( 4 ); | 
| 3387 |  |  |  |  |  |  |  | 
| 3388 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3389 |  |  |  |  |  |  |  | 
| 3390 |  |  |  |  |  |  | =head3 SetBeginDate | 
| 3391 |  |  |  |  |  |  |  | 
| 3392 |  |  |  |  |  |  | Description: | 
| 3393 |  |  |  |  |  |  |  | 
| 3394 |  |  |  |  |  |  | Sets member variable to passed string parameter. Sets beginning date range for earliest articles to store, by | 
| 3395 |  |  |  |  |  |  | 'DateCreated' Medline tag, within the text corpus during compilation. | 
| 3396 |  |  |  |  |  |  |  | 
| 3397 |  |  |  |  |  |  | Note: Expected format - "XX/XX/XXXX" (Mon/Day/Year) | 
| 3398 |  |  |  |  |  |  |  | 
| 3399 |  |  |  |  |  |  | Input: | 
| 3400 |  |  |  |  |  |  |  | 
| 3401 |  |  |  |  |  |  | $string -> Date string - Format: "XX/XX/XXXX" | 
| 3402 |  |  |  |  |  |  |  | 
| 3403 |  |  |  |  |  |  | Output: | 
| 3404 |  |  |  |  |  |  |  | 
| 3405 |  |  |  |  |  |  | None | 
| 3406 |  |  |  |  |  |  |  | 
| 3407 |  |  |  |  |  |  | Example: | 
| 3408 |  |  |  |  |  |  |  | 
| 3409 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3410 |  |  |  |  |  |  |  | 
| 3411 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3412 |  |  |  |  |  |  | $xmlconv->SetBeginDate( "01/01/2004" ); | 
| 3413 |  |  |  |  |  |  |  | 
| 3414 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3415 |  |  |  |  |  |  |  | 
| 3416 |  |  |  |  |  |  | =head3 SetEndDate | 
| 3417 |  |  |  |  |  |  |  | 
| 3418 |  |  |  |  |  |  | Description: | 
| 3419 |  |  |  |  |  |  |  | 
| 3420 |  |  |  |  |  |  | Sets member variable to passed string parameter. Sets ending date range for latest article to store, by | 
| 3421 |  |  |  |  |  |  | 'DateCreated' Medline tag, within the text corpus during compilation. | 
| 3422 |  |  |  |  |  |  |  | 
| 3423 |  |  |  |  |  |  | Note: Expected format - "XX/XX/XXXX" (Mon/Day/Year) | 
| 3424 |  |  |  |  |  |  |  | 
| 3425 |  |  |  |  |  |  | Input: | 
| 3426 |  |  |  |  |  |  |  | 
| 3427 |  |  |  |  |  |  | $string -> Date string - Format: "XX/XX/XXXX" | 
| 3428 |  |  |  |  |  |  |  | 
| 3429 |  |  |  |  |  |  | Output: | 
| 3430 |  |  |  |  |  |  |  | 
| 3431 |  |  |  |  |  |  | None | 
| 3432 |  |  |  |  |  |  |  | 
| 3433 |  |  |  |  |  |  | Example: | 
| 3434 |  |  |  |  |  |  |  | 
| 3435 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3436 |  |  |  |  |  |  |  | 
| 3437 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3438 |  |  |  |  |  |  | $xmlconv->SetEndDate( "08/13/2016" ); | 
| 3439 |  |  |  |  |  |  |  | 
| 3440 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3441 |  |  |  |  |  |  |  | 
| 3442 |  |  |  |  |  |  | =head3 SetXMLStringToParse | 
| 3443 |  |  |  |  |  |  |  | 
| 3444 |  |  |  |  |  |  | Description: | 
| 3445 |  |  |  |  |  |  |  | 
| 3446 |  |  |  |  |  |  | Sets member variable to passed string parameter. This string normally consists of Medline XML data to be | 
| 3447 |  |  |  |  |  |  | parsed for text corpus compilation. | 
| 3448 |  |  |  |  |  |  |  | 
| 3449 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3450 |  |  |  |  |  |  |  | 
| 3451 |  |  |  |  |  |  | Input: | 
| 3452 |  |  |  |  |  |  |  | 
| 3453 |  |  |  |  |  |  | $string -> String | 
| 3454 |  |  |  |  |  |  |  | 
| 3455 |  |  |  |  |  |  | Output: | 
| 3456 |  |  |  |  |  |  |  | 
| 3457 |  |  |  |  |  |  | None | 
| 3458 |  |  |  |  |  |  |  | 
| 3459 |  |  |  |  |  |  | Example: | 
| 3460 |  |  |  |  |  |  |  | 
| 3461 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3462 |  |  |  |  |  |  |  | 
| 3463 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3464 |  |  |  |  |  |  | $xmlconv->SetXMLStringToParse( "Hello World!" ); | 
| 3465 |  |  |  |  |  |  |  | 
| 3466 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3467 |  |  |  |  |  |  |  | 
| 3468 |  |  |  |  |  |  | =head3 SetTextCorpusStr | 
| 3469 |  |  |  |  |  |  |  | 
| 3470 |  |  |  |  |  |  | Description: | 
| 3471 |  |  |  |  |  |  |  | 
| 3472 |  |  |  |  |  |  | Sets member variable to passed string parameter. Overwrites any stored text corpus data in memory with the string parameter. | 
| 3473 |  |  |  |  |  |  |  | 
| 3474 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3475 |  |  |  |  |  |  |  | 
| 3476 |  |  |  |  |  |  | Input: | 
| 3477 |  |  |  |  |  |  |  | 
| 3478 |  |  |  |  |  |  | $string -> String | 
| 3479 |  |  |  |  |  |  |  | 
| 3480 |  |  |  |  |  |  | Output: | 
| 3481 |  |  |  |  |  |  |  | 
| 3482 |  |  |  |  |  |  | None | 
| 3483 |  |  |  |  |  |  |  | 
| 3484 |  |  |  |  |  |  | Example: | 
| 3485 |  |  |  |  |  |  |  | 
| 3486 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3487 |  |  |  |  |  |  |  | 
| 3488 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3489 |  |  |  |  |  |  | $xmlconv->SetTextCorpusStr( "Hello World!" ); | 
| 3490 |  |  |  |  |  |  |  | 
| 3491 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3492 |  |  |  |  |  |  |  | 
| 3493 |  |  |  |  |  |  | =head3 AppendStrToTextCorpus | 
| 3494 |  |  |  |  |  |  |  | 
| 3495 |  |  |  |  |  |  | Description: | 
| 3496 |  |  |  |  |  |  |  | 
| 3497 |  |  |  |  |  |  | Sets member variable to passed string parameter. Appends string parameter to text corpus string in memory. | 
| 3498 |  |  |  |  |  |  |  | 
| 3499 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3500 |  |  |  |  |  |  |  | 
| 3501 |  |  |  |  |  |  | Input: | 
| 3502 |  |  |  |  |  |  |  | 
| 3503 |  |  |  |  |  |  | $string -> String | 
| 3504 |  |  |  |  |  |  |  | 
| 3505 |  |  |  |  |  |  | Output: | 
| 3506 |  |  |  |  |  |  |  | 
| 3507 |  |  |  |  |  |  | None | 
| 3508 |  |  |  |  |  |  |  | 
| 3509 |  |  |  |  |  |  | Example: | 
| 3510 |  |  |  |  |  |  |  | 
| 3511 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3512 |  |  |  |  |  |  |  | 
| 3513 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3514 |  |  |  |  |  |  | $xmlconv->AppendStrToTextCorpus( "Hello World!" ); | 
| 3515 |  |  |  |  |  |  |  | 
| 3516 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3517 |  |  |  |  |  |  |  | 
| 3518 |  |  |  |  |  |  | =head3 ClearTextCorpus | 
| 3519 |  |  |  |  |  |  |  | 
| 3520 |  |  |  |  |  |  | Description: | 
| 3521 |  |  |  |  |  |  |  | 
| 3522 |  |  |  |  |  |  | Clears text corpus data in memory. | 
| 3523 |  |  |  |  |  |  |  | 
| 3524 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3525 |  |  |  |  |  |  |  | 
| 3526 |  |  |  |  |  |  | Input: | 
| 3527 |  |  |  |  |  |  |  | 
| 3528 |  |  |  |  |  |  | None | 
| 3529 |  |  |  |  |  |  |  | 
| 3530 |  |  |  |  |  |  | Output: | 
| 3531 |  |  |  |  |  |  |  | 
| 3532 |  |  |  |  |  |  | None | 
| 3533 |  |  |  |  |  |  |  | 
| 3534 |  |  |  |  |  |  | Example: | 
| 3535 |  |  |  |  |  |  |  | 
| 3536 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3537 |  |  |  |  |  |  |  | 
| 3538 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3539 |  |  |  |  |  |  | $xmlconv->ClearTextCorpus(); | 
| 3540 |  |  |  |  |  |  |  | 
| 3541 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3542 |  |  |  |  |  |  |  | 
| 3543 |  |  |  |  |  |  | =head3 SetTempStr | 
| 3544 |  |  |  |  |  |  |  | 
| 3545 |  |  |  |  |  |  | Description: | 
| 3546 |  |  |  |  |  |  |  | 
| 3547 |  |  |  |  |  |  | Sets member variable to passed string parameter. Sets temporary member string to passed string parameter. | 
| 3548 |  |  |  |  |  |  | (Temporary placeholder for Medline Title and Abstract data). | 
| 3549 |  |  |  |  |  |  |  | 
| 3550 |  |  |  |  |  |  | Note: This removes special characters and converts all characters to lowercase. | 
| 3551 |  |  |  |  |  |  |  | 
| 3552 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3553 |  |  |  |  |  |  |  | 
| 3554 |  |  |  |  |  |  | Input: | 
| 3555 |  |  |  |  |  |  |  | 
| 3556 |  |  |  |  |  |  | $string -> String | 
| 3557 |  |  |  |  |  |  |  | 
| 3558 |  |  |  |  |  |  | Output: | 
| 3559 |  |  |  |  |  |  |  | 
| 3560 |  |  |  |  |  |  | None | 
| 3561 |  |  |  |  |  |  |  | 
| 3562 |  |  |  |  |  |  | Example: | 
| 3563 |  |  |  |  |  |  |  | 
| 3564 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3565 |  |  |  |  |  |  |  | 
| 3566 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3567 |  |  |  |  |  |  | $xmlconv->SetTempStr( "Hello World!" ); | 
| 3568 |  |  |  |  |  |  |  | 
| 3569 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3570 |  |  |  |  |  |  |  | 
| 3571 |  |  |  |  |  |  | =head3 AppendToTempStr | 
| 3572 |  |  |  |  |  |  |  | 
| 3573 |  |  |  |  |  |  | Description: | 
| 3574 |  |  |  |  |  |  |  | 
| 3575 |  |  |  |  |  |  | Appends string parameter to temporary member string in memory. | 
| 3576 |  |  |  |  |  |  |  | 
| 3577 |  |  |  |  |  |  | Note: This removes special characters and converts all characters to lowercase. | 
| 3578 |  |  |  |  |  |  |  | 
| 3579 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3580 |  |  |  |  |  |  |  | 
| 3581 |  |  |  |  |  |  | Input: | 
| 3582 |  |  |  |  |  |  |  | 
| 3583 |  |  |  |  |  |  | $string -> String | 
| 3584 |  |  |  |  |  |  |  | 
| 3585 |  |  |  |  |  |  | Output: | 
| 3586 |  |  |  |  |  |  |  | 
| 3587 |  |  |  |  |  |  | None | 
| 3588 |  |  |  |  |  |  |  | 
| 3589 |  |  |  |  |  |  | Example: | 
| 3590 |  |  |  |  |  |  |  | 
| 3591 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3592 |  |  |  |  |  |  |  | 
| 3593 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3594 |  |  |  |  |  |  | $xmlconv->AppendToTempStr( "Hello World!" ); | 
| 3595 |  |  |  |  |  |  |  | 
| 3596 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3597 |  |  |  |  |  |  |  | 
| 3598 |  |  |  |  |  |  | =head3 ClearTempStr | 
| 3599 |  |  |  |  |  |  |  | 
| 3600 |  |  |  |  |  |  | Clears the temporary string storage in memory. | 
| 3601 |  |  |  |  |  |  |  | 
| 3602 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3603 |  |  |  |  |  |  |  | 
| 3604 |  |  |  |  |  |  | Input: | 
| 3605 |  |  |  |  |  |  |  | 
| 3606 |  |  |  |  |  |  | None | 
| 3607 |  |  |  |  |  |  |  | 
| 3608 |  |  |  |  |  |  | Output: | 
| 3609 |  |  |  |  |  |  |  | 
| 3610 |  |  |  |  |  |  | None | 
| 3611 |  |  |  |  |  |  |  | 
| 3612 |  |  |  |  |  |  | Example: | 
| 3613 |  |  |  |  |  |  |  | 
| 3614 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3615 |  |  |  |  |  |  |  | 
| 3616 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3617 |  |  |  |  |  |  | $xmlconv->ClearTempStr(); | 
| 3618 |  |  |  |  |  |  |  | 
| 3619 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3620 |  |  |  |  |  |  |  | 
| 3621 |  |  |  |  |  |  | =head3 SetTempDate | 
| 3622 |  |  |  |  |  |  |  | 
| 3623 |  |  |  |  |  |  | Description: | 
| 3624 |  |  |  |  |  |  |  | 
| 3625 |  |  |  |  |  |  | Sets member variable to passed string parameter. Sets temporary date string to passed string. | 
| 3626 |  |  |  |  |  |  |  | 
| 3627 |  |  |  |  |  |  | Note: Date Format - "XX/XX/XXXX" (Mon/Day/Year) | 
| 3628 |  |  |  |  |  |  |  | 
| 3629 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3630 |  |  |  |  |  |  |  | 
| 3631 |  |  |  |  |  |  | Input: | 
| 3632 |  |  |  |  |  |  |  | 
| 3633 |  |  |  |  |  |  | $string -> Date string - Format: "XX/XX/XXXX" | 
| 3634 |  |  |  |  |  |  |  | 
| 3635 |  |  |  |  |  |  | Output: | 
| 3636 |  |  |  |  |  |  |  | 
| 3637 |  |  |  |  |  |  | None | 
| 3638 |  |  |  |  |  |  |  | 
| 3639 |  |  |  |  |  |  | Example: | 
| 3640 |  |  |  |  |  |  |  | 
| 3641 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3642 |  |  |  |  |  |  |  | 
| 3643 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3644 |  |  |  |  |  |  | $xmlconv->SetTempDate( "08/13/2016" ); | 
| 3645 |  |  |  |  |  |  |  | 
| 3646 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3647 |  |  |  |  |  |  |  | 
| 3648 |  |  |  |  |  |  | =head3 ClearTempDate | 
| 3649 |  |  |  |  |  |  |  | 
| 3650 |  |  |  |  |  |  | Description: | 
| 3651 |  |  |  |  |  |  |  | 
| 3652 |  |  |  |  |  |  | Clears the temporary date storage location in memory. | 
| 3653 |  |  |  |  |  |  |  | 
| 3654 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3655 |  |  |  |  |  |  |  | 
| 3656 |  |  |  |  |  |  | Input: | 
| 3657 |  |  |  |  |  |  |  | 
| 3658 |  |  |  |  |  |  | None | 
| 3659 |  |  |  |  |  |  |  | 
| 3660 |  |  |  |  |  |  | Output: | 
| 3661 |  |  |  |  |  |  |  | 
| 3662 |  |  |  |  |  |  | None | 
| 3663 |  |  |  |  |  |  |  | 
| 3664 |  |  |  |  |  |  | Example: | 
| 3665 |  |  |  |  |  |  |  | 
| 3666 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3667 |  |  |  |  |  |  |  | 
| 3668 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3669 |  |  |  |  |  |  | $xmlconv->ClearTempDate(); | 
| 3670 |  |  |  |  |  |  |  | 
| 3671 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3672 |  |  |  |  |  |  |  | 
| 3673 |  |  |  |  |  |  | =head3 SetCompoundWordAry | 
| 3674 |  |  |  |  |  |  |  | 
| 3675 |  |  |  |  |  |  | Description: | 
| 3676 |  |  |  |  |  |  |  | 
| 3677 |  |  |  |  |  |  | Sets member variable to de-referenced passed array reference parameter. Stores compound word array by | 
| 3678 |  |  |  |  |  |  | de-referencing array reference parameter. | 
| 3679 |  |  |  |  |  |  |  | 
| 3680 |  |  |  |  |  |  | Note: Clears previous data if existing. | 
| 3681 |  |  |  |  |  |  |  | 
| 3682 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3683 |  |  |  |  |  |  |  | 
| 3684 |  |  |  |  |  |  | Input: | 
| 3685 |  |  |  |  |  |  |  | 
| 3686 |  |  |  |  |  |  | $arrayReference -> Array reference of compound words | 
| 3687 |  |  |  |  |  |  |  | 
| 3688 |  |  |  |  |  |  | Output: | 
| 3689 |  |  |  |  |  |  |  | 
| 3690 |  |  |  |  |  |  | None | 
| 3691 |  |  |  |  |  |  |  | 
| 3692 |  |  |  |  |  |  | Example: | 
| 3693 |  |  |  |  |  |  |  | 
| 3694 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3695 |  |  |  |  |  |  |  | 
| 3696 |  |  |  |  |  |  | my @compoundWordAry = ( "big dog", "respiratory failure", "seven large masses" ); | 
| 3697 |  |  |  |  |  |  |  | 
| 3698 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3699 |  |  |  |  |  |  | $xmlconv->SetCompoundWordAry( \@compoundWordAry ); | 
| 3700 |  |  |  |  |  |  |  | 
| 3701 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3702 |  |  |  |  |  |  |  | 
| 3703 |  |  |  |  |  |  | =head3 ClearCompoundWordAry | 
| 3704 |  |  |  |  |  |  |  | 
| 3705 |  |  |  |  |  |  | Description: | 
| 3706 |  |  |  |  |  |  |  | 
| 3707 |  |  |  |  |  |  | Clears compound word array in memory. | 
| 3708 |  |  |  |  |  |  |  | 
| 3709 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3710 |  |  |  |  |  |  |  | 
| 3711 |  |  |  |  |  |  | Input: | 
| 3712 |  |  |  |  |  |  |  | 
| 3713 |  |  |  |  |  |  | None | 
| 3714 |  |  |  |  |  |  |  | 
| 3715 |  |  |  |  |  |  | Output: | 
| 3716 |  |  |  |  |  |  |  | 
| 3717 |  |  |  |  |  |  | None | 
| 3718 |  |  |  |  |  |  |  | 
| 3719 |  |  |  |  |  |  | Example: | 
| 3720 |  |  |  |  |  |  |  | 
| 3721 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3722 |  |  |  |  |  |  |  | 
| 3723 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3724 |  |  |  |  |  |  | $xmlconv->ClearCompoundWordAry(); | 
| 3725 |  |  |  |  |  |  |  | 
| 3726 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3727 |  |  |  |  |  |  |  | 
| 3728 |  |  |  |  |  |  | =head3 SetCompoundWordBST | 
| 3729 |  |  |  |  |  |  |  | 
| 3730 |  |  |  |  |  |  | Description: | 
| 3731 |  |  |  |  |  |  |  | 
| 3732 |  |  |  |  |  |  | Sets member variable to passed Word2vec::Bst parameter. Sets compound word binary search tree to passed binary tree parameter. | 
| 3733 |  |  |  |  |  |  |  | 
| 3734 |  |  |  |  |  |  | Note: Un-defines previous binary tree if existing. | 
| 3735 |  |  |  |  |  |  |  | 
| 3736 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3737 |  |  |  |  |  |  |  | 
| 3738 |  |  |  |  |  |  | Input: | 
| 3739 |  |  |  |  |  |  |  | 
| 3740 |  |  |  |  |  |  | Word2vec::Bst -> Binary Search Tree | 
| 3741 |  |  |  |  |  |  |  | 
| 3742 |  |  |  |  |  |  | Output: | 
| 3743 |  |  |  |  |  |  |  | 
| 3744 |  |  |  |  |  |  | None | 
| 3745 |  |  |  |  |  |  |  | 
| 3746 |  |  |  |  |  |  | Example: | 
| 3747 |  |  |  |  |  |  |  | 
| 3748 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3749 |  |  |  |  |  |  |  | 
| 3750 |  |  |  |  |  |  | my @compoundWordAry = ( "big dog", "respiratory failure", "seven large masses" ); | 
| 3751 |  |  |  |  |  |  | @compoundWordAry = sort( @compoundWordAry ); | 
| 3752 |  |  |  |  |  |  |  | 
| 3753 |  |  |  |  |  |  | my $arySize = @compoundWordAry; | 
| 3754 |  |  |  |  |  |  |  | 
| 3755 |  |  |  |  |  |  | my $bst = Word2vec::Bst->new(); | 
| 3756 |  |  |  |  |  |  | $bst->CreateTree( \@compoundWordAry, 0, $arySize, undef ); | 
| 3757 |  |  |  |  |  |  |  | 
| 3758 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3759 |  |  |  |  |  |  | $xmlconv->SetCompoundWordBST( $bst ); | 
| 3760 |  |  |  |  |  |  |  | 
| 3761 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3762 |  |  |  |  |  |  |  | 
| 3763 |  |  |  |  |  |  | =head3 ClearCompoundWordBST | 
| 3764 |  |  |  |  |  |  |  | 
| 3765 |  |  |  |  |  |  | Description: | 
| 3766 |  |  |  |  |  |  |  | 
| 3767 |  |  |  |  |  |  | Clears/Un-defines existing compound word binary search tree from memory. | 
| 3768 |  |  |  |  |  |  |  | 
| 3769 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3770 |  |  |  |  |  |  |  | 
| 3771 |  |  |  |  |  |  | Input: | 
| 3772 |  |  |  |  |  |  |  | 
| 3773 |  |  |  |  |  |  | None | 
| 3774 |  |  |  |  |  |  |  | 
| 3775 |  |  |  |  |  |  | Output: | 
| 3776 |  |  |  |  |  |  |  | 
| 3777 |  |  |  |  |  |  | None | 
| 3778 |  |  |  |  |  |  |  | 
| 3779 |  |  |  |  |  |  | Example: | 
| 3780 |  |  |  |  |  |  |  | 
| 3781 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3782 |  |  |  |  |  |  |  | 
| 3783 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3784 |  |  |  |  |  |  | $xmlconv->ClearCompoundWordBST(); | 
| 3785 |  |  |  |  |  |  |  | 
| 3786 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3787 |  |  |  |  |  |  |  | 
| 3788 |  |  |  |  |  |  | =head3 SetMaxCompoundWordLength | 
| 3789 |  |  |  |  |  |  |  | 
| 3790 |  |  |  |  |  |  | Description: | 
| 3791 |  |  |  |  |  |  |  | 
| 3792 |  |  |  |  |  |  | Sets member variable to passed integer parameter. Sets maximum number of compound words in a phrase for comparison. | 
| 3793 |  |  |  |  |  |  |  | 
| 3794 |  |  |  |  |  |  | e.g. "medical campus of Virginia Commonwealth University" can be interpreted as a compound word of 6 words. | 
| 3795 |  |  |  |  |  |  | Setting this variable to 3 will only attempt to compoundify a maximum of three words. | 
| 3796 |  |  |  |  |  |  | The result would be "medical_campus_of Virginia commonwealth university" even though an exact representation | 
| 3797 |  |  |  |  |  |  | of this compounded string can exist. Setting this variable to 6 will result in compounding all six words if | 
| 3798 |  |  |  |  |  |  | they exist in the compound word array/bst. | 
| 3799 |  |  |  |  |  |  |  | 
| 3800 |  |  |  |  |  |  | Warning: This is a private function and should not be called or manipulated. | 
| 3801 |  |  |  |  |  |  |  | 
| 3802 |  |  |  |  |  |  | Input: | 
| 3803 |  |  |  |  |  |  |  | 
| 3804 |  |  |  |  |  |  | $value -> Integer | 
| 3805 |  |  |  |  |  |  |  | 
| 3806 |  |  |  |  |  |  | Output: | 
| 3807 |  |  |  |  |  |  |  | 
| 3808 |  |  |  |  |  |  | None | 
| 3809 |  |  |  |  |  |  |  | 
| 3810 |  |  |  |  |  |  | Example: | 
| 3811 |  |  |  |  |  |  |  | 
| 3812 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3813 |  |  |  |  |  |  |  | 
| 3814 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3815 |  |  |  |  |  |  | $xmlconv->SetMaxCompoundWordLength( 8 ); | 
| 3816 |  |  |  |  |  |  |  | 
| 3817 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3818 |  |  |  |  |  |  |  | 
| 3819 |  |  |  |  |  |  | =head3 SetOverwriteExistingFile | 
| 3820 |  |  |  |  |  |  |  | 
| 3821 |  |  |  |  |  |  | Sets member variable to passed integer parameter. Requires 0 = False or 1 = True. Sets option to overwrite | 
| 3822 |  |  |  |  |  |  | existing text corpus during compilation if 1 or append to existing text corpus if 0. | 
| 3823 |  |  |  |  |  |  |  | 
| 3824 |  |  |  |  |  |  | =head2 Debug Functions | 
| 3825 |  |  |  |  |  |  |  | 
| 3826 |  |  |  |  |  |  | =head3 GetTime | 
| 3827 |  |  |  |  |  |  |  | 
| 3828 |  |  |  |  |  |  | Description: | 
| 3829 |  |  |  |  |  |  |  | 
| 3830 |  |  |  |  |  |  | Returns current time string in "Hour:Minute:Second" format. | 
| 3831 |  |  |  |  |  |  |  | 
| 3832 |  |  |  |  |  |  | Input: | 
| 3833 |  |  |  |  |  |  |  | 
| 3834 |  |  |  |  |  |  | None | 
| 3835 |  |  |  |  |  |  |  | 
| 3836 |  |  |  |  |  |  | Output: | 
| 3837 |  |  |  |  |  |  |  | 
| 3838 |  |  |  |  |  |  | $string -> XX:XX:XX ("Hour:Minute:Second") | 
| 3839 |  |  |  |  |  |  |  | 
| 3840 |  |  |  |  |  |  | Example: | 
| 3841 |  |  |  |  |  |  |  | 
| 3842 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3843 |  |  |  |  |  |  |  | 
| 3844 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3845 |  |  |  |  |  |  | my $time = $xmlconv->GetTime(); | 
| 3846 |  |  |  |  |  |  |  | 
| 3847 |  |  |  |  |  |  | print( "Current Time: $time\n" ) if defined( $time ); | 
| 3848 |  |  |  |  |  |  |  | 
| 3849 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3850 |  |  |  |  |  |  |  | 
| 3851 |  |  |  |  |  |  | =head3 GetDate | 
| 3852 |  |  |  |  |  |  |  | 
| 3853 |  |  |  |  |  |  | Description: | 
| 3854 |  |  |  |  |  |  |  | 
| 3855 |  |  |  |  |  |  | Returns current month, day and year string in "Month/Day/Year" format. | 
| 3856 |  |  |  |  |  |  |  | 
| 3857 |  |  |  |  |  |  | Input: | 
| 3858 |  |  |  |  |  |  |  | 
| 3859 |  |  |  |  |  |  | None | 
| 3860 |  |  |  |  |  |  |  | 
| 3861 |  |  |  |  |  |  | Output: | 
| 3862 |  |  |  |  |  |  |  | 
| 3863 |  |  |  |  |  |  | $string -> XX/XX/XXXX ("Month/Day/Year") | 
| 3864 |  |  |  |  |  |  |  | 
| 3865 |  |  |  |  |  |  | Example: | 
| 3866 |  |  |  |  |  |  |  | 
| 3867 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3868 |  |  |  |  |  |  |  | 
| 3869 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3870 |  |  |  |  |  |  | my $date = $xmlconv->GetDate(); | 
| 3871 |  |  |  |  |  |  |  | 
| 3872 |  |  |  |  |  |  | print( "Current Date: $date\n" ) if defined( $date ); | 
| 3873 |  |  |  |  |  |  |  | 
| 3874 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3875 |  |  |  |  |  |  |  | 
| 3876 |  |  |  |  |  |  | =head3 WriteLog | 
| 3877 |  |  |  |  |  |  |  | 
| 3878 |  |  |  |  |  |  | Description: | 
| 3879 |  |  |  |  |  |  |  | 
| 3880 |  |  |  |  |  |  | Prints passed string parameter to the console, log file or both depending on user options. | 
| 3881 |  |  |  |  |  |  |  | 
| 3882 |  |  |  |  |  |  | Note: A new line character is printed following the string when the printNewLine parameter | 
| 3883 |  |  |  |  |  |  | is undefined (or any value other than 0), and is not printed when the parameter is 0. | 
| 3884 |  |  |  |  |  |  |  | 
| 3885 |  |  |  |  |  |  | Input: | 
| 3886 |  |  |  |  |  |  |  | 
| 3887 |  |  |  |  |  |  | $string -> String to print to the console/log file. | 
| 3888 |  |  |  |  |  |  | $value  -> 0 = Do not print newline character after string, all else prints new line character including 'undef'. | 
| 3889 |  |  |  |  |  |  |  | 
| 3890 |  |  |  |  |  |  | Output: | 
| 3891 |  |  |  |  |  |  |  | 
| 3892 |  |  |  |  |  |  | None | 
| 3893 |  |  |  |  |  |  |  | 
| 3894 |  |  |  |  |  |  | Example: | 
| 3895 |  |  |  |  |  |  |  | 
| 3896 |  |  |  |  |  |  | use Word2vec::Xmltow2v; | 
| 3897 |  |  |  |  |  |  |  | 
| 3898 |  |  |  |  |  |  | my $xmlconv = Word2vec::Xmltow2v->new(); | 
| 3899 |  |  |  |  |  |  | $xmlconv->WriteLog( "Hello World" ); | 
| 3900 |  |  |  |  |  |  |  | 
| 3901 |  |  |  |  |  |  | undef( $xmlconv ); | 
| 3902 |  |  |  |  |  |  |  | 
| 3903 |  |  |  |  |  |  | =head1 Author | 
| 3904 |  |  |  |  |  |  |  | 
| 3905 |  |  |  |  |  |  | Clint Cuffy, Virginia Commonwealth University | 
| 3906 |  |  |  |  |  |  |  | 
| 3907 |  |  |  |  |  |  | =head1 COPYRIGHT | 
| 3908 |  |  |  |  |  |  |  | 
| 3909 |  |  |  |  |  |  | Copyright (c) 2016 | 
| 3910 |  |  |  |  |  |  |  | 
| 3911 |  |  |  |  |  |  | Bridget T McInnes, Virginia Commonwealth University | 
| 3912 |  |  |  |  |  |  | btmcinnes at vcu dot edu | 
| 3913 |  |  |  |  |  |  |  | 
| 3914 |  |  |  |  |  |  | Clint Cuffy, Virginia Commonwealth University | 
| 3915 |  |  |  |  |  |  | cuffyca at vcu dot edu | 
| 3916 |  |  |  |  |  |  |  | 
| 3917 |  |  |  |  |  |  | This program is free software; you can redistribute it and/or modify it | 
| 3918 |  |  |  |  |  |  | under the terms of the GNU General Public License as published by the Free | 
| 3919 |  |  |  |  |  |  | Software Foundation; either version 2 of the License, or (at your option) | 
| 3920 |  |  |  |  |  |  | any later version. | 
| 3921 |  |  |  |  |  |  |  | 
| 3922 |  |  |  |  |  |  | This program is distributed in the hope that it will be useful, but WITHOUT | 
| 3923 |  |  |  |  |  |  | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
| 3924 |  |  |  |  |  |  | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. | 
| 3925 |  |  |  |  |  |  |  | 
| 3926 |  |  |  |  |  |  | You should have received a copy of the GNU General Public License along with | 
| 3927 |  |  |  |  |  |  | this program; if not, write to: | 
| 3928 |  |  |  |  |  |  |  | 
| 3929 |  |  |  |  |  |  | The Free Software Foundation, Inc., | 
| 3930 |  |  |  |  |  |  | 59 Temple Place - Suite 330, | 
| 3931 |  |  |  |  |  |  | Boston, MA  02111-1307, USA. | 
| 3932 |  |  |  |  |  |  |  | 
| 3933 |  |  |  |  |  |  | =cut |