line |
true |
false |
branch |
127
|
16 |
0 |
if defined(@_ > 0) |
128
|
0 |
16 |
@_ ? : |
130
|
0 |
16 |
if $$self{'verbose'} |
131
|
0 |
16 |
if $$self{'verbose'} |
150
|
0 |
16 |
unless open VOCAB, ">$VOCAB_FILE" |
151
|
0 |
16 |
unless open SNT, ">$SNT_FILE" |
154
|
0 |
16 |
if ($NONTOKEN_FILE ne '') |
164
|
0 |
16 |
unless open CORPUS, $_ |
173
|
1263 |
954 |
if (not exists $vocab_hash{$token}) { } |
182
|
0 |
248 |
if $new_line |
222
|
6 |
10 |
if (-e $TOKEN_FILE) { } |
223
|
0 |
6 |
unless open TOKEN, $TOKEN_FILE |
227
|
0 |
8 |
if (length $_ <= 0) |
228
|
0 |
8 |
unless (m[^/] and m[/$]) |
245
|
12 |
16 |
if (length $tokenizerRegex > 0) |
255
|
0 |
16 |
if ($#tokenRegex < 0) |
273
|
0 |
0 |
if ($NONTOKEN_FILE) { } |
276
|
0 |
0 |
unless open NOTOK, $NONTOKEN_FILE |
283
|
0 |
0 |
if (/^\s*$/) |
285
|
0 |
0 |
unless (m[^/]) |
289
|
0 |
0 |
unless (m[/$]) |
302
|
0 |
0 |
if (length $nontokenizerRegex <= 0) |
326
|
0 |
3 |
unless open FILE, $file |
334
|
3 |
12 |
if (/^\s*$/) |
337
|
0 |
12 |
unless (m[^/]) |
341
|
0 |
12 |
unless (m[/$]) |
354
|
0 |
3 |
if (length $stop_regex <= 0) |
372
|
0 |
16 |
unless open VOCAB, $VOCAB_FILE |
551
|
2063 |
1080 |
if (vec($corpus, vec($suffix, $top, $bit) + $_, $bit) != $ngram[$_]) |
563
|
0 |
16 |
unless open SNTNGRAM, ">$SNTNGRAM_FILE" |
566
|
16 |
0 |
unless (@vocab_array) |
577
|
2063 |
18 |
if ($l + $min_ngram_size - 1 <= $N) { } |
588
|
0 |
0 |
if ($_ == 1) |
|
0 |
2063 |
if ($new_line) |
591
|
87 |
1976 |
if ($stop_flag) |
593
|
29 |
58 |
if ($stop_mode =~ /OR|or/) { } |
597
|
12 |
40 |
if ($token_ngram[$i] =~ /$stop_regex/) |
|
52 |
74 |
if ($stop_mode =~ /OR|or/) { } |
600
|
51 |
23 |
unless ($token_ngram[$i] =~ /$stop_regex/) |
602
|
19 |
68 |
if ($doStop and $marginals) |
604
|
6 |
32 |
if (exists $remove_hash{$_ . ':' . $ngram[$_]}) { } |
615
|
2044 |
19 |
if ($line == 0 and $doStop == 0) |
616
|
1173 |
871 |
if ($remove <= $freq) { } |
618
|
302 |
871 |
if ($frequency <= $freq) |
622
|
726 |
1016 |
if (exists $remove_hash{$_ . ':' . $ngram[$_]}) { } |
640
|
0 |
16 |
unless open SNTNGRAM, $SNTNGRAM_FILE |
643
|
0 |
16 |
unless open NGRAM, ">$NGRAM_FILE" |
646
|
0 |
16 |
unless (@vocab_array) |
659
|
279 |
23 |
if ($marginals) |
684
|
279 |
364 |
if ($i == 0) |
685
|
6 |
273 |
if ($_[$i] == vec($corpus, $N, $bit)) |
687
|
279 |
364 |
if ($i == $#_) |
688
|
1 |
278 |
if ($_[$i] == vec($corpus, 0, $bit)) |
691
|
186 |
457 |
if ($stop_flag or $remove > 0) |
692
|
51 |
135 |
if (exists $remove_hash{$i . ':' . $_[$i]}) |
711
|
0 |
0 |
if ($split == 1) |
712
|
0 |
0 |
if ($ngram[0] == vec($corpus, 0, $bit)) { } |
|
0 |
0 |
elsif ($ngram[$#ngram] == vec($corpus, $N, $bit)) { } |
731
|
0 |
0 |
if (vec($corpus, vec($suffix, $top, $bit) + $split, $bit) != $_[1]) |
742
|
0 |
16 |
unless open SNT, $SNT_FILE |
774
|
1240 |
23 |
if ($marginals == 1) |
790
|
1154 |
924 |
vec($corpus, $z, $bit) < vec($corpus, $x, $bit) ? : |
|
33 |
2078 |
vec($corpus, $z, $bit) == vec($corpus, $x, $bit) ? : |