| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Lingua::YaTeA::MultiWordPhrase; | 
| 2 | 5 |  |  | 5 |  | 28 | use strict; | 
|  | 5 |  |  |  |  | 46 |  | 
|  | 5 |  |  |  |  | 116 |  | 
| 3 | 5 |  |  | 5 |  | 20 | use warnings; | 
|  | 5 |  |  |  |  | 8 |  | 
|  | 5 |  |  |  |  | 94 |  | 
| 4 | 5 |  |  | 5 |  | 1767 | use Lingua::YaTeA::Phrase; | 
|  | 5 |  |  |  |  | 12 |  | 
|  | 5 |  |  |  |  | 47 |  | 
| 5 | 5 |  |  | 5 |  | 1996 | use Lingua::YaTeA::MultiWordUnit; | 
|  | 5 |  |  |  |  | 12 |  | 
|  | 5 |  |  |  |  | 47 |  | 
| 6 | 5 |  |  | 5 |  | 2016 | use Lingua::YaTeA::Tree; | 
|  | 5 |  |  |  |  | 15 |  | 
|  | 5 |  |  |  |  | 54 |  | 
| 7 | 5 |  |  | 5 |  | 129 | use Lingua::YaTeA::IndexSet; | 
|  | 5 |  |  |  |  | 9 |  | 
|  | 5 |  |  |  |  | 15 |  | 
| 8 | 5 |  |  | 5 |  | 82 | use UNIVERSAL; | 
|  | 5 |  |  |  |  | 9 |  | 
|  | 5 |  |  |  |  | 16 |  | 
| 9 | 5 |  |  | 5 |  | 105 | use Scalar::Util qw(blessed); | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 178 |  | 
| 10 | 5 |  |  | 5 |  | 24 | use Data::Dumper; | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 151 |  | 
| 11 | 5 |  |  | 5 |  | 23 | use NEXT; | 
|  | 5 |  |  |  |  | 9 |  | 
|  | 5 |  |  |  |  | 20 |  | 
| 12 | 5 |  |  | 5 |  | 123 | use base qw(Lingua::YaTeA::Phrase Lingua::YaTeA::MultiWordUnit); | 
|  | 5 |  |  |  |  | 9 |  | 
|  | 5 |  |  |  |  | 831 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 5 |  |  | 5 |  | 1934 | use Encode qw(:fallbacks);; | 
|  | 5 |  |  |  |  | 33715 |  | 
|  | 5 |  |  |  |  | 15135 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | our $counter = 0; | 
| 17 |  |  |  |  |  |  | our $parsed = 0; | 
| 18 |  |  |  |  |  |  | our $VERSION=$Lingua::YaTeA::VERSION; | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | sub new | 
| 21 |  |  |  |  |  |  | { | 
| 22 | 81 |  |  | 81 | 1 | 264 | my ($class_or_object,$num_content_words,$words_a,$tag_set) = @_; | 
| 23 | 81 |  |  |  |  | 114 | my $this = shift; | 
| 24 | 81 | 100 |  |  |  | 207 | $this = bless {}, $this unless ref $this; | 
| 25 | 81 |  |  |  |  | 214 | $this->{ISLAND_SET} = (); | 
| 26 | 81 |  |  |  |  | 481 | $this->NEXT::new(@_); | 
| 27 | 81 |  |  |  |  | 212 | return $this; | 
| 28 |  |  |  |  |  |  | } | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | sub searchEndogenousIslands | 
| 34 |  |  |  |  |  |  | { | 
| 35 | 27 |  |  | 27 | 1 | 71 | my ($this,$phrase_set,$chunking_data,$tag_set,$lexicon,$sentence_set,$fh) = @_; | 
| 36 | 27 |  |  |  |  | 104 | my $sub_indexes_set_a = $this->getIndexSet->searchSubIndexesSet($this->getWords,$chunking_data,$tag_set,$lexicon,$sentence_set); | 
| 37 | 27 |  |  |  |  | 62 | my $sub_index; | 
| 38 |  |  |  |  |  |  | my $source_a; | 
| 39 | 27 |  |  |  |  | 38 | my $corrected = 0; | 
| 40 |  |  |  |  |  |  |  | 
| 41 | 27 | 50 |  |  |  | 69 | if(scalar  @$sub_indexes_set_a > 0) | 
| 42 |  |  |  |  |  |  | { | 
| 43 | 27 |  |  |  |  | 56 | foreach $sub_index (@$sub_indexes_set_a) | 
| 44 |  |  |  |  |  |  | { | 
| 45 |  |  |  |  |  |  |  | 
| 46 | 277 | 100 | 100 |  |  | 478 | if( | 
|  |  |  | 100 |  |  |  |  | 
| 47 |  |  |  |  |  |  | (!defined $this->getIslandSet) | 
| 48 |  |  |  |  |  |  | || | 
| 49 |  |  |  |  |  |  | ( | 
| 50 |  |  |  |  |  |  | (! $this->getIslandSet->existIsland($sub_index)) | 
| 51 |  |  |  |  |  |  | && | 
| 52 |  |  |  |  |  |  | (! $this->getIslandSet->existLargerIsland($sub_index)) | 
| 53 |  |  |  |  |  |  | ) | 
| 54 |  |  |  |  |  |  | ) | 
| 55 |  |  |  |  |  |  | { | 
| 56 | 259 | 100 |  |  |  | 477 | if($source_a = $phrase_set->searchFromIF($sub_index->buildIFSequence($this->getWords))) | 
| 57 |  |  |  |  |  |  | { | 
| 58 | 1 |  |  |  |  | 4 | $this->makeIsland($sub_index,$source_a,'endogenous','IF',$tag_set,$lexicon,$sentence_set,$fh); | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  | else | 
| 61 |  |  |  |  |  |  | { | 
| 62 | 258 | 100 |  |  |  | 532 | if($source_a = $phrase_set->searchFromLF($sub_index->buildLFSequence($this->getWords))) | 
| 63 |  |  |  |  |  |  | { | 
| 64 | 5 |  |  |  |  | 20 | $this->makeIsland($sub_index,$source_a,'endogenous','LF',$tag_set,$lexicon,$sentence_set,$fh); | 
| 65 |  |  |  |  |  |  | } | 
| 66 |  |  |  |  |  |  | } | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  | } | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | sub sortIslands | 
| 75 |  |  |  |  |  |  | { | 
| 76 | 0 |  |  | 0 | 0 | 0 | my ($a,$b,$parsing_direction,$fh) = @_; | 
| 77 |  |  |  |  |  |  | # print $fh "a: " ; | 
| 78 |  |  |  |  |  |  | #     $a->getIndexSet->print($fh); | 
| 79 |  |  |  |  |  |  | #     print $fh " : " .$a->gapSize . "\n"; | 
| 80 |  |  |  |  |  |  | #     print $fh "b: " ; | 
| 81 |  |  |  |  |  |  | #     $b->getIndexSet->print($fh); | 
| 82 |  |  |  |  |  |  | #     print $fh " : " .$b->gapSize . "\n"; | 
| 83 |  |  |  |  |  |  |  | 
| 84 | 0 | 0 |  |  |  | 0 | if($parsing_direction eq "LEFT") | 
| 85 |  |  |  |  |  |  | { | 
| 86 | 0 | 0 |  |  |  | 0 | if($a->getIndexSet->getFirst == $b->getIndexSet->getFirst) | 
| 87 |  |  |  |  |  |  | { | 
| 88 | 0 |  |  |  |  | 0 | return $b->gapSize <=> $a->gapSize; | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  | else | 
| 91 |  |  |  |  |  |  | { | 
| 92 | 0 |  |  |  |  | 0 | return $a->getIndexSet->getFirst <=> $b->getIndexSet->getFirst; | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  | else | 
| 96 |  |  |  |  |  |  | { | 
| 97 | 0 | 0 |  |  |  | 0 | if($parsing_direction eq "RIGHT") | 
| 98 |  |  |  |  |  |  | { | 
| 99 | 0 | 0 |  |  |  | 0 | if($a->getIndexSet->getLast == $b->getIndexSet->getLast) | 
| 100 |  |  |  |  |  |  | { | 
| 101 | 0 |  |  |  |  | 0 | return $b->gapSize <=> $a->gapSize; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  | else | 
| 104 |  |  |  |  |  |  | { | 
| 105 | 0 |  |  |  |  | 0 | return $b->getIndexSet->getLast <=> $a->getIndexSet->getLast; | 
| 106 |  |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  | } | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | sub integrateIslands | 
| 115 |  |  |  |  |  |  | { | 
| 116 |  |  |  |  |  |  | #  my ($this,$chunking_data,$tag_set,$lexicon,$parsing_direction,$sentence_set,$fh) = @_; | 
| 117 | 11 |  |  | 11 | 0 | 40 | my ($this,$tag_set,$lexicon,$parsing_direction,$sentence_set,$fh) = @_; | 
| 118 | 11 |  |  |  |  | 18 | my $test; | 
| 119 | 11 |  |  |  |  | 18 | my $corrected = 0; | 
| 120 | 11 |  |  |  |  | 17 | my $island; | 
| 121 | 11 |  |  |  |  | 17 | my @islands = values %{$this->getIslandSet->getIslands}; | 
|  | 11 |  |  |  |  | 25 |  | 
| 122 |  |  |  |  |  |  | #@islands = sort({$a->getIndexSet->getSize <=> $b->getIndexSet->getSize} @islands); | 
| 123 | 11 |  |  |  |  | 28 | @islands = sort({&sortIslands($a,$b,$parsing_direction,$fh)} @islands); | 
|  | 0 |  |  |  |  | 0 |  | 
| 124 | 11 | 50 | 33 |  |  | 79 | if ((blessed($this)) && ($this->isa('Lingua::YaTeA::MultiWordPhrase'))) | 
| 125 |  |  |  |  |  |  | { | 
| 126 | 11 |  |  |  |  | 25 | foreach $island (@islands) | 
| 127 |  |  |  |  |  |  | { | 
| 128 |  |  |  |  |  |  | #	    print $fh "integrate essai " . $island->getIF . "\n"; | 
| 129 | 11 | 100 |  |  |  | 30 | if($island->isIntegrated == 0) | 
| 130 |  |  |  |  |  |  | { | 
| 131 | 9 |  |  |  |  | 29 | $test = $this->integrateIsland($island,$tag_set,$lexicon,$sentence_set,$fh); | 
| 132 | 9 | 100 |  |  |  | 26 | if($test == 1) | 
| 133 |  |  |  |  |  |  | { | 
| 134 | 4 |  |  |  |  | 10 | $corrected = 1; | 
| 135 |  |  |  |  |  |  | } | 
| 136 |  |  |  |  |  |  | #print $fh "apres l'ilot " . $island->getIF . "\n"; | 
| 137 |  |  |  |  |  |  | #	    $this->printForest($fh); | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  | } | 
| 141 | 11 |  |  |  |  | 39 | return ($this->checkParseCompleteness($fh),$corrected); | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | sub integrateIsland | 
| 148 |  |  |  |  |  |  | { | 
| 149 | 9 |  |  | 9 | 1 | 21 | my ($this,$island,$tagset,$lexicon,$sentence_set,$fh) = @_; | 
| 150 | 9 |  |  |  |  | 16 | my $i; | 
| 151 |  |  |  |  |  |  | my $tree; | 
| 152 | 9 |  |  |  |  | 25 | my $node_sets_a = $island->importNodeSets; | 
| 153 | 9 |  |  |  |  | 19 | my @new_trees; | 
| 154 |  |  |  |  |  |  | my $new; | 
| 155 | 9 |  |  |  |  | 13 | my $integrated_at_least_once = 0; | 
| 156 | 9 |  |  |  |  | 14 | my $success; | 
| 157 | 9 |  |  |  |  | 13 | my $corrected = 0; | 
| 158 | 9 | 50 |  |  |  | 28 | if(!defined $this->getForest) | 
| 159 |  |  |  |  |  |  | { | 
| 160 | 9 |  |  |  |  | 32 | $tree = Lingua::YaTeA::Tree->new; | 
| 161 | 9 |  |  |  |  | 31 | $tree->setSimplifiedIndexSet($this->getIndexSet); | 
| 162 | 9 |  |  |  |  | 32 | $this->addTree($tree); | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  |  | 
| 165 | 9 |  |  |  |  | 21 | while ($tree = pop @{$this->getForest}) | 
|  | 18 |  |  |  |  | 41 |  | 
| 166 |  |  |  |  |  |  | { | 
| 167 |  |  |  |  |  |  | #print $fh "essaie dans arebre :" . $tree . "\n"; | 
| 168 | 9 |  |  |  |  | 31 | ($success) = $tree->integrateIslandNodeSets($node_sets_a,$island->getIndexSet,\@new_trees,$this->getWords,$tagset,$fh); | 
| 169 | 9 | 50 |  |  |  | 24 | if($success == 1) | 
| 170 |  |  |  |  |  |  | { | 
| 171 | 9 |  |  |  |  | 18 | $integrated_at_least_once = 1; | 
| 172 |  |  |  |  |  |  | } | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  |  | 
| 175 | 9 |  |  |  |  | 26 | while ($new = pop @new_trees) | 
| 176 |  |  |  |  |  |  | { | 
| 177 |  |  |  |  |  |  | #print $fh "pop new ici :" . $new . "\n"; | 
| 178 | 9 |  |  |  |  | 19 | $this->addTree($new); | 
| 179 |  |  |  |  |  |  | } | 
| 180 |  |  |  |  |  |  |  | 
| 181 | 9 | 50 |  |  |  | 22 | if($integrated_at_least_once == 1) | 
| 182 |  |  |  |  |  |  | { | 
| 183 | 9 |  |  |  |  | 17 | $island->{INTEGRATED} = 1; | 
| 184 | 9 |  |  |  |  | 27 | $corrected = $this->correctPOSandLemma($island,$lexicon,$sentence_set,$fh); | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  | else | 
| 187 |  |  |  |  |  |  | { | 
| 188 | 0 |  |  |  |  | 0 | $this->removeIsland($island,$fh); | 
| 189 |  |  |  |  |  |  | } | 
| 190 | 9 |  |  |  |  | 46 | return $corrected; | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | sub correctPOSandLemma | 
| 194 |  |  |  |  |  |  | { | 
| 195 | 9 |  |  | 9 | 1 | 22 | my ($this,$island,$lexicon,$sentence_set,$fh) = @_; | 
| 196 | 9 |  |  |  |  | 24 | my $i; | 
| 197 |  |  |  |  |  |  | my $index; | 
| 198 | 9 |  |  |  |  | 15 | my $corrected = 0; | 
| 199 |  |  |  |  |  |  |  | 
| 200 | 9 |  |  |  |  | 19 | for ($i=0; $i< scalar @{$island->getIndexSet->getIndexes}; $i++) | 
|  | 29 |  |  |  |  | 56 |  | 
| 201 |  |  |  |  |  |  | { | 
| 202 | 20 |  |  |  |  | 39 | $index = $island->getIndexSet->getIndexes->[$i]; | 
| 203 | 20 | 50 |  |  |  | 39 | if (defined ($island->getSource->getWord($i))) { | 
| 204 | 20 | 100 |  |  |  | 45 | if  ($island->getSource->getWord($i)->getPOS ne $this->getWord($index)->getPOS) | 
| 205 |  |  |  |  |  |  | { | 
| 206 |  |  |  |  |  |  | #print $fh  $island->getSource->getWord($i)->getPOS . " !=" .$this->getWord($index)->getPOS . "\n"; | 
| 207 | 9 | 100 |  |  |  | 59 | if(lc($island->getSource->getWord($i)->getIF) eq lc($this->getWord($index)->getIF)) | 
| 208 |  |  |  |  |  |  | { | 
| 209 |  |  |  |  |  |  |  | 
| 210 | 4 | 50 |  |  |  | 12 | if($this->isCorrectedWord($index) == 0) # added by SA (29/08/2008) : a word can be corrected only once | 
| 211 |  |  |  |  |  |  | { | 
| 212 |  |  |  |  |  |  | #print $fh lc($island->getSource->getWord($i)->getIF) . "=" .  lc($this->getWord($index)->getIF) . "=> corrige\n"; | 
| 213 | 4 |  |  |  |  | 13 | $this->correctWord($index,$island->getSource->getWord($i),"POS",$lexicon,$sentence_set); | 
| 214 | 4 |  |  |  |  | 7 | push @{$this->{CORRECTED_WORDS}}, $index; | 
|  | 4 |  |  |  |  | 11 |  | 
| 215 | 4 |  |  |  |  | 15 | $corrected = 1; | 
| 216 |  |  |  |  |  |  | } | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  | } | 
| 219 | 20 | 50 |  |  |  | 48 | if($island->getSource->getWord($i)->getLF ne $this->getWord($index)->getLF) | 
| 220 |  |  |  |  |  |  | { | 
| 221 | 0 | 0 |  |  |  | 0 | if($this->isCorrectedWord($index) == 0) # added by SA (29/08/2008) : a word can be corrected only once | 
| 222 |  |  |  |  |  |  | { | 
| 223 |  |  |  |  |  |  | # print $fh  $island->getSource->getWord($i)->getLF . " !=" .$this->getWord($index)->getLF . "=>corrige\n"; | 
| 224 | 0 |  |  |  |  | 0 | $this->correctWord($index,$island->getSource->getWord($i),"LF",$lexicon,$sentence_set); | 
| 225 | 0 |  |  |  |  | 0 | push @{$this->{CORRECTED_WORDS}}, $index; | 
|  | 0 |  |  |  |  | 0 |  | 
| 226 | 0 |  |  |  |  | 0 | $corrected = 1; | 
| 227 |  |  |  |  |  |  | } | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  | } else { | 
| 230 | 0 |  |  |  |  | 0 | warn "Word undefined\n"; | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  | } | 
| 233 | 9 |  |  |  |  | 22 | return $corrected; | 
| 234 |  |  |  |  |  |  | } | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | # added by SA (29/08/2008) : check if a word has already been corrected | 
| 237 |  |  |  |  |  |  | sub isCorrectedWord | 
| 238 |  |  |  |  |  |  | { | 
| 239 | 4 |  |  | 4 | 0 | 19 | my ($this,$index) = @_; | 
| 240 | 4 |  |  |  |  | 10 | my $i; | 
| 241 | 4 | 50 |  |  |  | 10 | if(defined $this->getCorrectedWords) | 
| 242 |  |  |  |  |  |  | { | 
| 243 | 0 |  |  |  |  | 0 | foreach $i (@{$this->getCorrectedWords}) | 
|  | 0 |  |  |  |  | 0 |  | 
| 244 |  |  |  |  |  |  | { | 
| 245 | 0 | 0 |  |  |  | 0 | if($i == $index) | 
| 246 |  |  |  |  |  |  | { | 
| 247 | 0 |  |  |  |  | 0 | return 1; | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  | } | 
| 250 |  |  |  |  |  |  | } | 
| 251 | 4 |  |  |  |  | 12 | return 0; | 
| 252 |  |  |  |  |  |  | } | 
| 253 |  |  |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | sub getCorrectedWords | 
| 255 |  |  |  |  |  |  | { | 
| 256 | 4 |  |  | 4 | 0 | 8 | my ($this) = @_; | 
| 257 | 4 |  |  |  |  | 11 | return $this->{CORRECTED_WORDS}; | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | sub correctWord | 
| 261 |  |  |  |  |  |  | { | 
| 262 | 4 |  |  | 4 | 1 | 11 | my ($this,$index,$standard,$type,$lexicon,$sentence_set) = @_; | 
| 263 | 4 |  |  |  |  | 8 | my $form; | 
| 264 |  |  |  |  |  |  | my $new_word; | 
| 265 |  |  |  |  |  |  |  | 
| 266 | 4 | 50 |  |  |  | 11 | if($type eq "POS") | 
| 267 |  |  |  |  |  |  | { | 
| 268 | 4 |  |  |  |  | 13 | $form = $this->{WORDS}->[$index]->getIF . "\t" . $standard->getPOS .  "\t" . $this->{WORDS}->[$index]->getLF; | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | } | 
| 271 |  |  |  |  |  |  | else | 
| 272 |  |  |  |  |  |  | { | 
| 273 | 0 |  |  |  |  | 0 | $form = $this->{WORDS}->[$index]->getIF . "\t" . $this->{WORDS}->[$index]->getPOS .  "\t" . $standard->getLF; | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | } | 
| 276 | 4 |  |  |  |  | 23 | $new_word = Lingua::YaTeA::WordFromCorpus->new($form,$lexicon,$sentence_set); | 
| 277 | 4 |  |  |  |  | 21 | $this->{WORDS}->[$index] = $new_word->getLexItem; | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | sub getIslandSet | 
| 283 |  |  |  |  |  |  | { | 
| 284 | 609 |  |  | 609 | 1 | 832 | my ($this) = @_; | 
| 285 | 609 |  |  |  |  | 1393 | return $this->{ISLAND_SET}; | 
| 286 |  |  |  |  |  |  | } | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | sub checkMaximumLength | 
| 292 |  |  |  |  |  |  | { | 
| 293 | 72 |  |  | 72 | 1 | 122 | my ($this,$max_length) = @_; | 
| 294 |  |  |  |  |  |  |  | 
| 295 | 72 | 50 |  |  |  | 204 | if($this->getLength > $max_length) | 
| 296 |  |  |  |  |  |  | { | 
| 297 | 0 |  |  |  |  | 0 | return 0; | 
| 298 |  |  |  |  |  |  | } | 
| 299 | 72 |  |  |  |  | 157 | return 1; | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  |  | 
| 304 |  |  |  |  |  |  | sub existIsland | 
| 305 |  |  |  |  |  |  | { | 
| 306 | 0 |  |  | 0 | 1 | 0 | my ($this,$index) = @_; | 
| 307 | 0 | 0 |  |  |  | 0 | if(! defined $this->getIslandSet) | 
| 308 |  |  |  |  |  |  | { | 
| 309 | 0 |  |  |  |  | 0 | return 0; | 
| 310 |  |  |  |  |  |  | } | 
| 311 | 0 |  |  |  |  | 0 | return $this->getIslandSet->existIsland($index); | 
| 312 |  |  |  |  |  |  | } | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | sub makeIsland | 
| 315 | 9 |  |  | 9 | 1 | 26 | {    my ($this,$index,$source_a,$type,$access,$tag_set,$lexicon,$sentence_set,$fh) = @_; | 
| 316 | 9 |  |  |  |  | 34 | my $source; | 
| 317 |  |  |  |  |  |  | my $s; | 
| 318 | 9 |  |  |  |  | 0 | my $island; | 
| 319 | 9 |  |  |  |  | 0 | my $corrected; | 
| 320 |  |  |  |  |  |  |  | 
| 321 | 9 | 100 |  |  |  | 24 | if($type eq "endogenous") | 
| 322 |  |  |  |  |  |  | { | 
| 323 | 6 |  |  |  |  | 23 | $source = $index->chooseBestSource($source_a,$this->getWords,$tag_set); | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  | else | 
| 326 |  |  |  |  |  |  | { | 
| 327 | 3 |  |  |  |  | 5 | $source = $source_a->[0]; | 
| 328 |  |  |  |  |  |  | } | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  | #we verify if the island is multi-word phrase | 
| 331 | 9 | 50 | 33 |  |  | 75 | if ((blessed($source)) && ($source->isa('Lingua::YaTeA::MultiWordPhrase'))) | 
| 332 |  |  |  |  |  |  | { | 
| 333 | 9 |  |  |  |  | 58 | $island = Lingua::YaTeA::Island->new($index,$type,$source); | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  |  | 
| 336 | 9 |  |  |  |  | 28 | $this->addIsland($island,$fh); | 
| 337 |  |  |  |  |  |  | } | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | # if($this->isa('Lingua::YaTeA::MultiWordPhrase')) | 
| 340 |  |  |  |  |  |  | #      { | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | # 	 $corrected = $this->integrateIsland($island,$tag_set,$lexicon,$sentence_set,$fh); | 
| 343 |  |  |  |  |  |  | #      } | 
| 344 |  |  |  |  |  |  | #     print $fh "coorected:" . $corrected ; | 
| 345 | 9 |  |  |  |  | 36 | return $corrected; | 
| 346 |  |  |  |  |  |  | } | 
| 347 |  |  |  |  |  |  |  | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | sub removeIsland | 
| 350 |  |  |  |  |  |  | { | 
| 351 | 0 |  |  | 0 | 1 | 0 | my ($this,$island,$fh) = @_; | 
| 352 | 0 |  |  |  |  | 0 | $this->getIslandSet->removeIsland($island,$fh); | 
| 353 |  |  |  |  |  |  | } | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | sub addIsland | 
| 356 |  |  |  |  |  |  | { | 
| 357 | 9 |  |  | 9 | 1 | 21 | my ($this,$island,$fh) = @_; | 
| 358 | 9 | 50 |  |  |  | 18 | if(!defined $this->getIslandSet) | 
| 359 |  |  |  |  |  |  | { | 
| 360 | 9 |  |  |  |  | 45 | $this->{ISLAND_SET} = Lingua::YaTeA::IslandSet->new; | 
| 361 |  |  |  |  |  |  | } | 
| 362 | 9 |  |  |  |  | 22 | $this->getIslandSet->addIsland($island,$fh); | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | sub getParsablePotentialIslands | 
| 367 |  |  |  |  |  |  | { | 
| 368 | 3 |  |  | 3 | 1 | 8 | my ($this,$parsing_pattern_set,$tag_set,$parsing_direction) = @_; | 
| 369 | 3 |  |  |  |  | 9 | my %potential_islands; | 
| 370 |  |  |  |  |  |  | my $concurrent_set_a; | 
| 371 | 3 |  |  |  |  | 0 | my $concurrent; | 
| 372 | 3 |  |  |  |  | 0 | my $key; | 
| 373 | 3 |  |  |  |  | 5 | while (($key,$concurrent_set_a) = each (%{$this->getTestifiedTerms})) | 
|  | 6 |  |  |  |  | 13 |  | 
| 374 |  |  |  |  |  |  | { | 
| 375 |  |  |  |  |  |  | # islands can be created only from MultiWordTestifiedTerm | 
| 376 | 3 | 50 | 33 |  |  | 33 | if ((blessed($concurrent_set_a->[0])) && ($concurrent_set_a->[0]->isa('Lingua::YaTeA::MultiWordTestifiedTerm'))) | 
| 377 |  |  |  |  |  |  | { | 
| 378 | 3 |  |  |  |  | 7 | foreach $concurrent (@$concurrent_set_a) | 
| 379 |  |  |  |  |  |  | { | 
| 380 |  |  |  |  |  |  | # filter 1a : only testified terms that have a length inferior or equal to that of the phrase are kept | 
| 381 | 3 | 50 |  |  |  | 13 | if($concurrent->getLength <= $this->getLength) | 
| 382 |  |  |  |  |  |  | { | 
| 383 |  |  |  |  |  |  | # filter 1b : only testified term that have a parse are kept | 
| 384 | 3 | 50 |  |  |  | 10 | if($concurrent->getIfParsable($parsing_pattern_set,$tag_set,$parsing_direction)) | 
| 385 |  |  |  |  |  |  | { | 
| 386 | 3 |  |  |  |  | 5 | push @{$potential_islands{$key}}, $concurrent; | 
|  | 3 |  |  |  |  | 10 |  | 
| 387 |  |  |  |  |  |  | } | 
| 388 |  |  |  |  |  |  | } | 
| 389 |  |  |  |  |  |  | } | 
| 390 |  |  |  |  |  |  | } | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | } | 
| 393 | 3 |  |  |  |  | 8 | return \%potential_islands; | 
| 394 |  |  |  |  |  |  | } | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | sub getBestExogenousIslands | 
| 398 |  |  |  |  |  |  | { | 
| 399 | 3 |  |  | 3 | 1 | 6 | my ($this,$potential_islands_h) = @_; | 
| 400 | 3 |  |  |  |  | 10 | my $concurrent_set_a; | 
| 401 |  |  |  |  |  |  | my $concurrent; | 
| 402 | 3 |  |  |  |  | 0 | my $key; | 
| 403 | 3 |  |  |  |  | 0 | my %preselected_islands; | 
| 404 |  |  |  |  |  |  |  | 
| 405 | 3 |  |  |  |  | 24 | while (($key,$concurrent_set_a) = each (%$potential_islands_h)) | 
| 406 |  |  |  |  |  |  | { | 
| 407 |  |  |  |  |  |  | # if more than one testified terms exist for a given span of text | 
| 408 | 3 | 50 |  |  |  | 9 | if(scalar @$concurrent_set_a > 1) | 
| 409 |  |  |  |  |  |  | { | 
| 410 | 0 |  |  |  |  | 0 | $preselected_islands{$key} =  $this->orderConcurrentPotentialIslands($key,$concurrent_set_a,\%preselected_islands); | 
| 411 |  |  |  |  |  |  |  | 
| 412 |  |  |  |  |  |  | } | 
| 413 |  |  |  |  |  |  | else | 
| 414 |  |  |  |  |  |  | { | 
| 415 | 3 |  |  |  |  | 10 | $preselected_islands{$key} = $concurrent_set_a->[0]; | 
| 416 |  |  |  |  |  |  | } | 
| 417 |  |  |  |  |  |  | } | 
| 418 | 3 |  |  |  |  | 7 | return \%preselected_islands; | 
| 419 |  |  |  |  |  |  | } | 
| 420 |  |  |  |  |  |  |  | 
| 421 |  |  |  |  |  |  | sub searchExogenousIslands | 
| 422 |  |  |  |  |  |  | { | 
| 423 | 3 |  |  | 3 | 1 | 9 | my ($this,$parsing_pattern_set,$tag_set,$parsing_direction,$lexicon,$sentence_set) = @_; | 
| 424 | 3 |  |  |  |  | 7 | my $potential_islands_h; | 
| 425 |  |  |  |  |  |  | my $preselected_islands_h; | 
| 426 | 3 |  |  |  |  | 0 | my $key; | 
| 427 | 3 |  |  |  |  | 4 | my $corrected = 0; | 
| 428 |  |  |  |  |  |  |  | 
| 429 | 3 |  |  |  |  | 9 | $potential_islands_h = $this->getParsablePotentialIslands($parsing_pattern_set,$tag_set,$parsing_direction); | 
| 430 | 3 |  |  |  |  | 9 | $preselected_islands_h = $this->getBestExogenousIslands($potential_islands_h); | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  |  | 
| 433 | 3 |  |  |  |  | 5 | my @source; | 
| 434 | 3 |  |  |  |  | 11 | foreach $key (sort ({$this->sortIslandKeys($a,$b)} keys %$preselected_islands_h)) | 
|  | 0 |  |  |  |  | 0 |  | 
| 435 |  |  |  |  |  |  | { | 
| 436 | 3 |  |  |  |  | 15 | my $index = Lingua::YaTeA::IndexSet->new; | 
| 437 | 3 |  |  |  |  | 5 | @{$index->{INDEXES}} = split /-/, $key; | 
|  | 3 |  |  |  |  | 11 |  | 
| 438 | 3 | 50 | 0 |  |  | 10 | if | 
|  |  |  | 33 |  |  |  |  | 
| 439 |  |  |  |  |  |  | ( | 
| 440 |  |  |  |  |  |  | (!defined $this->getIslandSet) | 
| 441 |  |  |  |  |  |  | || | 
| 442 |  |  |  |  |  |  | ( | 
| 443 |  |  |  |  |  |  | (! $this->getIslandSet->existIsland($index)) | 
| 444 |  |  |  |  |  |  | && | 
| 445 |  |  |  |  |  |  | (! $this->getIslandSet->existLargerIsland($index)) | 
| 446 |  |  |  |  |  |  | ) | 
| 447 |  |  |  |  |  |  | ) | 
| 448 |  |  |  |  |  |  | { | 
| 449 | 3 |  |  |  |  | 6 | $source[0] = $preselected_islands_h->{$key}; | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | # 	    if($this->makeIsland($index,\@source,'exogenous','UNKNOWN',$tag_set,$lexicon,$sentence_set) == 1) | 
| 452 |  |  |  |  |  |  | # 	    { | 
| 453 |  |  |  |  |  |  | # 		$corrected =1; | 
| 454 |  |  |  |  |  |  | # 	    } | 
| 455 |  |  |  |  |  |  |  | 
| 456 | 3 |  |  |  |  | 10 | $this->makeIsland($index,\@source,'exogenous','UNKNOWN',$tag_set,$lexicon,$sentence_set); | 
| 457 |  |  |  |  |  |  | } | 
| 458 |  |  |  |  |  |  | } | 
| 459 |  |  |  |  |  |  | #$this->printIslands(*STDERR); | 
| 460 |  |  |  |  |  |  | # return ($this->checkParseCompleteness,$corrected); | 
| 461 |  |  |  |  |  |  | } | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | sub plugInternalFreeNodes | 
| 464 |  |  |  |  |  |  | { | 
| 465 | 27 |  |  | 27 | 1 | 64 | my ($this,$parsing_pattern_set,$parsing_direction,$tag_set,$fh) = @_; | 
| 466 | 27 |  |  |  |  | 233 | my $island; | 
| 467 |  |  |  |  |  |  | my $key; | 
| 468 |  |  |  |  |  |  |  | 
| 469 | 27 |  |  |  |  | 0 | my $tree; | 
| 470 | 27 |  |  |  |  | 0 | my $tree_updated; | 
| 471 | 27 |  |  |  |  | 0 | my $unplugged_a; | 
| 472 | 27 |  |  |  |  | 0 | my $unplugged; | 
| 473 | 27 |  |  |  |  | 0 | my $unplugged_index_set; | 
| 474 | 27 |  |  |  |  | 0 | my %unexploitable_islands; | 
| 475 | 27 |  |  |  |  | 0 | my @tmp_forest; | 
| 476 | 27 |  |  |  |  | 0 | my @new_trees; | 
| 477 |  |  |  |  |  |  |  | 
| 478 | 27 |  |  |  |  | 0 | my $free_nodes_a; | 
| 479 | 27 |  |  |  |  | 0 | my $new_plugging; | 
| 480 |  |  |  |  |  |  | #    print $fh "plugInternalFreeNodes\n"; | 
| 481 |  |  |  |  |  |  |  | 
| 482 | 27 | 100 |  |  |  | 79 | if(defined $this->getForest) | 
| 483 |  |  |  |  |  |  | { | 
| 484 |  |  |  |  |  |  | #	print $fh "nb arbres: " . scalar @{$this->getForest} . "\n"; | 
| 485 | 8 |  |  |  |  | 16 | foreach  $tree (@{$this->getForest}) | 
|  | 8 |  |  |  |  | 18 |  | 
| 486 |  |  |  |  |  |  | { | 
| 487 |  |  |  |  |  |  | #	    print $fh "TREE: ". $tree ."\n"; | 
| 488 | 8 |  |  |  |  | 11 | $tree_updated = 0; | 
| 489 |  |  |  |  |  |  |  | 
| 490 | 8 |  |  |  |  | 19 | $new_plugging = 1; | 
| 491 | 8 |  |  |  |  | 19 | while ($new_plugging == 1) | 
| 492 |  |  |  |  |  |  | { | 
| 493 | 8 |  |  |  |  | 24 | $new_plugging = $tree->plugNodePairs($parsing_pattern_set,$parsing_direction,$tag_set,$this->getWords,$fh); | 
| 494 |  |  |  |  |  |  | } | 
| 495 | 8 |  |  |  |  | 40 | $tree->completeDiscontinuousNodes($parsing_pattern_set,$parsing_direction,$tag_set,$this->getWords,$fh); | 
| 496 |  |  |  |  |  |  | #	    print $fh "avant removeDiscontinuousNodes\n"; | 
| 497 |  |  |  |  |  |  | #	    $tree->print($this->getWords,$fh); | 
| 498 | 8 |  |  |  |  | 28 | ($tree_updated,$unplugged_a) = $tree->removeDiscontinuousNodes($this->getWords,$fh); | 
| 499 |  |  |  |  |  |  |  | 
| 500 | 8 | 50 |  |  |  | 21 | if($tree_updated == 1) | 
| 501 |  |  |  |  |  |  | { | 
| 502 |  |  |  |  |  |  | #		print $fh "tree upodate " .$tree . "\n"; | 
| 503 |  |  |  |  |  |  | #		$tree->print($this->getWords,$fh); | 
| 504 | 0 | 0 |  |  |  | 0 | if(scalar @{$tree->getNodeSet->getNodes} > 0) | 
|  | 0 |  |  |  |  | 0 |  | 
| 505 |  |  |  |  |  |  | { | 
| 506 | 0 |  |  |  |  | 0 | $tree->updateIndexes($this->getIndexSet,$this->getWords); | 
| 507 | 0 | 0 |  |  |  | 0 | if(scalar @$unplugged_a > 0) | 
| 508 |  |  |  |  |  |  | { | 
| 509 |  |  |  |  |  |  | #			print $fh "ya a des unplugged\n"; | 
| 510 | 0 |  |  |  |  | 0 | foreach $unplugged (@$unplugged_a) | 
| 511 |  |  |  |  |  |  | { | 
| 512 |  |  |  |  |  |  | #			    print $fh "unpl: " . $unplugged->getID . "\n"; | 
| 513 | 0 |  |  |  |  | 0 | $free_nodes_a = $tree->getNodeSet->searchFreeNodes($this->getWords); | 
| 514 | 0 |  |  |  |  | 0 | $unplugged->hitchMore($free_nodes_a,$tree,$this->getWords,$fh); | 
| 515 |  |  |  |  |  |  | } | 
| 516 |  |  |  |  |  |  | } | 
| 517 |  |  |  |  |  |  | #		    print $fh "push " . $tree . "\n"; | 
| 518 |  |  |  |  |  |  | #		    $tree->print($this->getWords,$fh); | 
| 519 | 0 |  |  |  |  | 0 | push @tmp_forest, $tree; | 
| 520 |  |  |  |  |  |  |  | 
| 521 |  |  |  |  |  |  | } | 
| 522 |  |  |  |  |  |  |  | 
| 523 |  |  |  |  |  |  | } | 
| 524 |  |  |  |  |  |  | else | 
| 525 |  |  |  |  |  |  | { | 
| 526 | 8 |  |  |  |  | 19 | push @tmp_forest, $tree; | 
| 527 |  |  |  |  |  |  | } | 
| 528 |  |  |  |  |  |  | } | 
| 529 | 8 | 50 |  |  |  | 20 | if(scalar @tmp_forest > 0) | 
| 530 |  |  |  |  |  |  | { | 
| 531 |  |  |  |  |  |  | #	    print $fh "redefinition forest\n"; | 
| 532 |  |  |  |  |  |  |  | 
| 533 | 8 |  |  |  |  | 16 | @{$this->{FOREST}} = @tmp_forest; | 
|  | 8 |  |  |  |  | 27 |  | 
| 534 |  |  |  |  |  |  | #	    $this->printForest($fh); | 
| 535 |  |  |  |  |  |  | #@{$this->getForest} = @tmp_forest; | 
| 536 |  |  |  |  |  |  |  | 
| 537 |  |  |  |  |  |  | } | 
| 538 |  |  |  |  |  |  | else | 
| 539 |  |  |  |  |  |  | { | 
| 540 | 0 |  |  |  |  | 0 | undef $this->{FOREST}; | 
| 541 |  |  |  |  |  |  | } | 
| 542 |  |  |  |  |  |  | } | 
| 543 |  |  |  |  |  |  |  | 
| 544 |  |  |  |  |  |  | } | 
| 545 |  |  |  |  |  |  |  | 
| 546 |  |  |  |  |  |  |  | 
| 547 |  |  |  |  |  |  |  | 
| 548 |  |  |  |  |  |  |  | 
| 549 |  |  |  |  |  |  |  | 
| 550 |  |  |  |  |  |  |  | 
| 551 |  |  |  |  |  |  |  | 
| 552 |  |  |  |  |  |  | sub checkParseCompleteness | 
| 553 |  |  |  |  |  |  | { | 
| 554 | 11 |  |  | 11 | 1 | 33 | my ($this,$fh) = @_; | 
| 555 | 11 |  |  |  |  | 26 | my @uncomplete_trees; | 
| 556 |  |  |  |  |  |  | my @complete_trees; | 
| 557 | 11 |  |  |  |  | 0 | my $tree; | 
| 558 | 11 |  |  |  |  | 18 | my $parsed =0; | 
| 559 |  |  |  |  |  |  |  | 
| 560 | 11 | 50 |  |  |  | 32 | if(!defined $this->getForest) | 
| 561 |  |  |  |  |  |  | { | 
| 562 | 0 |  |  |  |  | 0 | return 0; | 
| 563 |  |  |  |  |  |  | } | 
| 564 |  |  |  |  |  |  | else | 
| 565 |  |  |  |  |  |  | { | 
| 566 | 11 |  |  |  |  | 17 | while ($tree = pop @{$this->getForest}) | 
|  | 22 |  |  |  |  | 39 |  | 
| 567 |  |  |  |  |  |  | { | 
| 568 |  |  |  |  |  |  | #	    print $fh "pop : ". $tree . "\n"; | 
| 569 | 11 | 50 |  |  |  | 29 | if($tree->getSimplifiedIndexSet->getSize == 1) | 
| 570 |  |  |  |  |  |  | { | 
| 571 | 0 |  |  |  |  | 0 | $parsed = 1; | 
| 572 | 0 |  |  |  |  | 0 | $tree->setHead; | 
| 573 | 0 |  |  |  |  | 0 | $tree->setReliability(3); | 
| 574 | 0 |  |  |  |  | 0 | push @complete_trees, $tree; | 
| 575 |  |  |  |  |  |  | } | 
| 576 |  |  |  |  |  |  | else | 
| 577 |  |  |  |  |  |  | { | 
| 578 | 11 |  |  |  |  | 24 | push @uncomplete_trees, $tree; | 
| 579 |  |  |  |  |  |  | } | 
| 580 |  |  |  |  |  |  | } | 
| 581 |  |  |  |  |  |  | } | 
| 582 | 11 | 50 |  |  |  | 25 | if($parsed == 1) | 
| 583 |  |  |  |  |  |  | { | 
| 584 | 0 |  |  |  |  | 0 | @{$this->{FOREST}} = @complete_trees; | 
|  | 0 |  |  |  |  | 0 |  | 
| 585 | 0 |  |  |  |  | 0 | return 1; | 
| 586 |  |  |  |  |  |  | } | 
| 587 |  |  |  |  |  |  | else | 
| 588 |  |  |  |  |  |  | { | 
| 589 | 11 |  |  |  |  | 18 | @{$this->{FOREST}} = @uncomplete_trees; | 
|  | 11 |  |  |  |  | 24 |  | 
| 590 | 11 |  |  |  |  | 40 | return 0; | 
| 591 |  |  |  |  |  |  | } | 
| 592 |  |  |  |  |  |  | } | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | sub orderConcurrentPotentialIslands | 
| 595 |  |  |  |  |  |  | { | 
| 596 | 0 |  |  | 0 | 1 | 0 | my ($this,$key,$concurrent_set_a) = @_; | 
| 597 | 0 |  |  |  |  | 0 | my $concurrent; | 
| 598 |  |  |  |  |  |  | my $inflected_score; | 
| 599 | 0 |  |  |  |  | 0 | my %inflected_form_scores; | 
| 600 | 0 |  |  |  |  | 0 | my @sorted_scores; | 
| 601 | 0 |  |  |  |  | 0 | my $best_set_a; | 
| 602 |  |  |  |  |  |  |  | 
| 603 |  |  |  |  |  |  | # filter 2 : compare inflected forms | 
| 604 | 0 |  |  |  |  | 0 | foreach $concurrent (@$concurrent_set_a) | 
| 605 |  |  |  |  |  |  | { | 
| 606 | 0 |  |  |  |  | 0 | $inflected_score = $this->compareInflectedFormWithTestified($concurrent,$key); | 
| 607 | 0 |  |  |  |  | 0 | push @{$inflected_form_scores{$inflected_score}}, $concurrent; | 
|  | 0 |  |  |  |  | 0 |  | 
| 608 |  |  |  |  |  |  | } | 
| 609 | 0 |  |  |  |  | 0 | @sorted_scores = sort ({ $b <=> $a } keys (%inflected_form_scores)) ; | 
|  | 0 |  |  |  |  | 0 |  | 
| 610 |  |  |  |  |  |  |  | 
| 611 | 0 |  |  |  |  | 0 | $best_set_a = $inflected_form_scores{$sorted_scores[0]}; | 
| 612 |  |  |  |  |  |  |  | 
| 613 |  |  |  |  |  |  | # filter 3 : compare POS sequence | 
| 614 |  |  |  |  |  |  | # if several testified terms have the same inflected and lemmatized forms | 
| 615 | 0 | 0 |  |  |  | 0 | if(scalar @$best_set_a > 1) | 
| 616 |  |  |  |  |  |  | { | 
| 617 | 0 |  |  |  |  | 0 | @$best_set_a = sort ({$this->sortPotentialIslandsAccordingToPOS($a,$b,$key)} @$best_set_a) ; | 
|  | 0 |  |  |  |  | 0 |  | 
| 618 |  |  |  |  |  |  |  | 
| 619 |  |  |  |  |  |  | } | 
| 620 |  |  |  |  |  |  |  | 
| 621 | 0 |  |  |  |  | 0 | return $best_set_a->[0]; | 
| 622 |  |  |  |  |  |  | } | 
| 623 |  |  |  |  |  |  |  | 
| 624 |  |  |  |  |  |  |  | 
| 625 |  |  |  |  |  |  |  | 
| 626 |  |  |  |  |  |  | sub sortIslandKeys | 
| 627 |  |  |  |  |  |  | { | 
| 628 | 0 |  |  | 0 | 1 | 0 | my ($this,$first,$second) = @_; | 
| 629 | 0 |  |  |  |  | 0 | my @first_index = split /-/, $first; | 
| 630 | 0 |  |  |  |  | 0 | my @second_index = split /-/, $second; | 
| 631 | 0 |  |  |  |  | 0 | return (scalar @second_index <=> scalar @first_index); | 
| 632 |  |  |  |  |  |  | } | 
| 633 |  |  |  |  |  |  |  | 
| 634 |  |  |  |  |  |  |  | 
| 635 |  |  |  |  |  |  | sub sortPotentialIslandsAccordingToPOS | 
| 636 |  |  |  |  |  |  | { | 
| 637 | 0 |  |  | 0 | 1 | 0 | my ($this,$first,$second,$key) = @_; | 
| 638 | 0 |  |  |  |  | 0 | return ($this->comparePOSWithTestified($second,$key) <=> $this->comparePOSWithTestified($first,$key)); | 
| 639 |  |  |  |  |  |  | } | 
| 640 |  |  |  |  |  |  |  | 
| 641 |  |  |  |  |  |  |  | 
| 642 |  |  |  |  |  |  | sub compareInflectedFormWithTestified | 
| 643 |  |  |  |  |  |  | { | 
| 644 | 0 |  |  | 0 | 1 | 0 | my ($this,$testified_term,$key) = @_; | 
| 645 | 0 |  |  |  |  | 0 | my $i; | 
| 646 |  |  |  |  |  |  | my $j; | 
| 647 | 0 |  |  |  |  | 0 | my $score = 0; | 
| 648 | 0 |  |  |  |  | 0 | my @index = split(/-/,$key); | 
| 649 | 0 |  |  |  |  | 0 | for ($i = $index[0]; $i <= $index[$#index]; $i++) | 
| 650 |  |  |  |  |  |  | { | 
| 651 | 0 |  |  |  |  | 0 | for ($j = 0; $j < scalar @index; $j++) | 
| 652 |  |  |  |  |  |  | { | 
| 653 | 0 | 0 |  |  |  | 0 | if($this->getWord($i)->getIF eq $testified_term->getWord($j)->getIF) | 
| 654 |  |  |  |  |  |  | { | 
| 655 | 0 |  |  |  |  | 0 | $score++; | 
| 656 |  |  |  |  |  |  | } | 
| 657 |  |  |  |  |  |  | } | 
| 658 |  |  |  |  |  |  | } | 
| 659 | 0 |  |  |  |  | 0 | return $score; | 
| 660 |  |  |  |  |  |  | } | 
| 661 |  |  |  |  |  |  |  | 
| 662 |  |  |  |  |  |  |  | 
| 663 |  |  |  |  |  |  | sub comparePOSWithTestified | 
| 664 |  |  |  |  |  |  | { | 
| 665 | 0 |  |  | 0 | 1 | 0 | my ($this,$testified_term,$key) = @_; | 
| 666 | 0 |  |  |  |  | 0 | my $i; | 
| 667 |  |  |  |  |  |  | my $j; | 
| 668 | 0 |  |  |  |  | 0 | my $score = 0; | 
| 669 | 0 |  |  |  |  | 0 | my @index = split(/-/,$key); | 
| 670 | 0 |  |  |  |  | 0 | for ($i = $index[0]; $i <= $index[$#index]; $i++) | 
| 671 |  |  |  |  |  |  | { | 
| 672 | 0 |  |  |  |  | 0 | for ($j = 0; $j < scalar @index; $j++) | 
| 673 |  |  |  |  |  |  | { | 
| 674 | 0 | 0 |  |  |  | 0 | if($this->getWord($i)->getPOS eq $testified_term->getWord($j)->getPOS) | 
| 675 |  |  |  |  |  |  | { | 
| 676 | 0 |  |  |  |  | 0 | $score++; | 
| 677 |  |  |  |  |  |  | } | 
| 678 |  |  |  |  |  |  | } | 
| 679 |  |  |  |  |  |  | } | 
| 680 | 0 |  |  |  |  | 0 | return $score; | 
| 681 |  |  |  |  |  |  | } | 
| 682 |  |  |  |  |  |  |  | 
| 683 |  |  |  |  |  |  |  | 
| 684 |  |  |  |  |  |  | sub printIslands | 
| 685 |  |  |  |  |  |  | { | 
| 686 | 72 |  |  | 72 | 1 | 107 | my ($this,$fh) = @_; | 
| 687 |  |  |  |  |  |  |  | 
| 688 | 72 | 50 |  |  |  | 108 | if(defined $fh) | 
| 689 |  |  |  |  |  |  | { | 
| 690 | 72 | 100 |  |  |  | 123 | if(defined $this->getIslandSet) | 
| 691 |  |  |  |  |  |  | { | 
| 692 | 3 |  |  |  |  | 8 | print $fh " " . $this->getIslandSet->size . "\n"; | 
| 693 | 3 |  |  |  |  | 6 | $this->getIslandSet->print($fh); | 
| 694 |  |  |  |  |  |  | } | 
| 695 |  |  |  |  |  |  | else | 
| 696 |  |  |  |  |  |  | { | 
| 697 | 69 |  |  |  |  | 155 | print $fh " 0\n"; | 
| 698 |  |  |  |  |  |  | } | 
| 699 |  |  |  |  |  |  | } | 
| 700 |  |  |  |  |  |  | else | 
| 701 |  |  |  |  |  |  | { | 
| 702 | 0 | 0 |  |  |  | 0 | if(defined $this->getIslandSet) | 
| 703 |  |  |  |  |  |  | { | 
| 704 | 0 |  |  |  |  | 0 | print "\n"; | 
| 705 | 0 |  |  |  |  | 0 | $this->getIslandSet->print; | 
| 706 |  |  |  |  |  |  | } | 
| 707 |  |  |  |  |  |  | else | 
| 708 |  |  |  |  |  |  | { | 
| 709 | 0 |  |  |  |  | 0 | print "0\n"; | 
| 710 |  |  |  |  |  |  | } | 
| 711 |  |  |  |  |  |  | } | 
| 712 |  |  |  |  |  |  | } | 
| 713 |  |  |  |  |  |  |  | 
| 714 |  |  |  |  |  |  | sub print | 
| 715 |  |  |  |  |  |  | { | 
| 716 | 72 |  |  | 72 | 1 | 105 | my ($this,$fh) = @_; | 
| 717 |  |  |  |  |  |  |  | 
| 718 | 72 | 50 |  |  |  | 120 | if(defined $fh) | 
| 719 |  |  |  |  |  |  | { | 
| 720 |  |  |  |  |  |  |  | 
| 721 | 72 |  |  |  |  | 150 | print $fh  "if: " . Encode::encode("UTF-8", $this->getIF) . "\n"; | 
| 722 | 72 |  |  |  |  | 2668 | print $fh "pos: " . Encode::encode("UTF-8", $this->getPOS) . "\n"; | 
| 723 | 72 |  |  |  |  | 2381 | print $fh "lf: " . Encode::encode("UTF-8", $this->getLF) . "\n"; | 
| 724 | 72 |  |  |  |  | 2364 | print $fh "is a term candidate: " . $this->isTC. "\n"; | 
| 725 | 72 | 100 |  |  |  | 151 | if($this->isTC) | 
| 726 |  |  |  |  |  |  | { | 
| 727 | 45 |  |  |  |  | 107 | print $fh "parsing method: ". $this->getParsingMethod . "\n"; | 
| 728 | 45 |  |  |  |  | 71 | print $fh "forest: " ; | 
| 729 | 45 |  |  |  |  | 100 | $this->printForestParenthesised($fh); | 
| 730 |  |  |  |  |  |  |  | 
| 731 |  |  |  |  |  |  | } | 
| 732 | 72 |  |  |  |  | 124 | print $fh "islands:"; | 
| 733 | 72 |  |  |  |  | 129 | $this->printIslands($fh); | 
| 734 |  |  |  |  |  |  |  | 
| 735 |  |  |  |  |  |  | } | 
| 736 |  |  |  |  |  |  | else | 
| 737 |  |  |  |  |  |  | { | 
| 738 | 0 |  |  |  |  |  | print  "if: " . $this->getIF . "\n"; | 
| 739 | 0 |  |  |  |  |  | print "pos: " . $this->getPOS . "\n"; | 
| 740 | 0 |  |  |  |  |  | print "lf: " . $this->getLF . "\n"; | 
| 741 | 0 |  |  |  |  |  | print "is a term candidate: " . $this->isTC. "\n"; | 
| 742 | 0 | 0 |  |  |  |  | if($this->isTC) | 
| 743 |  |  |  |  |  |  | { | 
| 744 | 0 |  |  |  |  |  | print "parsing method: ". $this->getParsingMethod . "\n"; | 
| 745 | 0 |  |  |  |  |  | print "forest: " ; | 
| 746 | 0 |  |  |  |  |  | $this->printForestParenthesised; | 
| 747 |  |  |  |  |  |  |  | 
| 748 |  |  |  |  |  |  | } | 
| 749 | 0 |  |  |  |  |  | print "islands:"; | 
| 750 | 0 |  |  |  |  |  | $this->printIslands; | 
| 751 | 0 |  |  |  |  |  | print "\n"; | 
| 752 |  |  |  |  |  |  | } | 
| 753 |  |  |  |  |  |  | } | 
| 754 |  |  |  |  |  |  |  | 
| 755 |  |  |  |  |  |  |  | 
| 756 |  |  |  |  |  |  |  | 
| 757 |  |  |  |  |  |  |  | 
| 758 |  |  |  |  |  |  | 1; | 
| 759 |  |  |  |  |  |  |  | 
| 760 |  |  |  |  |  |  |  | 
| 761 |  |  |  |  |  |  | __END__ |