| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Lingua::YaTeA::Phrase; | 
| 2 | 5 |  |  | 5 |  | 40 | use strict; | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 178 |  | 
| 3 | 5 |  |  | 5 |  | 34 | use warnings; | 
|  | 5 |  |  |  |  | 11 |  | 
|  | 5 |  |  |  |  | 117 |  | 
| 4 | 5 |  |  | 5 |  | 2069 | use Lingua::YaTeA::Occurrence; | 
|  | 5 |  |  |  |  | 20 |  | 
|  | 5 |  |  |  |  | 53 |  | 
| 5 | 5 |  |  | 5 |  | 2058 | use Lingua::YaTeA::Island; | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 58 |  | 
| 6 | 5 |  |  | 5 |  | 2055 | use Lingua::YaTeA::IslandSet; | 
|  | 5 |  |  |  |  | 31 |  | 
|  | 5 |  |  |  |  | 58 |  | 
| 7 | 5 |  |  | 5 |  | 3127 | use NEXT; | 
|  | 5 |  |  |  |  | 21516 |  | 
|  | 5 |  |  |  |  | 39 |  | 
| 8 | 5 |  |  | 5 |  | 178 | use UNIVERSAL; | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 16 |  | 
| 9 | 5 |  |  | 5 |  | 185 | use Scalar::Util qw(blessed); | 
|  | 5 |  |  |  |  | 12 |  | 
|  | 5 |  |  |  |  | 239 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 | 5 |  |  | 5 |  | 39 | use Data::Dumper; | 
|  | 5 |  |  |  |  | 11 |  | 
|  | 5 |  |  |  |  | 10250 |  | 
| 12 |  |  |  |  |  |  | our $counter = 0; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | our $VERSION=$Lingua::YaTeA::VERSION; | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | sub new | 
| 17 |  |  |  |  |  |  | { | 
| 18 | 123 |  |  | 123 | 1 | 24340 | my ($class_or_object,$num_content_words,$words_a,$tag_set) = @_; | 
| 19 | 123 |  |  |  |  | 234 | my $this = shift; | 
| 20 | 123 | 50 |  |  |  | 301 | $this = bless {}, $this unless ref $this; | 
| 21 | 123 |  |  |  |  | 256 | $this->{ID} = $counter; | 
| 22 | 123 |  |  |  |  | 259 | $this->{WORDS} = []; | 
| 23 | 123 |  |  |  |  | 260 | $this->{IF} = ""; | 
| 24 | 123 |  |  |  |  | 238 | $this->{POS} = ""; | 
| 25 | 123 |  |  |  |  | 203 | $this->{LF} = ""; | 
| 26 | 123 |  |  |  |  | 205 | $this->{TC} = 0; | 
| 27 | 123 |  |  |  |  | 282 | $this->{FREQUENCY} = 0; | 
| 28 | 123 |  |  |  |  | 252 | $this->{OCCURRENCES} = []; | 
| 29 | 123 |  |  |  |  | 212 | $this->{RELIABILITY} = 0; | 
| 30 | 123 |  |  |  |  | 236 | $this->{TESTIFIED_TERMS} = (); | 
| 31 | 123 |  |  |  |  | 435 | $this->{INDEX_SET} = Lingua::YaTeA::IndexSet->new; | 
| 32 |  |  |  |  |  |  |  | 
| 33 | 123 |  |  |  |  | 568 | $this->buildLinguisticInfos($words_a,$tag_set); | 
| 34 | 123 |  |  |  |  | 376 | $this->getIndexSet->fill($words_a); | 
| 35 | 123 |  |  |  |  | 750 | $this->NEXT::new(@_); | 
| 36 |  |  |  |  |  |  |  | 
| 37 | 123 |  |  |  |  | 634 | return $this; | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | sub setTC | 
| 42 |  |  |  |  |  |  | { | 
| 43 | 72 |  |  | 72 | 1 | 165 | my ($this,$status) = @_; | 
| 44 | 72 |  |  |  |  | 171 | $this->{TC} = $status; | 
| 45 |  |  |  |  |  |  | } | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | sub buildLinguisticInfos | 
| 50 |  |  |  |  |  |  | { | 
| 51 | 124 |  |  | 124 | 1 | 252 | my ($this,$words_a,$tag_set) = @_; | 
| 52 | 124 |  |  |  |  | 462 | my $word; | 
| 53 |  |  |  |  |  |  | my $lex; | 
| 54 | 124 |  |  |  |  | 0 | my $IF; | 
| 55 | 124 |  |  |  |  | 0 | my $POS; | 
| 56 | 124 |  |  |  |  | 0 | my $LF; | 
| 57 | 124 |  |  |  |  | 396 | my %prep = ("of"=>"of", "to"=>"to"); | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 124 |  |  |  |  | 257 | foreach $word (@$words_a) | 
| 60 |  |  |  |  |  |  | { | 
| 61 | 298 | 100 | 66 |  |  | 1662 | if ((blessed($word)) && ($word->isa("Lingua::YaTeA::WordFromCorpus"))) | 
| 62 |  |  |  |  |  |  | { | 
| 63 | 286 |  |  |  |  | 670 | $lex  = $word->getLexItem; | 
| 64 | 286 |  |  |  |  | 719 | $IF .= $lex->getIF . " " ; | 
| 65 | 286 | 100 |  |  |  | 657 | if ($tag_set->existTag('PREPOSITIONS',$lex->getIF)) | 
| 66 |  |  |  |  |  |  | { | 
| 67 | 28 |  |  |  |  | 66 | $POS .= $lex->getIF . " "; | 
| 68 |  |  |  |  |  |  | } | 
| 69 |  |  |  |  |  |  | else | 
| 70 |  |  |  |  |  |  | { | 
| 71 | 258 |  |  |  |  | 546 | $POS .= $lex->getPOS . " "; | 
| 72 |  |  |  |  |  |  | } | 
| 73 | 286 |  |  |  |  | 701 | $LF .= $lex->getLF . " " ; | 
| 74 | 286 |  |  |  |  | 442 | push @{$this->getWords}, $lex; | 
|  | 286 |  |  |  |  | 663 |  | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  | else | 
| 77 |  |  |  |  |  |  | { # update existing linguistic info for a phrase | 
| 78 | 12 | 50 | 33 |  |  | 68 | if((blessed($word)) && ($word->isa("Lingua::YaTeA::LexiconItem"))) | 
| 79 |  |  |  |  |  |  | { | 
| 80 | 12 |  |  |  |  | 28 | $IF .= $word->getIF . " " ; | 
| 81 | 12 | 100 |  |  |  | 35 | if ($tag_set->existTag('PREPOSITIONS',$word->getIF)) | 
| 82 |  |  |  |  |  |  | { | 
| 83 | 2 |  |  |  |  | 6 | $POS .= $word->getIF . " "; | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  | else | 
| 86 |  |  |  |  |  |  | { | 
| 87 | 10 |  |  |  |  | 24 | $POS .= $word->getPOS . " "; | 
| 88 |  |  |  |  |  |  | } | 
| 89 | 12 |  |  |  |  | 27 | $LF .= $word->getLF . " " ; | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  | } | 
| 94 | 124 |  |  |  |  | 930 | $IF =~ s/\s+$//o; | 
| 95 | 124 |  |  |  |  | 557 | $POS =~ s/\s+$//o; | 
| 96 | 124 |  |  |  |  | 496 | $LF =~ s/\s+$//o; | 
| 97 | 124 |  |  |  |  | 491 | $this->setIF($IF); | 
| 98 | 124 |  |  |  |  | 377 | $this->setPOS($POS); | 
| 99 | 124 |  |  |  |  | 317 | $this->setLF($LF); | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | sub addOccurrence | 
| 105 |  |  |  |  |  |  | { | 
| 106 | 122 |  |  | 122 | 1 | 258 | my ($this,$words_a,$maximal,$fh) = @_; | 
| 107 | 122 |  |  |  |  | 274 | my $testified; | 
| 108 |  |  |  |  |  |  | my $testified_set_a; | 
| 109 | 122 |  |  |  |  | 0 | my $key; | 
| 110 | 122 |  |  |  |  | 356 | $this->incrementFrequency; | 
| 111 | 122 |  |  |  |  | 369 | my $occurrence = Lingua::YaTeA::Occurrence->new; | 
| 112 | 122 |  |  |  |  | 371 | $occurrence->setInfoForPhrase($words_a,$maximal); | 
| 113 | 122 |  |  |  |  | 180 | push @{$this->{OCCURRENCES}}, $occurrence; | 
|  | 122 |  |  |  |  | 273 |  | 
| 114 | 122 | 100 |  |  |  | 295 | if(defined $this->getTestifiedTerms) | 
| 115 |  |  |  |  |  |  | { | 
| 116 | 3 |  |  |  |  | 9 | while (($key,$testified_set_a) = each %{$this->getTestifiedTerms}) | 
|  | 6 |  |  |  |  | 14 |  | 
| 117 |  |  |  |  |  |  | { | 
| 118 | 3 |  |  |  |  | 9 | foreach $testified (@{$testified_set_a}) | 
|  | 3 |  |  |  |  | 21 |  | 
| 119 |  |  |  |  |  |  | { | 
| 120 | 3 |  |  |  |  | 22 | $testified->addOccurrence($occurrence,$this,$key,$fh); | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | sub incrementFrequency | 
| 129 |  |  |  |  |  |  | { | 
| 130 | 122 |  |  | 122 | 1 | 206 | my ($this) = @_; | 
| 131 | 122 |  |  |  |  | 234 | $this->{FREQUENCY}++; | 
| 132 |  |  |  |  |  |  | } | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | sub getWords | 
| 135 |  |  |  |  |  |  | { | 
| 136 | 1317 |  |  | 1317 | 1 | 2074 | my ($this) = @_; | 
| 137 | 1317 |  |  |  |  | 4060 | return $this->{WORDS}; | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | sub setIF | 
| 141 |  |  |  |  |  |  | { | 
| 142 | 124 |  |  | 124 | 1 | 252 | my ($this,$new) = @_; | 
| 143 | 124 |  |  |  |  | 270 | $this->{IF} = $new; | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | sub setPOS | 
| 147 |  |  |  |  |  |  | { | 
| 148 | 124 |  |  | 124 | 1 | 270 | my ($this,$new) = @_; | 
| 149 | 124 |  |  |  |  | 272 | $this->{POS} = $new; | 
| 150 |  |  |  |  |  |  | } | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | sub setLF | 
| 153 |  |  |  |  |  |  | { | 
| 154 | 124 |  |  | 124 | 1 | 223 | my ($this,$new) = @_; | 
| 155 | 124 |  |  |  |  | 448 | $this->{LF} = $new; | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | sub getIF | 
| 159 |  |  |  |  |  |  | { | 
| 160 | 260 |  |  | 260 | 1 | 520 | my ($this) = @_; | 
| 161 | 260 |  |  |  |  | 1096 | return $this->{IF}; | 
| 162 |  |  |  |  |  |  | } | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | sub getPOS | 
| 165 |  |  |  |  |  |  | { | 
| 166 | 175 |  |  | 175 | 1 | 309 | my ($this) = @_; | 
| 167 | 175 |  |  |  |  | 603 | return $this->{POS}; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub getLF | 
| 171 |  |  |  |  |  |  | { | 
| 172 | 176 |  |  | 176 | 1 | 340 | my ($this) = @_; | 
| 173 | 176 |  |  |  |  | 634 | return $this->{LF}; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | sub buildKey | 
| 177 |  |  |  |  |  |  | { | 
| 178 | 126 |  |  | 126 | 1 | 259 | my ($this) = @_; | 
| 179 | 126 |  |  |  |  | 442 | my $key = $this->{"IF"} . "~" . $this->{"POS"} . "~" . $this->{"LF"}; | 
| 180 | 126 |  |  |  |  | 338 | return $key; | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | sub getWord | 
| 185 |  |  |  |  |  |  | { | 
| 186 | 109 |  |  | 109 | 1 | 195 | my ($this,$index) = @_; | 
| 187 | 109 |  |  |  |  | 203 | return $this->getWords->[$index]; | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | } | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | sub isTC | 
| 192 |  |  |  |  |  |  | { | 
| 193 | 178 |  |  | 178 | 1 | 304 | my ($this) = @_; | 
| 194 | 178 |  |  |  |  | 491 | return $this->{TC}; | 
| 195 |  |  |  |  |  |  | } | 
| 196 |  |  |  |  |  |  |  | 
| 197 |  |  |  |  |  |  | sub getFrequency | 
| 198 |  |  |  |  |  |  | { | 
| 199 | 0 |  |  | 0 | 1 | 0 | my ($this) = @_; | 
| 200 | 0 |  |  |  |  | 0 | return $this->{FREQUENCY}; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | sub getOccurrences | 
| 204 |  |  |  |  |  |  | { | 
| 205 | 70 |  |  | 70 | 1 | 123 | my ($this) = @_; | 
| 206 | 70 |  |  |  |  | 256 | return $this->{OCCURRENCES}; | 
| 207 |  |  |  |  |  |  | } | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | sub addTermCandidates | 
| 210 |  |  |  |  |  |  | { | 
| 211 | 70 |  |  | 70 | 1 | 161 | my ($this,$term_candidates_h,$mapping_from_phrases_to_TCs_h,$tc_max_length,$option_set,$phrase_set,$monolexical_transfer_h) = @_; | 
| 212 | 70 |  |  |  |  | 297 | my @term_candidates; | 
| 213 |  |  |  |  |  |  | my $tc; | 
| 214 | 70 |  |  |  |  | 0 | my $reference; | 
| 215 | 70 |  |  |  |  | 0 | my $occurrence; | 
| 216 | 70 |  |  |  |  | 0 | my $max_tc; | 
| 217 | 70 |  |  |  |  | 0 | my $mono; | 
| 218 | 70 |  |  |  |  | 107 | my $offset = 0; | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  |  | 
| 221 | 70 | 50 | 33 |  |  | 408 | if ((blessed($this)) && ($this->isa('Lingua::YaTeA::MultiWordPhrase'))) | 
| 222 |  |  |  |  |  |  | { | 
| 223 | 70 |  |  |  |  | 223 | $max_tc = $this->getTree(0)->getRoot->buildTermList(\@term_candidates,$this->getWords,$this->getOccurrences,$this->getIslandSet,\$offset,1); | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  | else | 
| 226 |  |  |  |  |  |  | { | 
| 227 | 0 |  |  |  |  | 0 | $mono =  Lingua::YaTeA::MonolexicalTermCandidate->new; | 
| 228 | 0 |  |  |  |  | 0 | $mono->editKey("( " . $this->getWord(0)->getIF."<=S=".$this->getWord(0)->getPOS . "=" . $this->getWord(0)->getLF. "> )"); | 
| 229 | 0 |  |  |  |  | 0 | push @{$mono->getWords},$this->getWord(0); | 
|  | 0 |  |  |  |  | 0 |  | 
| 230 |  |  |  |  |  |  |  | 
| 231 | 0 |  |  |  |  | 0 | $mono->setOccurrences($this->getOccurrences,$offset,$this->getWord(0)->getLength,1); | 
| 232 | 0 |  |  |  |  | 0 | push @term_candidates, $mono; | 
| 233 | 0 |  |  |  |  | 0 | $max_tc = $mono; | 
| 234 |  |  |  |  |  |  | } | 
| 235 |  |  |  |  |  |  |  | 
| 236 | 70 |  |  |  |  | 240 | @term_candidates = sort ({$a->getLength <=> $b->getLength} @term_candidates); | 
|  | 452 |  |  |  |  | 950 |  | 
| 237 | 70 |  |  |  |  | 144 | foreach $tc (@term_candidates) | 
| 238 |  |  |  |  |  |  | { | 
| 239 |  |  |  |  |  |  | #print STDERR $tc->getIF . " : " .$tc->getLength . " -> "; | 
| 240 | 306 | 50 |  |  |  | 625 | if($tc->getLength < $tc_max_length) | 
| 241 |  |  |  |  |  |  | { | 
| 242 |  |  |  |  |  |  | #print STDERR " ajoute \n"; | 
| 243 | 306 |  |  |  |  | 570 | $tc->{ORIGINAL_PHRASE} = $this; | 
| 244 | 306 | 100 |  |  |  | 608 | if(!exists $term_candidates_h->{$tc->getKey}) | 
| 245 |  |  |  |  |  |  | { | 
| 246 | 240 |  |  |  |  | 621 | $tc->setHead; | 
| 247 |  |  |  |  |  |  | # TODO: change the criteria for the relevance of the term | 
| 248 |  |  |  |  |  |  | # currently: a tc receives the confidence rate of the groupe for which it is extracted | 
| 249 | 240 | 50 | 33 |  |  | 1307 | if ((blessed($this)) && ($this->isa('Lingua::YaTeA::MultiWordTermCandidate'))) | 
| 250 |  |  |  |  |  |  | { | 
| 251 | 0 |  |  |  |  | 0 | $tc->setReliability($this->getTree(0)->getReliability); | 
| 252 |  |  |  |  |  |  | } | 
| 253 |  |  |  |  |  |  | else | 
| 254 |  |  |  |  |  |  | { | 
| 255 | 240 |  |  |  |  | 542 | $tc->setReliability(0.5); | 
| 256 |  |  |  |  |  |  | } | 
| 257 | 240 |  |  |  |  | 528 | $term_candidates_h->{$tc->getKey} = $tc; | 
| 258 | 240 |  |  |  |  | 396 | $reference = $tc; | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | # Correction Sophie Aubin 11/16/2007 | 
| 262 | 240 | 0 | 33 |  |  | 552 | if | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 263 |  |  |  |  |  |  | ( | 
| 264 |  |  |  |  |  |  | (defined $option_set->getOption('monolexical-included')) | 
| 265 |  |  |  |  |  |  | && | 
| 266 |  |  |  |  |  |  | ($option_set->getOption('monolexical-included')->getValue() == 1) | 
| 267 |  |  |  |  |  |  | && | 
| 268 |  |  |  |  |  |  | ((blessed($tc)) && ($tc->isa('Lingua::YaTeA::MonolexicalTermCandidate'))) | 
| 269 |  |  |  |  |  |  | && | 
| 270 |  |  |  |  |  |  | ( | 
| 271 |  |  |  |  |  |  | (!defined $option_set->getOption('monolexical-all')) | 
| 272 |  |  |  |  |  |  | || | 
| 273 |  |  |  |  |  |  | ($option_set->getOption('monolexical-all')->getValue() == 0) | 
| 274 |  |  |  |  |  |  | ) | 
| 275 |  |  |  |  |  |  | ) | 
| 276 |  |  |  |  |  |  | { | 
| 277 | 0 |  |  |  |  | 0 | $tc->addMonolexicalOccurrences($phrase_set,$monolexical_transfer_h) | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  | } | 
| 280 |  |  |  |  |  |  | else | 
| 281 |  |  |  |  |  |  | { | 
| 282 | 66 |  |  |  |  | 149 | $reference =  $term_candidates_h->{$tc->getKey}; | 
| 283 | 66 |  |  |  |  | 172 | $reference->addOccurrences($tc->getOccurrences); | 
| 284 | 66 |  |  |  |  | 193 | $this->adjustReferences(\@term_candidates,$tc,$reference); | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | # add a frequency creteria for the confidence rate of the tcs | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | # record the link between this phrase and the TC that covers it completely | 
| 290 | 306 | 100 |  |  |  | 728 | if($tc->getID == $max_tc->getID) | 
| 291 |  |  |  |  |  |  | { | 
| 292 | 70 |  |  |  |  | 184 | $mapping_from_phrases_to_TCs_h->{$this->getID} = $reference; | 
| 293 | 70 |  |  |  |  | 469 | $reference->{ORIGINAL_PHRASE} = $this; | 
| 294 |  |  |  |  |  |  | } | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  | #else | 
| 297 |  |  |  |  |  |  | #{ | 
| 298 |  |  |  |  |  |  | # print STDERR " NON \n"; | 
| 299 |  |  |  |  |  |  | #} | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  |  | 
| 304 |  |  |  |  |  |  | sub getID | 
| 305 |  |  |  |  |  |  | { | 
| 306 | 92 |  |  | 92 | 1 | 176 | my ($this) = @_; | 
| 307 | 92 |  |  |  |  | 350 | return $this->{ID}; | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | sub getTestifiedTerms | 
| 311 |  |  |  |  |  |  | { | 
| 312 | 209 |  |  | 209 | 1 | 359 | my ($this) = @_; | 
| 313 | 209 |  |  |  |  | 868 | return $this->{TESTIFIED_TERMS}; | 
| 314 |  |  |  |  |  |  | } | 
| 315 |  |  |  |  |  |  |  | 
| 316 |  |  |  |  |  |  | sub addTestifiedTerms | 
| 317 |  |  |  |  |  |  | { | 
| 318 | 3 |  |  | 3 | 1 | 9 | my ($this,$term_frontiers_h,$testified_term_set,$fh) = @_; | 
| 319 | 3 |  |  |  |  | 27 | my $testified; | 
| 320 |  |  |  |  |  |  | my @index; | 
| 321 | 3 |  |  |  |  | 0 | my $index; | 
| 322 | 3 |  |  |  |  | 0 | my $key; | 
| 323 | 3 |  |  |  |  | 9 | $this->{TESTIFIED_TERMS} = {}; | 
| 324 | 3 |  |  |  |  | 14 | foreach my $tt_mark (values (%$term_frontiers_h)) | 
| 325 |  |  |  |  |  |  | { | 
| 326 | 3 |  |  |  |  | 15 | $index = $tt_mark->getStart; | 
| 327 |  |  |  |  |  |  |  | 
| 328 | 3 | 50 |  |  |  | 44 | if (defined $index) { | 
| 329 | 3 |  |  |  |  | 16 | while ($index < $tt_mark->getEnd) | 
| 330 |  |  |  |  |  |  | { | 
| 331 | 6 |  |  |  |  | 25 | push @index, $index++; | 
| 332 |  |  |  |  |  |  | } | 
| 333 | 3 |  |  |  |  | 14 | $key = join("-",@index); | 
| 334 | 3 |  |  |  |  | 8 | push @{$this->getTestifiedTerms->{$key}}, $testified_term_set->getTestifiedTerms->{$tt_mark->getTestifiedID}; | 
|  | 3 |  |  |  |  | 12 |  | 
| 335 | 3 |  |  |  |  | 13 | @index = (); | 
| 336 |  |  |  |  |  |  | } | 
| 337 |  |  |  |  |  |  | } | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | } | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | sub getIndexSet | 
| 345 |  |  |  |  |  |  | { | 
| 346 | 235 |  |  | 235 | 1 | 443 | my ($this) = @_; | 
| 347 | 235 |  |  |  |  | 867 | return $this->{INDEX_SET}; | 
| 348 |  |  |  |  |  |  | } | 
| 349 |  |  |  |  |  |  |  | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | sub addOccurrences | 
| 352 |  |  |  |  |  |  | { | 
| 353 | 0 |  |  | 0 | 1 | 0 | my ($this,$occurrences_a) = @_; | 
| 354 | 0 |  |  |  |  | 0 | my $occurrence; | 
| 355 |  |  |  |  |  |  |  | 
| 356 | 0 |  |  |  |  | 0 | foreach $occurrence (@$occurrences_a) | 
| 357 |  |  |  |  |  |  | { | 
| 358 | 0 | 0 |  |  |  | 0 | if($occurrence->isMaximal) | 
| 359 |  |  |  |  |  |  | { | 
| 360 | 0 |  |  |  |  | 0 | $this->{MNP_STATUS} = 1;  # added by SA 13/02/2009:: if at least one occurrence is a MNP, Phrase is a MNP | 
| 361 |  |  |  |  |  |  | } | 
| 362 | 0 |  |  |  |  | 0 | $this->addExistingOccurrence($occurrence); | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  | } | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | sub adjustReferences | 
| 367 |  |  |  |  |  |  | { | 
| 368 | 66 |  |  | 66 | 1 | 121 | my ($this,$term_candidates_a,$current,$reference) = @_; | 
| 369 | 66 |  |  |  |  | 106 | my $term_candidate; | 
| 370 |  |  |  |  |  |  | my $island; | 
| 371 |  |  |  |  |  |  |  | 
| 372 | 66 |  |  |  |  | 109 | foreach $term_candidate (@$term_candidates_a) | 
| 373 |  |  |  |  |  |  | { | 
| 374 | 324 | 100 | 66 |  |  | 1545 | if ((blessed($term_candidate)) && ($term_candidate->isa('Lingua::YaTeA::MultiWordTermCandidate'))) | 
| 375 |  |  |  |  |  |  | { | 
| 376 | 129 | 100 |  |  |  | 295 | if($term_candidate->getRootHead->getID == $current->getID) | 
| 377 |  |  |  |  |  |  | { | 
| 378 | 28 |  |  |  |  | 55 | $term_candidate->{ROOT_HEAD} = $reference; | 
| 379 | 28 |  |  |  |  | 72 | $reference->setROOT($term_candidate); | 
| 380 |  |  |  |  |  |  | } | 
| 381 | 129 | 100 |  |  |  | 288 | if($term_candidate->getRootModifier->getID == $current->getID) | 
| 382 |  |  |  |  |  |  | { | 
| 383 | 37 |  |  |  |  | 70 | $term_candidate->{ROOT_MODIFIER} = $reference; | 
| 384 | 37 |  |  |  |  | 79 | $reference->setROOT($term_candidate); | 
| 385 |  |  |  |  |  |  | } | 
| 386 |  |  |  |  |  |  | } | 
| 387 |  |  |  |  |  |  | } | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | } | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | sub addExistingOccurrence | 
| 393 |  |  |  |  |  |  | { | 
| 394 | 0 |  |  | 0 | 1 |  | my ($this,$occurrence) = @_; | 
| 395 | 0 |  |  |  |  |  | push @{$this->{OCCURRENCES}}, $occurrence; | 
|  | 0 |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  | } | 
| 397 |  |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | sub getWordIndex | 
| 400 |  |  |  |  |  |  | { | 
| 401 | 0 |  |  | 0 | 1 |  | my ($this,$word) = @_; | 
| 402 | 0 |  |  |  |  |  | my $w; | 
| 403 | 0 |  |  |  |  |  | my $i = 0; | 
| 404 | 0 |  |  |  |  |  | foreach $w (@{$this->getWords}) | 
|  | 0 |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  | { | 
| 406 | 0 | 0 |  |  |  |  | if($w == $word) | 
| 407 |  |  |  |  |  |  | { | 
| 408 | 0 |  |  |  |  |  | return $i; | 
| 409 |  |  |  |  |  |  | } | 
| 410 | 0 |  |  |  |  |  | $i++; | 
| 411 |  |  |  |  |  |  | } | 
| 412 |  |  |  |  |  |  | } | 
| 413 |  |  |  |  |  |  |  | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | 1; | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  | __END__ |