|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Lingua::YaTeA::MultiWordPhrase;  | 
| 
2
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
35
 | 
 use strict;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
151
 | 
    | 
| 
3
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
26
 | 
 use warnings;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
137
 | 
    | 
| 
4
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2177
 | 
 use Lingua::YaTeA::Phrase;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
    | 
| 
5
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2487
 | 
 use Lingua::YaTeA::MultiWordUnit;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
6
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2443
 | 
 use Lingua::YaTeA::Tree;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
    | 
| 
7
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
202
 | 
 use Lingua::YaTeA::IndexSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
8
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
150
 | 
 use UNIVERSAL;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
9
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
132
 | 
 use Scalar::Util qw(blessed);  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
218
 | 
    | 
| 
10
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
29
 | 
 use Data::Dumper;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
183
 | 
    | 
| 
11
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
29
 | 
 use NEXT;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
    | 
| 
12
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
166
 | 
 use base qw(Lingua::YaTeA::Phrase Lingua::YaTeA::MultiWordUnit);  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1063
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2418
 | 
 use Encode qw(:fallbacks);;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38443
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18389
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $counter = 0;  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $parsed = 0;  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION=$Lingua::YaTeA::VERSION;  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
22
 | 
81
 | 
 
 | 
 
 | 
  
81
  
 | 
  
1
  
 | 
343
 | 
     my ($class_or_object,$num_content_words,$words_a,$tag_set) = @_;  | 
| 
23
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
150
 | 
     my $this = shift;  | 
| 
24
 | 
81
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
266
 | 
     $this = bless {}, $this unless ref $this;  | 
| 
25
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
230
 | 
     $this->{ISLAND_SET} = ();  | 
| 
26
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
564
 | 
     $this->NEXT::new(@_);  | 
| 
27
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
270
 | 
      return $this;  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub searchEndogenousIslands  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
35
 | 
27
 | 
 
 | 
 
 | 
  
27
  
 | 
  
1
  
 | 
101
 | 
     my ($this,$phrase_set,$chunking_data,$tag_set,$lexicon,$sentence_set,$fh) = @_;  | 
| 
36
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114
 | 
     my $sub_indexes_set_a = $this->getIndexSet->searchSubIndexesSet($this->getWords,$chunking_data,$tag_set,$lexicon,$sentence_set);  | 
| 
37
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
     my $sub_index;  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $source_a;  | 
| 
39
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
     my $corrected = 0;  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
41
 | 
27
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
97
 | 
     if(scalar  @$sub_indexes_set_a > 0)  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
43
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
69
 | 
 	foreach $sub_index (@$sub_indexes_set_a)  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
46
 | 
277
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
639
 | 
 	    if(  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		(!defined $this->getIslandSet)  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		||  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		(  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 (! $this->getIslandSet->existIsland($sub_index))  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 &&  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 (! $this->getIslandSet->existLargerIsland($sub_index))  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		)  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		)  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
56
 | 
259
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
641
 | 
 		if($source_a = $phrase_set->searchFromIF($sub_index->buildIFSequence($this->getWords)))  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
58
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		    $this->makeIsland($sub_index,$source_a,'endogenous','IF',$tag_set,$lexicon,$sentence_set,$fh);  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
62
 | 
258
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
643
 | 
 		    if($source_a = $phrase_set->searchFromLF($sub_index->buildLFSequence($this->getWords)))  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    {  | 
| 
64
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 			$this->makeIsland($sub_index,$source_a,'endogenous','LF',$tag_set,$lexicon,$sentence_set,$fh);  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sortIslands  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
76
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
     my ($a,$b,$parsing_direction,$fh) = @_;  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # print $fh "a: " ;  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     $a->getIndexSet->print($fh);  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     print $fh " : " .$a->gapSize . "\n";   | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     print $fh "b: " ;  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     $b->getIndexSet->print($fh);  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     print $fh " : " .$b->gapSize . "\n";   | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
84
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if($parsing_direction eq "LEFT")  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
86
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if($a->getIndexSet->getFirst == $b->getIndexSet->getFirst)  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
88
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   return $b->gapSize <=> $a->gapSize;  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
92
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    return $a->getIndexSet->getFirst <=> $b->getIndexSet->getFirst;  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
97
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if($parsing_direction eq "RIGHT")  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
99
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    if($a->getIndexSet->getLast == $b->getIndexSet->getLast)  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
101
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return $b->gapSize <=> $a->gapSize;  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    else  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
105
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return $b->getIndexSet->getLast <=> $a->getIndexSet->getLast;  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub integrateIslands  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #  my ($this,$chunking_data,$tag_set,$lexicon,$parsing_direction,$sentence_set,$fh) = @_;  | 
| 
117
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
  
0
  
 | 
49
 | 
     my ($this,$tag_set,$lexicon,$parsing_direction,$sentence_set,$fh) = @_;  | 
| 
118
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     my $test;  | 
| 
119
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     my $corrected = 0;  | 
| 
120
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     my $island;  | 
| 
121
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my @islands = values %{$this->getIslandSet->getIslands};  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #@islands = sort({$a->getIndexSet->getSize <=> $b->getIndexSet->getSize} @islands);  | 
| 
123
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
     @islands = sort({&sortIslands($a,$b,$parsing_direction,$fh)} @islands);  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
124
 | 
11
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
108
 | 
     if ((blessed($this)) && ($this->isa('Lingua::YaTeA::MultiWordPhrase')))  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
126
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 	foreach $island (@islands)  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    print $fh "integrate essai " . $island->getIF . "\n";  | 
| 
129
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
 	    if($island->isIntegrated == 0)  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
131
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
 		$test = $this->integrateIsland($island,$tag_set,$lexicon,$sentence_set,$fh);  | 
| 
132
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
 		if($test == 1)  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
134
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		    $corrected = 1;  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#print $fh "apres l'ilot " . $island->getIF . "\n";  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    $this->printForest($fh);  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
141
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
     return ($this->checkParseCompleteness($fh),$corrected);  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub integrateIsland  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
149
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
32
 | 
     my ($this,$island,$tagset,$lexicon,$sentence_set,$fh) = @_;  | 
| 
150
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     my $i;  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $tree;   | 
| 
152
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     my $node_sets_a = $island->importNodeSets;     | 
| 
153
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     my @new_trees;  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $new;  | 
| 
155
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $integrated_at_least_once = 0;  | 
| 
156
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     my $success;  | 
| 
157
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     my $corrected = 0;  | 
| 
158
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
     if(!defined $this->getForest)  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
160
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
 	$tree = Lingua::YaTeA::Tree->new;  | 
| 
161
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
 	$tree->setSimplifiedIndexSet($this->getIndexSet);  | 
| 
162
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 	$this->addTree($tree);  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
165
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     while ($tree = pop @{$this->getForest})  | 
| 
 
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#print $fh "essaie dans arebre :" . $tree . "\n";  | 
| 
168
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	($success) = $tree->integrateIslandNodeSets($node_sets_a,$island->getIndexSet,\@new_trees,$this->getWords,$tagset,$fh);  | 
| 
169
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	if($success == 1)  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
171
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	    $integrated_at_least_once = 1;  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     while ($new = pop @new_trees)  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#print $fh "pop new ici :" . $new . "\n";  | 
| 
178
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 	$this->addTree($new);  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
42
 | 
     if($integrated_at_least_once == 1)  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
183
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
 	$island->{INTEGRATED} = 1;  | 
| 
184
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
 	$corrected = $this->correctPOSandLemma($island,$lexicon,$sentence_set,$fh);  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
188
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$this->removeIsland($island,$fh);  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
190
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
     return $corrected;  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub correctPOSandLemma  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
195
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
28
 | 
     my ($this,$island,$lexicon,$sentence_set,$fh) = @_;  | 
| 
196
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     my $i;  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $index;  | 
| 
198
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     my $corrected = 0;  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
200
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     for ($i=0; $i< scalar @{$island->getIndexSet->getIndexes}; $i++)  | 
| 
 
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
    | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
202
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 	$index = $island->getIndexSet->getIndexes->[$i];  | 
| 
203
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
56
 | 
 	if (defined ($island->getSource->getWord($i))) {  | 
| 
204
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
61
 | 
 	    if  ($island->getSource->getWord($i)->getPOS ne $this->getWord($index)->getPOS)  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#print $fh  $island->getSource->getWord($i)->getPOS . " !=" .$this->getWord($index)->getPOS . "\n";   | 
| 
207
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 		if(lc($island->getSource->getWord($i)->getIF) eq lc($this->getWord($index)->getIF))  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    | 
| 
210
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 		    if($this->isCorrectedWord($index) == 0) # added by SA (29/08/2008) : a word can be corrected only once  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    {  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			#print $fh lc($island->getSource->getWord($i)->getIF) . "=" .  lc($this->getWord($index)->getIF) . "=> corrige\n";  | 
| 
213
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 			$this->correctWord($index,$island->getSource->getWord($i),"POS",$lexicon,$sentence_set);  | 
| 
214
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 			push @{$this->{CORRECTED_WORDS}}, $index;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
215
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 			$corrected = 1;  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
219
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
72
 | 
 	    if($island->getSource->getWord($i)->getLF ne $this->getWord($index)->getLF)  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
221
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		 if($this->isCorrectedWord($index) == 0) # added by SA (29/08/2008) : a word can be corrected only once  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 {  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    # print $fh  $island->getSource->getWord($i)->getLF . " !=" .$this->getWord($index)->getLF . "=>corrige\n";   | 
| 
224
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		     $this->correctWord($index,$island->getSource->getWord($i),"LF",$lexicon,$sentence_set);  | 
| 
225
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		     push @{$this->{CORRECTED_WORDS}}, $index;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
226
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		     $corrected = 1;  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 }  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
230
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    warn "Word undefined\n";  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
233
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     return $corrected;  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # added by SA (29/08/2008) : check if a word has already been corrected  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub isCorrectedWord  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
239
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
0
  
 | 
29
 | 
     my ($this,$index) = @_;  | 
| 
240
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $i;  | 
| 
241
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     if(defined $this->getCorrectedWords)  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
243
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	foreach $i (@{$this->getCorrectedWords})  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
245
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    if($i == $index)  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
247
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return 1;  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
251
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     return 0;  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getCorrectedWords  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
256
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
0
  
 | 
12
 | 
     my ($this) = @_;  | 
| 
257
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     return $this->{CORRECTED_WORDS};  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub correctWord  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
262
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
17
 | 
     my ($this,$index,$standard,$type,$lexicon,$sentence_set) = @_;  | 
| 
263
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my $form;  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $new_word;  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
266
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     if($type eq "POS")  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
268
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
 	$form = $this->{WORDS}->[$index]->getIF . "\t" . $standard->getPOS .  "\t" . $this->{WORDS}->[$index]->getLF;  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
273
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$form = $this->{WORDS}->[$index]->getIF . "\t" . $this->{WORDS}->[$index]->getPOS .  "\t" . $standard->getLF;  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
276
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
     $new_word = Lingua::YaTeA::WordFromCorpus->new($form,$lexicon,$sentence_set);  | 
| 
277
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     $this->{WORDS}->[$index] = $new_word->getLexItem;  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getIslandSet  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
284
 | 
609
 | 
 
 | 
 
 | 
  
609
  
 | 
  
1
  
 | 
1089
 | 
     my ($this) = @_;  | 
| 
285
 | 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1852
 | 
     return $this->{ISLAND_SET};  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub checkMaximumLength  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
293
 | 
72
 | 
 
 | 
 
 | 
  
72
  
 | 
  
1
  
 | 
168
 | 
     my ($this,$max_length) = @_;  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
295
 | 
72
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
273
 | 
     if($this->getLength > $max_length)  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
297
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return 0;  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
299
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
236
 | 
     return 1;  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub existIsland  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
306
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this,$index) = @_;  | 
| 
307
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if(! defined $this->getIslandSet)  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
309
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return 0;  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
311
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $this->getIslandSet->existIsland($index);  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub makeIsland  | 
| 
315
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
46
 | 
 {    my ($this,$index,$source_a,$type,$access,$tag_set,$lexicon,$sentence_set,$fh) = @_;  | 
| 
316
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     my $source;  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $s;  | 
| 
318
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $island;  | 
| 
319
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
      my $corrected;  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        | 
| 
321
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
36
 | 
      if($type eq "endogenous")  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      {  | 
| 
323
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
 	 $source = $index->chooseBestSource($source_a,$this->getWords,$tag_set);  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      }  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      else  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      {  | 
| 
327
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	$source = $source_a->[0];   | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      }  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #we verify if the island is multi-word phrase  | 
| 
331
 | 
9
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
122
 | 
      if ((blessed($source)) && ($source->isa('Lingua::YaTeA::MultiWordPhrase')))  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      {     | 
| 
333
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
 	 $island = Lingua::YaTeA::Island->new($index,$type,$source);   | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   | 
| 
336
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
 	 $this->addIsland($island,$fh);  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      }  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      # if($this->isa('Lingua::YaTeA::MultiWordPhrase'))  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      {  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 	 $corrected = $this->integrateIsland($island,$tag_set,$lexicon,$sentence_set,$fh);  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      }  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     print $fh "coorected:" . $corrected ;  | 
| 
345
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
      return $corrected;  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub removeIsland  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
351
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this,$island,$fh) = @_;  | 
| 
352
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $this->getIslandSet->removeIsland($island,$fh);  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub addIsland  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
357
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
29
 | 
     my ($this,$island,$fh) = @_;  | 
| 
358
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     if(!defined $this->getIslandSet)  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
360
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
 	$this->{ISLAND_SET} = Lingua::YaTeA::IslandSet->new;  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
362
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     $this->getIslandSet->addIsland($island,$fh);  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getParsablePotentialIslands  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
368
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
10
 | 
     my ($this,$parsing_pattern_set,$tag_set,$parsing_direction) = @_;  | 
| 
369
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my %potential_islands;  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $concurrent_set_a;  | 
| 
371
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $concurrent;  | 
| 
372
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $key;  | 
| 
373
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     while (($key,$concurrent_set_a) = each (%{$this->getTestifiedTerms}))  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# islands can be created only from MultiWordTestifiedTerm  | 
| 
376
 | 
3
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
44
 | 
 	if ((blessed($concurrent_set_a->[0])) && ($concurrent_set_a->[0]->isa('Lingua::YaTeA::MultiWordTestifiedTerm')))  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
378
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	    foreach $concurrent (@$concurrent_set_a)  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# filter 1a : only testified terms that have a length inferior or equal to that of the phrase are kept  | 
| 
381
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		if($concurrent->getLength <= $this->getLength)  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    # filter 1b : only testified term that have a parse are kept    | 
| 
384
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 		    if($concurrent->getIfParsable($parsing_pattern_set,$tag_set,$parsing_direction))  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    {  | 
| 
386
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 			push @{$potential_islands{$key}}, $concurrent;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
393
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return \%potential_islands;  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getBestExogenousIslands  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
399
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
9
 | 
     my ($this,$potential_islands_h) = @_;  | 
| 
400
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     my $concurrent_set_a;  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $concurrent;  | 
| 
402
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $key;  | 
| 
403
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my %preselected_islands;  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
405
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     while (($key,$concurrent_set_a) = each (%$potential_islands_h))  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# if more than one testified terms exist for a given span of text  | 
| 
408
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	if(scalar @$concurrent_set_a > 1)   | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
410
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    $preselected_islands{$key} =  $this->orderConcurrentPotentialIslands($key,$concurrent_set_a,\%preselected_islands);  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
415
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	    $preselected_islands{$key} = $concurrent_set_a->[0];  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
418
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return \%preselected_islands;  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub searchExogenousIslands  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
423
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
11
 | 
     my ($this,$parsing_pattern_set,$tag_set,$parsing_direction,$lexicon,$sentence_set) = @_;  | 
| 
424
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     my $potential_islands_h;  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $preselected_islands_h;  | 
| 
426
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $key;  | 
| 
427
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $corrected = 0;  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
429
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $potential_islands_h = $this->getParsablePotentialIslands($parsing_pattern_set,$tag_set,$parsing_direction);  | 
| 
430
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $preselected_islands_h = $this->getBestExogenousIslands($potential_islands_h);  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
433
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my @source;  | 
| 
434
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     foreach $key (sort ({$this->sortIslandKeys($a,$b)} keys %$preselected_islands_h))  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
436
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 	my $index = Lingua::YaTeA::IndexSet->new;  | 
| 
437
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	@{$index->{INDEXES}} = split /-/, $key;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
438
 | 
3
 | 
  
 50
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
14
 | 
 	if  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    (  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     (!defined $this->getIslandSet)  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     ||  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     (  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      (! $this->getIslandSet->existIsland($index))  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      &&  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      (! $this->getIslandSet->existLargerIsland($index))  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     )  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    )  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
449
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	    $source[0] = $preselected_islands_h->{$key};  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 	    if($this->makeIsland($index,\@source,'exogenous','UNKNOWN',$tag_set,$lexicon,$sentence_set) == 1)  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 	    {  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 		$corrected =1;  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 	    }  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
456
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	    $this->makeIsland($index,\@source,'exogenous','UNKNOWN',$tag_set,$lexicon,$sentence_set);  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}      | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$this->printIslands(*STDERR);  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # return ($this->checkParseCompleteness,$corrected);  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub plugInternalFreeNodes  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
465
 | 
27
 | 
 
 | 
 
 | 
  
27
  
 | 
  
1
  
 | 
110
 | 
     my ($this,$parsing_pattern_set,$parsing_direction,$tag_set,$fh) = @_;  | 
| 
466
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
337
 | 
     my $island;  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $key;  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
469
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $tree;  | 
| 
470
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $tree_updated;  | 
| 
471
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $unplugged_a;  | 
| 
472
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $unplugged;  | 
| 
473
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $unplugged_index_set;  | 
| 
474
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my %unexploitable_islands;  | 
| 
475
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @tmp_forest;  | 
| 
476
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @new_trees;  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
478
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $free_nodes_a;  | 
| 
479
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $new_plugging;  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    print $fh "plugInternalFreeNodes\n";  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
482
 | 
27
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
122
 | 
     if(defined $this->getForest)  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	print $fh "nb arbres: " . scalar @{$this->getForest} . "\n";  | 
| 
485
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	foreach  $tree (@{$this->getForest})  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    print $fh "TREE: ". $tree ."\n";  | 
| 
488
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	    $tree_updated = 0;  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
490
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	    $new_plugging = 1;  | 
| 
491
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 	    while ($new_plugging == 1)  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
493
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 		$new_plugging = $tree->plugNodePairs($parsing_pattern_set,$parsing_direction,$tag_set,$this->getWords,$fh);  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
495
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
 	    $tree->completeDiscontinuousNodes($parsing_pattern_set,$parsing_direction,$tag_set,$this->getWords,$fh);  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    print $fh "avant removeDiscontinuousNodes\n";  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    $tree->print($this->getWords,$fh);  | 
| 
498
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 	    ($tree_updated,$unplugged_a) = $tree->removeDiscontinuousNodes($this->getWords,$fh);  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
500
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
 	    if($tree_updated == 1)  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #		print $fh "tree upodate " .$tree . "\n";  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #		$tree->print($this->getWords,$fh);  | 
| 
504
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if(scalar @{$tree->getNodeSet->getNodes} > 0)  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
506
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		    $tree->updateIndexes($this->getIndexSet,$this->getWords);  | 
| 
507
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		    if(scalar @$unplugged_a > 0)  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    {  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			print $fh "ya a des unplugged\n";  | 
| 
510
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			foreach $unplugged (@$unplugged_a)  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			    print $fh "unpl: " . $unplugged->getID . "\n";  | 
| 
513
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			    $free_nodes_a = $tree->getNodeSet->searchFreeNodes($this->getWords);  | 
| 
514
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			    $unplugged->hitchMore($free_nodes_a,$tree,$this->getWords,$fh);  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #		    print $fh "push " . $tree . "\n";  | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #		    $tree->print($this->getWords,$fh);  | 
| 
519
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		    push @tmp_forest, $tree;  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    else  | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
526
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 		push @tmp_forest, $tree;  | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
529
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	if(scalar @tmp_forest > 0)  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    print $fh "redefinition forest\n";  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
533
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	    @{$this->{FOREST}} = @tmp_forest;  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
    | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    $this->printForest($fh);  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #@{$this->getForest} = @tmp_forest;  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
540
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    undef $this->{FOREST};  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub checkParseCompleteness  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
554
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
  
1
  
 | 
35
 | 
     my ($this,$fh) = @_;  | 
| 
555
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     my @uncomplete_trees;  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @complete_trees;  | 
| 
557
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $tree;  | 
| 
558
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     my $parsed =0;  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
560
 | 
11
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     if(!defined $this->getForest)  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
562
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return 0;  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
566
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
 	while ($tree = pop @{$this->getForest})  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
    | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	    print $fh "pop : ". $tree . "\n";  | 
| 
569
 | 
11
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
51
 | 
 	    if($tree->getSimplifiedIndexSet->getSize == 1)  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
571
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$parsed = 1;  | 
| 
572
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$tree->setHead;  | 
| 
573
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$tree->setReliability(3);  | 
| 
574
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @complete_trees, $tree;  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    else  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
578
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 		push @uncomplete_trees, $tree;  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
582
 | 
11
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     if($parsed == 1)  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
584
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	@{$this->{FOREST}} = @complete_trees;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
585
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return 1;  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
589
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 	@{$this->{FOREST}} = @uncomplete_trees;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
    | 
| 
590
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
 	return 0;  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub orderConcurrentPotentialIslands  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
596
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this,$key,$concurrent_set_a) = @_;  | 
| 
597
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $concurrent;  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $inflected_score;  | 
| 
599
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my %inflected_form_scores;  | 
| 
600
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @sorted_scores;  | 
| 
601
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $best_set_a;  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # filter 2 : compare inflected forms  | 
| 
604
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     foreach $concurrent (@$concurrent_set_a)  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
606
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$inflected_score = $this->compareInflectedFormWithTestified($concurrent,$key);  | 
| 
607
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	push @{$inflected_form_scores{$inflected_score}}, $concurrent;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
609
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     @sorted_scores = sort ({ $b <=> $a } keys (%inflected_form_scores)) ;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
611
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $best_set_a = $inflected_form_scores{$sorted_scores[0]};  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # filter 3 : compare POS sequence  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # if several testified terms have the same inflected and lemmatized forms  | 
| 
615
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if(scalar @$best_set_a > 1)  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
617
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	@$best_set_a = sort ({$this->sortPotentialIslandsAccordingToPOS($a,$b,$key)} @$best_set_a) ;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
621
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $best_set_a->[0];  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sortIslandKeys  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
628
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this,$first,$second) = @_;  | 
| 
629
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @first_index = split /-/, $first;  | 
| 
630
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @second_index = split /-/, $second;  | 
| 
631
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return (scalar @second_index <=> scalar @first_index);  | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sortPotentialIslandsAccordingToPOS  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
637
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this,$first,$second,$key) = @_;  | 
| 
638
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return ($this->comparePOSWithTestified($second,$key) <=> $this->comparePOSWithTestified($first,$key));  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub compareInflectedFormWithTestified  | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
644
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this,$testified_term,$key) = @_;   | 
| 
645
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $i;  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $j;  | 
| 
647
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $score = 0;  | 
| 
648
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @index = split(/-/,$key);   | 
| 
649
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for ($i = $index[0]; $i <= $index[$#index]; $i++)  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
651
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	for ($j = 0; $j < scalar @index; $j++)  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
653
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    if($this->getWord($i)->getIF eq $testified_term->getWord($j)->getIF)  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
655
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$score++;  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
659
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $score;  | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub comparePOSWithTestified  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
665
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this,$testified_term,$key) = @_;   | 
| 
666
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $i;  | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $j;  | 
| 
668
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $score = 0;  | 
| 
669
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @index = split(/-/,$key);   | 
| 
670
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for ($i = $index[0]; $i <= $index[$#index]; $i++)  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
672
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	for ($j = 0; $j < scalar @index; $j++)  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
674
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    if($this->getWord($i)->getPOS eq $testified_term->getWord($j)->getPOS)  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
676
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$score++;  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
680
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $score;  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub printIslands  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
686
 | 
72
 | 
 
 | 
 
 | 
  
72
  
 | 
  
1
  
 | 
133
 | 
     my ($this,$fh) = @_;  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
688
 | 
72
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
143
 | 
     if(defined $fh)  | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
690
 | 
72
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
154
 | 
 	if(defined $this->getIslandSet)  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
692
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	    print $fh " " . $this->getIslandSet->size . "\n";  | 
| 
693
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 	    $this->getIslandSet->print($fh);  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
697
 | 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
198
 | 
 	    print $fh " 0\n";  | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
702
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if(defined $this->getIslandSet)  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
704
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    print "\n";  | 
| 
705
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    $this->getIslandSet->print;  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
709
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    print "0\n";  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub print   | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
716
 | 
72
 | 
 
 | 
 
 | 
  
72
  
 | 
  
1
  
 | 
136
 | 
     my ($this,$fh) = @_;  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
718
 | 
72
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
152
 | 
     if(defined $fh)  | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
721
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
201
 | 
 	print $fh  "if: " . Encode::encode("UTF-8", $this->getIF) . "\n";  | 
| 
722
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3300
 | 
 	print $fh "pos: " . Encode::encode("UTF-8", $this->getPOS) . "\n";  | 
| 
723
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2833
 | 
 	print $fh "lf: " . Encode::encode("UTF-8", $this->getLF) . "\n";  | 
| 
724
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2826
 | 
 	print $fh "is a term candidate: " . $this->isTC. "\n";  | 
| 
725
 | 
72
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
178
 | 
 	if($this->isTC)  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
727
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
142
 | 
 	    print $fh "parsing method: ". $this->getParsingMethod . "\n";  | 
| 
728
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
94
 | 
 	    print $fh "forest: " ;  | 
| 
729
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
 	    $this->printForestParenthesised($fh);  | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
732
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
192
 | 
 	print $fh "islands:";  | 
| 
733
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
175
 | 
 	$this->printIslands($fh);  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
738
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	print  "if: " . $this->getIF . "\n";  | 
| 
739
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	print "pos: " . $this->getPOS . "\n";  | 
| 
740
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	print "lf: " . $this->getLF . "\n";  | 
| 
741
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	print "is a term candidate: " . $this->isTC. "\n";  | 
| 
742
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if($this->isTC)  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
744
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    print "parsing method: ". $this->getParsingMethod . "\n";  | 
| 
745
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    print "forest: " ;  | 
| 
746
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    $this->printForestParenthesised;  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
749
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	print "islands:";  | 
| 
750
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$this->printIslands;  | 
| 
751
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	print "\n";  | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |