| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package RDF::NLP::SPARQLQuery::Question; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 11 |  |  | 11 |  | 63 | use utf8; | 
|  | 11 |  |  |  |  | 22 |  | 
|  | 11 |  |  |  |  | 88 |  | 
| 4 | 11 |  |  | 11 |  | 302 | use strict; | 
|  | 11 |  |  |  |  | 27 |  | 
|  | 11 |  |  |  |  | 353 |  | 
| 5 | 11 |  |  | 11 |  | 62 | use warnings; | 
|  | 11 |  |  |  |  | 22 |  | 
|  | 11 |  |  |  |  | 381 |  | 
| 6 | 11 |  |  | 11 |  | 56 | use Data::Dumper; | 
|  | 11 |  |  |  |  | 19 |  | 
|  | 11 |  |  |  |  | 636 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 11 |  |  | 11 |  | 9190 | use Mouse; | 
|  | 11 |  |  |  |  | 448850 |  | 
|  | 11 |  |  |  |  | 66 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 | 11 |  |  | 11 |  | 13805 | use RDF::NLP::SPARQLQuery::Query; | 
|  | 11 |  |  |  |  | 39 |  | 
|  | 11 |  |  |  |  | 80568 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | our $VERSION='0.1'; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | has 'docId'         => (is => 'rw', isa => 'Str'); | 
| 16 |  |  |  |  |  |  | has 'verbose'         => (is => 'rw', isa => 'Int'); | 
| 17 |  |  |  |  |  |  | has 'language'      => (is => 'rw', isa => 'Str'); | 
| 18 |  |  |  |  |  |  | has 'sentences'     => (is => 'rw', isa => 'ArrayRef'); | 
| 19 |  |  |  |  |  |  | has 'postags'       => (is => 'rw', isa => 'ArrayRef'); | 
| 20 |  |  |  |  |  |  | has 'semanticUnits' => (is => 'rw', isa => 'ArrayRef'); | 
| 21 |  |  |  |  |  |  | has 'semanticCorrespondance' => (is => 'rw', isa => 'HashRef'); | 
| 22 |  |  |  |  |  |  | has 'aggregation' => (is => 'rw', isa => 'HashRef'); | 
| 23 |  |  |  |  |  |  | # has 'conjunction' => (is => 'rw', isa => 'Int'); | 
| 24 |  |  |  |  |  |  | has 'negation' => (is => 'rw', isa => 'HashRef'); | 
| 25 |  |  |  |  |  |  | # has 'varPrefix' => (is => 'rw', isa => 'Str'); | 
| 26 |  |  |  |  |  |  | # has 'variableCounter' => (is => 'rw', isa => 'Int'); | 
| 27 |  |  |  |  |  |  | # has 'variableSet' => (is => 'rw', isa => 'HashRef'); | 
| 28 |  |  |  |  |  |  | has 'union'       => (is => 'rw', isa => 'ArrayRef'); | 
| 29 |  |  |  |  |  |  | has 'query'       => (is => 'rw', isa => 'Object'); | 
| 30 |  |  |  |  |  |  | has 'questionTopic'       => (is => 'rw', isa => 'Str'); | 
| 31 |  |  |  |  |  |  | has 'semFeaturesIndex' => (is => 'rw', isa => 'HashRef'); | 
| 32 |  |  |  |  |  |  | has 'config' => (is => 'rw', isa => 'HashRef'); | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | # has '' => (is => 'rw', isa => 'HashRef'); | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | # DOC | 
| 38 |  |  |  |  |  |  | sub new { | 
| 39 | 10 |  |  | 10 | 1 | 22 | my $class = shift; | 
| 40 | 10 |  |  |  |  | 80 | my %args = @_; | 
| 41 | 10 |  |  |  |  | 18 | my $i; | 
| 42 |  |  |  |  |  |  |  | 
| 43 | 10 |  |  |  |  | 221 | my $Question = { | 
| 44 |  |  |  |  |  |  | "verbose" => 0, | 
| 45 |  |  |  |  |  |  | "docId" => undef, | 
| 46 |  |  |  |  |  |  | "language" => undef, | 
| 47 |  |  |  |  |  |  | "sentences" => undef, | 
| 48 |  |  |  |  |  |  | "postags" => undef, | 
| 49 |  |  |  |  |  |  | "semanticUnits" => undef, | 
| 50 |  |  |  |  |  |  | # "selectPart" => [], | 
| 51 |  |  |  |  |  |  | # "wherePart" => [], | 
| 52 |  |  |  |  |  |  | "sortedSemanticUnits" => undef, | 
| 53 |  |  |  |  |  |  | "semanticCorrespondance" => undef, | 
| 54 |  |  |  |  |  |  | "aggregation" => {'TERM' => { | 
| 55 |  |  |  |  |  |  | 'count' => {}, | 
| 56 |  |  |  |  |  |  | 'max' => {}, | 
| 57 |  |  |  |  |  |  | 'min' => {}, | 
| 58 |  |  |  |  |  |  | 'distinct' => {}, | 
| 59 |  |  |  |  |  |  | 'per' => {}, | 
| 60 |  |  |  |  |  |  | }, | 
| 61 |  |  |  |  |  |  | 'QT' => {}, | 
| 62 |  |  |  |  |  |  | 'QTVAR' => {}, | 
| 63 |  |  |  |  |  |  | 'PREDICATE' => {}, | 
| 64 |  |  |  |  |  |  | 'ASK' => 0, | 
| 65 |  |  |  |  |  |  | }, | 
| 66 |  |  |  |  |  |  | #	"conjunction" => 0, | 
| 67 |  |  |  |  |  |  | "negation" => {}, | 
| 68 |  |  |  |  |  |  | "config" => undef, | 
| 69 |  |  |  |  |  |  | # "varPrefix" => "?v", | 
| 70 |  |  |  |  |  |  | # "variableCounter" => 0, | 
| 71 |  |  |  |  |  |  | # "variableSet" => {}, | 
| 72 |  |  |  |  |  |  | "union" => [], | 
| 73 |  |  |  |  |  |  | "questionTopic" => undef, | 
| 74 |  |  |  |  |  |  | 'semFeaturesIndex' => {} | 
| 75 |  |  |  |  |  |  | }; | 
| 76 | 10 |  |  |  |  | 38 | bless $Question, $class; | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 10 |  |  |  |  | 163 | $Question->verbose($args{'verbose'}); | 
| 79 | 10 |  |  |  |  | 69 | $Question->docId($args{'docId'}); | 
| 80 | 10 |  |  |  |  | 51 | $Question->language($args{'language'}); | 
| 81 | 10 |  |  |  |  | 17 | $Question->sentences([@{$args{'sentences'}}]); | 
|  | 10 |  |  |  |  | 71 |  | 
| 82 | 10 |  |  |  |  | 16 | $Question->postags([@{$args{'postags'}}]); | 
|  | 10 |  |  |  |  | 64 |  | 
| 83 | 10 |  |  |  |  | 18 | $Question->semanticUnits([@{$args{'semanticUnits'}}]); | 
|  | 10 |  |  |  |  | 58 |  | 
| 84 | 10 |  |  |  |  | 87 | $Question->config($args{'config'}); | 
| 85 |  |  |  |  |  |  |  | 
| 86 | 10 |  |  |  |  | 15 | my $term; | 
| 87 | 10 |  |  |  |  | 17 | foreach $term (@{$Question->semanticUnits}) { | 
|  | 10 |  |  |  |  | 41 |  | 
| 88 | 35 |  |  |  |  | 140 | for($i=$term->{'start_offset'}; $i <= $term->{'end_offset'};$i++) { | 
| 89 | 279 |  |  |  |  | 645 | $Question->_termIndex($i, $term); | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  | # warn "=> " . $term->{'semanticUnit'} . "\n"; | 
| 92 |  |  |  |  |  |  | # warn "\t" . join(",", keys %{$term->{'semanticTypes'}}) . "\n"; | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | # foreach $term (@{$Question->sortedSemanticUnits}) { | 
| 96 |  |  |  |  |  |  | # 	warn "=> " . $term->{'semanticUnit'} . "\n"; | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | # } | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  |  | 
| 101 | 10 |  |  |  |  | 56 | return($Question); | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | sub _regexForm { | 
| 105 | 1 |  |  | 1 |  | 3 | my $self = shift; | 
| 106 |  |  |  |  |  |  |  | 
| 107 | 1 |  |  |  |  | 14 | return($self->config->{'NLQUESTION'}->{'language="' . uc($self->language) . '"'}->{'REGEXFORM'}); | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | sub _reset_sortedSemanticUnits { | 
| 112 | 9 |  |  | 9 |  | 17 | my $self = shift; | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 9 |  |  |  |  | 24 | $self->{'sortedSemanticUnits'} = undef; | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | sub _sortedSemanticUnits { | 
| 119 | 331 |  |  | 331 |  | 4657 | my $self = shift; | 
| 120 |  |  |  |  |  |  |  | 
| 121 | 331 | 100 |  |  |  | 584 | if (@_) { | 
| 122 | 9 |  |  |  |  | 18 | $self->{'sortedSemanticUnits'} = shift; | 
| 123 |  |  |  |  |  |  | } else { | 
| 124 | 322 | 100 |  |  |  | 726 | if (!defined $self->{'sortedSemanticUnits'}) { | 
| 125 | 18 |  |  |  |  | 29 | $self->{'sortedSemanticUnits'} = [sort {$a->{'start_offset'} <=> $b->{'start_offset'}} @{$self->semanticUnits}]; | 
|  | 68 |  |  |  |  | 184 |  | 
|  | 18 |  |  |  |  | 140 |  | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  | } | 
| 128 | 331 |  |  |  |  | 1497 | return($self->{'sortedSemanticUnits'}); | 
| 129 |  |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | sub _getTerms { | 
| 132 | 12 |  |  | 12 |  | 13 | my $self = shift; | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 12 |  |  |  |  | 14 | my $offset = shift; | 
| 135 | 12 |  |  |  |  | 22 | return($self->_termIndex($offset)); | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | sub _termIndex { | 
| 139 | 291 |  |  | 291 |  | 317 | my $self = shift; | 
| 140 | 291 |  |  |  |  | 308 | my $offset = shift; | 
| 141 |  |  |  |  |  |  |  | 
| 142 | 291 | 100 |  |  |  | 468 | if (@_) { | 
| 143 | 279 |  |  |  |  | 287 | my $term = shift; | 
| 144 | 279 | 100 |  |  |  | 727 | if (!exists $self->{'termIndex'}->{$offset}) { | 
| 145 | 269 |  |  |  |  | 736 | $self->{'termIndex'}->{$offset} = []; | 
| 146 |  |  |  |  |  |  | } | 
| 147 | 279 |  |  |  |  | 346 | push @{$self->{'termIndex'}->{$offset}}, $term; | 
|  | 279 |  |  |  |  | 614 |  | 
| 148 |  |  |  |  |  |  | } else { | 
| 149 | 12 | 50 |  |  |  | 28 | if (exists $self->{'termIndex'}->{$offset}) { | 
| 150 | 12 |  |  |  |  | 38 | return($self->{'termIndex'}->{$offset}); | 
| 151 |  |  |  |  |  |  | } else { | 
| 152 | 0 |  |  |  |  | 0 | return(exists($self->{'termIndex'}->{$offset})); | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  | } | 
| 155 | 279 |  |  |  |  | 1001 | return($self->{'termIndex'}); | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | sub _delSemanticUnit { | 
| 160 | 0 |  |  | 0 |  | 0 | my $self = shift; | 
| 161 | 0 |  |  |  |  | 0 | my $term = shift; | 
| 162 | 0 |  |  |  |  | 0 | my $max; | 
| 163 |  |  |  |  |  |  | my $i; | 
| 164 | 0 |  |  |  |  | 0 | my $deleted; | 
| 165 |  |  |  |  |  |  | # del in semanticUnits | 
| 166 |  |  |  |  |  |  | #    $self->semanticUnits->[$term | 
| 167 |  |  |  |  |  |  |  | 
| 168 | 0 |  |  |  |  | 0 | $max = scalar(@{$self->semanticUnits}); | 
|  | 0 |  |  |  |  | 0 |  | 
| 169 | 0 |  |  |  |  | 0 | $i=0; | 
| 170 | 0 |  |  |  |  | 0 | $deleted = 0; | 
| 171 | 0 |  | 0 |  |  | 0 | do { | 
| 172 | 0 | 0 |  |  |  | 0 | if ($self->semanticUnits->[$i]->{'id'} == $term->{'id'}) { | 
| 173 | 0 |  |  |  |  | 0 | splice(@{$self->semanticUnits}, $i, 1); | 
|  | 0 |  |  |  |  | 0 |  | 
| 174 | 0 |  |  |  |  | 0 | $deleted = 1; | 
| 175 |  |  |  |  |  |  | } | 
| 176 | 0 |  |  |  |  | 0 | $i++; | 
| 177 |  |  |  |  |  |  | } while(($i < $max)&&($deleted==0)); | 
| 178 | 0 | 0 |  |  |  | 0 | if ($deleted==0) { | 
| 179 | 0 |  |  |  |  | 0 | $self->_printVerbose("term (" . $term->{'id'} . ") to delete not found in SemanticUnits\n",2); | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | # del in index | 
| 183 |  |  |  |  |  |  | # warn "$term\n"; | 
| 184 |  |  |  |  |  |  | # warn "\n". $term->{'start_offset'} . "\n"; | 
| 185 |  |  |  |  |  |  | # warn $self->_termIndex($term->{'start_offset'}); | 
| 186 | 0 |  |  |  |  | 0 | $max = scalar(@{$self->_termIndex($term->{'start_offset'})}); | 
|  | 0 |  |  |  |  | 0 |  | 
| 187 | 0 |  |  |  |  | 0 | $deleted = 0; | 
| 188 | 0 |  |  |  |  | 0 | $i=0; | 
| 189 | 0 |  | 0 |  |  | 0 | do { | 
| 190 | 0 | 0 |  |  |  | 0 | if ($self->_termIndex($term->{'start_offset'})->[$i]->{'id'} == $term->{'id'}) { | 
| 191 | 0 |  |  |  |  | 0 | splice(@{$self->_termIndex($term->{'start_offset'})}, $i, 1); | 
|  | 0 |  |  |  |  | 0 |  | 
| 192 | 0 |  |  |  |  | 0 | $deleted=1; | 
| 193 |  |  |  |  |  |  | } | 
| 194 | 0 |  |  |  |  | 0 | $i++; | 
| 195 |  |  |  |  |  |  | } while(($i < $max)&&($deleted==0)); | 
| 196 | 0 | 0 |  |  |  | 0 | if ($deleted==0) { | 
| 197 | 0 |  |  |  |  | 0 | $self->_printVerbose("term (" . $term->{'id'} . ") to delete not found in index\n",2); | 
| 198 |  |  |  |  |  |  | } | 
| 199 | 0 |  |  |  |  | 0 | return(0); | 
| 200 |  |  |  |  |  |  | } | 
| 201 |  |  |  |  |  |  |  | 
| 202 |  |  |  |  |  |  | sub _delSemanticType { | 
| 203 | 0 |  |  | 0 |  | 0 | my $self = shift; | 
| 204 | 0 |  |  |  |  | 0 | my $term = shift; | 
| 205 | 0 |  |  |  |  | 0 | my $semf = shift; | 
| 206 |  |  |  |  |  |  |  | 
| 207 | 0 |  |  |  |  | 0 | delete $term->{'semanticTypes'}->{$semf}; | 
| 208 | 0 |  |  |  |  | 0 | return(0); | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | sub _modifySemanticType { | 
| 212 | 3 |  |  | 3 |  | 3 | my $self = shift; | 
| 213 | 3 |  |  |  |  | 4 | my $term = shift; | 
| 214 | 3 |  |  |  |  | 4 | my $oldsemf = shift; | 
| 215 | 3 |  |  |  |  | 4 | my $newsemf = shift; | 
| 216 | 3 |  |  |  |  | 3 | my $semf; | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | # warn $term->{'semanticUnit'}; | 
| 219 |  |  |  |  |  |  | # if (exists $term->{'semanticTypes'}->{$oldsemf}) { | 
| 220 |  |  |  |  |  |  | # 	warn $oldsemf; | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | # } | 
| 223 | 3 |  |  |  |  | 7 | delete $term->{'semanticTypes'}->{$oldsemf}; | 
| 224 | 3 |  |  |  |  | 8 | foreach $semf (split /;/, $newsemf) { | 
| 225 | 4 |  |  |  |  | 9 | $self->_addSemanticType($term, $semf); | 
| 226 |  |  |  |  |  |  | #	$term->{'semanticTypes'}->{$semf} = [split /\//, $semf]; | 
| 227 |  |  |  |  |  |  | } | 
| 228 | 3 |  |  |  |  | 4 | return($newsemf); | 
| 229 |  |  |  |  |  |  | } | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | sub _addSemanticType { | 
| 232 | 5 |  |  | 5 |  | 3 | my $self = shift; | 
| 233 | 5 |  |  |  |  | 6 | my $term = shift; | 
| 234 | 5 |  |  |  |  | 6 | my $newsemf = shift; | 
| 235 |  |  |  |  |  |  |  | 
| 236 | 5 |  |  |  |  | 19 | $term->{'semanticTypes'}->{$newsemf} = [split /\//, $newsemf]; | 
| 237 | 5 |  |  |  |  | 11 | return($newsemf); | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | # DOC | 
| 241 |  |  |  |  |  |  | sub Question2Query { | 
| 242 | 9 |  |  | 9 | 1 | 19 | my $self = shift; | 
| 243 |  |  |  |  |  |  | # my $format = shift; | 
| 244 | 9 |  |  |  |  | 19 | my $semanticCorrespondance = shift; | 
| 245 |  |  |  |  |  |  | # my $outStr = shift; | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | # warn $semanticCorrespondance; | 
| 248 | 9 |  |  |  |  | 80 | $self->semanticCorrespondance($semanticCorrespondance); | 
| 249 |  |  |  |  |  |  |  | 
| 250 | 9 |  |  |  |  | 244 | $self->query(RDF::NLP::SPARQLQuery::Query->new( | 
| 251 |  |  |  |  |  |  | 'verbose' => $self->verbose, | 
| 252 |  |  |  |  |  |  | 'docId' => $self->docId, | 
| 253 |  |  |  |  |  |  | 'language' => $self->language, | 
| 254 |  |  |  |  |  |  | 'sentences' => $self->sentences, | 
| 255 |  |  |  |  |  |  | 'negation' => $self->negation, | 
| 256 |  |  |  |  |  |  | 'union' => $self->union, | 
| 257 |  |  |  |  |  |  | 'aggregation' => $self->aggregation, | 
| 258 |  |  |  |  |  |  | 'semanticCorrespondance' => $self->semanticCorrespondance, | 
| 259 |  |  |  |  |  |  | 'semFeaturesIndex' => $self->semFeaturesIndex, | 
| 260 |  |  |  |  |  |  | 'sortedSemanticUnits' => $self->_sortedSemanticUnits, | 
| 261 |  |  |  |  |  |  | 'config' => $self->config, | 
| 262 |  |  |  |  |  |  | ) | 
| 263 |  |  |  |  |  |  | ); | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | # warn $self->query->semanticCorrespondance; | 
| 266 | 9 |  |  |  |  | 45 | $self->questionAbstraction; | 
| 267 | 9 |  |  |  |  | 74 | $self->query->queryConstruction($self->questionTopic); | 
| 268 | 9 |  |  |  |  | 59 | $self->query->queryGeneration; | 
| 269 |  |  |  |  |  |  | #    $$outStr .= $self->query->queryString; | 
| 270 | 9 |  |  |  |  | 37 | return(1); | 
| 271 |  |  |  |  |  |  | } | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | # DOC | 
| 275 |  |  |  |  |  |  | sub questionAbstraction  { | 
| 276 | 9 |  |  | 9 | 1 | 19 | my $self = shift; | 
| 277 |  |  |  |  |  |  |  | 
| 278 |  |  |  |  |  |  | # my %aggregation = ('TERM' => { | 
| 279 |  |  |  |  |  |  | # 	'count' => {}, | 
| 280 |  |  |  |  |  |  | # 	'max' => {}, | 
| 281 |  |  |  |  |  |  | # 	'min' => {}, | 
| 282 |  |  |  |  |  |  | # 	'distinct' => {}, | 
| 283 |  |  |  |  |  |  | # 	'per' => {}, | 
| 284 |  |  |  |  |  |  | # 		     }, | 
| 285 |  |  |  |  |  |  | # 		     'QT' => {}, | 
| 286 |  |  |  |  |  |  | # 		     'QTVAR' => {}, | 
| 287 |  |  |  |  |  |  | # 		     'PREDICATE' => {}, | 
| 288 |  |  |  |  |  |  | # 		     'ASK' => 0, | 
| 289 |  |  |  |  |  |  | # 	); | 
| 290 |  |  |  |  |  |  | #    my @union = (); | 
| 291 |  |  |  |  |  |  | #    my $conjunction = 0; | 
| 292 | 9 |  |  |  |  | 20 | my $conjunction2 = 0; | 
| 293 |  |  |  |  |  |  | # my %negation = (); | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | #    my $questionTopic; | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | # my @wherePart; | 
| 298 |  |  |  |  |  |  | # my @selectPart; | 
| 299 | 9 |  |  |  |  | 18 | my %viewedPredicates; | 
| 300 |  |  |  |  |  |  | my %term2semFeatures; | 
| 301 |  |  |  |  |  |  | #    my %semFeaturesIndex; | 
| 302 | 0 |  |  |  |  | 0 | my %variableSet; | 
| 303 |  |  |  |  |  |  |  | 
| 304 | 0 |  |  |  |  | 0 | my %lastsemf; | 
| 305 | 0 |  |  |  |  | 0 | my $semCat; | 
| 306 | 0 |  |  |  |  | 0 | my $indexCat; | 
| 307 | 0 |  |  |  |  | 0 | my $term; | 
| 308 | 0 |  |  |  |  | 0 | my $root; | 
| 309 | 0 |  |  |  |  | 0 | my $semf; | 
| 310 | 0 |  |  |  |  | 0 | my $i; | 
| 311 | 0 |  |  |  |  | 0 | my $aggregOp; | 
| 312 |  |  |  |  |  |  |  | 
| 313 | 0 |  |  |  |  | 0 | my $line; | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | #    my @sortedSemanticUnits = (); | 
| 316 | 9 |  |  |  |  | 39 | $self->_removeLargerExtractedTerms; | 
| 317 | 9 |  |  |  |  | 37 | $self->_contextualRewriting; | 
| 318 |  |  |  |  |  |  | #    $self->_getSortedSemanticUnits(\@sortedSemanticUnits, \$conjunction, \%negation); | 
| 319 | 9 |  |  |  |  | 35 | $self->_getSortedSemanticUnits; # (\$conjunction, \%negation); | 
| 320 |  |  |  |  |  |  |  | 
| 321 | 9 |  |  |  |  | 41 | $self->_detecteNegation; # (\%negation); | 
| 322 |  |  |  |  |  |  |  | 
| 323 |  |  |  |  |  |  | # AGGREGATION/OPERATION FUNCTION? | 
| 324 | 9 |  |  |  |  | 37 | $self->_identifyAggregationOperator;#(\%aggregation); | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | # QUESTION TOPIC ? | 
| 327 |  |  |  |  |  |  | # $questionTopic = | 
| 328 | 9 |  |  |  |  | 34 | $self->_getQuestionTopic;#(\%aggregation); | 
| 329 |  |  |  |  |  |  |  | 
| 330 | 9 | 50 |  |  |  | 84 | if (defined $self->questionTopic) { | 
| 331 |  |  |  |  |  |  | # $questionTopic = $self->semanticCorrespondance->{$lang}->{'VARIABLE'}->{$questionTopicCat}; | 
| 332 | 9 |  |  |  |  | 54 | $self->_printVerbose("Question Topic: " . $self->questionTopic . "\n"); | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | # @wherePart = @{$self->query->wherePart}; | 
| 335 | 9 |  |  |  |  | 65 | %viewedPredicates = (); | 
| 336 |  |  |  |  |  |  | #	@selectPart = (); | 
| 337 | 9 |  |  |  |  | 21 | %term2semFeatures = (); | 
| 338 |  |  |  |  |  |  | #	%semFeaturesIndex = (); | 
| 339 | 9 |  |  |  |  | 129 | %variableSet = (); | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | # Identify role of the semantic elements | 
| 342 | 9 |  |  |  |  | 98 | $self->_printVerbose("\n[LOG] recording semanticTypes (Property vs Predicate)\n"); | 
| 343 |  |  |  |  |  |  |  | 
| 344 | 9 |  |  |  |  | 16 | for($i=0; $i < scalar(@{$self->_sortedSemanticUnits});$i++) { | 
|  | 38 |  |  |  |  | 234 |  | 
| 345 | 29 |  |  |  |  | 67 | $term = $self->_sortedSemanticUnits->[$i]; | 
| 346 | 29 |  |  |  |  | 157 | $self->_printVerbose(">> " . $term->{'semanticUnit'} . " ($conjunction2)\n",2); | 
| 347 | 29 |  |  |  |  | 51 | foreach $semf (keys %{$term->{'semanticTypes'}}) { | 
|  | 29 |  |  |  |  | 86 |  | 
| 348 | 31 |  |  |  |  | 114 | $self->_printVerbose("    " . $semf . "\n",2); | 
| 349 | 31 | 100 |  |  |  | 110 | if (!exists $term2semFeatures{$term->{'semanticUnit'}}) { | 
| 350 | 29 |  |  |  |  | 97 | $term2semFeatures{$term->{'semanticUnit'}} = []; | 
| 351 |  |  |  |  |  |  | } | 
| 352 | 31 |  |  |  |  | 44 | push @{$term2semFeatures{$term->{'semanticUnit'}}}, $semf; | 
|  | 31 |  |  |  |  | 91 |  | 
| 353 |  |  |  |  |  |  |  | 
| 354 | 31 |  |  |  |  | 101 | $semCat = $term->{'semanticTypes'}->{$semf}; | 
| 355 |  |  |  |  |  |  | #		    foreach $semCat (@{$semf->semantic_category}) { | 
| 356 |  |  |  |  |  |  | # warn join('/', @$semCat) . " ($conjunction2)\n"; | 
| 357 | 31 | 100 | 66 |  |  | 36 | if (scalar(@{$semCat}) > 1) { | 
|  | 31 | 100 | 66 |  |  | 85 |  | 
|  | 22 | 100 | 66 |  |  | 227 |  | 
|  |  | 100 | 33 |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 358 | 9 |  |  |  |  | 67 | $self->_printVerbose(join('/', @$semCat) . " ($conjunction2)\n",2); | 
| 359 | 9 | 100 | 66 |  |  | 200 | if ((exists $self->semanticCorrespondance->{$self->language}->{'RESOURCE'}->{$semCat->[0]."/".$semCat->[1]}->{'CORRESP'}) && | 
| 360 |  |  |  |  |  |  | (exists $self->semanticCorrespondance->{$self->language}->{'RESOURCE'}->{$semCat->[0]."/".$semCat->[1]}->{'CORRESP'}->{$semCat->[$#$semCat-1]})) { | 
| 361 | 8 |  |  |  |  | 73 | $indexCat=$self->semanticCorrespondance->{$self->language}->{'RESOURCE'}->{$semCat->[0]."/".$semCat->[1]}->{'CORRESP'}->{$semCat->[$#$semCat-1]}; | 
| 362 | 8 |  |  |  |  | 33 | $root=$semCat->[0]."/".$semCat->[1]; | 
| 363 |  |  |  |  |  |  | } else { | 
| 364 | 1 |  |  |  |  | 5 | my @tmp = @$semCat; | 
| 365 | 1 |  |  |  |  | 2 | pop @tmp; | 
| 366 | 1 |  |  |  |  | 4 | $indexCat=join('/', @tmp); | 
| 367 | 1 |  |  |  |  | 9 | $root=$self->semanticCorrespondance->{$self->language}->{'DEFAULT_ROOT'}; | 
| 368 |  |  |  |  |  |  | } | 
| 369 | 9 |  |  |  |  | 53 | $self->_printVerbose("indexCat: $indexCat ($root)\n",2); | 
| 370 | 9 | 50 |  |  |  | 49 | if (!exists($self->semFeaturesIndex->{$indexCat})) { | 
| 371 | 9 |  |  |  |  | 65 | $self->semFeaturesIndex->{$indexCat}->{'ROOT'} = $root; | 
| 372 | 9 |  |  |  |  | 45 | $self->semFeaturesIndex->{$indexCat}->{'CAT'} = []; | 
| 373 | 9 |  |  |  |  | 44 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'} = {}; | 
| 374 | 9 |  |  |  |  | 42 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_S'} = -1; | 
| 375 | 9 |  |  |  |  | 36 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_O'} = -1; | 
| 376 |  |  |  |  |  |  | } | 
| 377 |  |  |  |  |  |  | # warn ">> $indexCat : " . join("/", @$semCat) . "\n"; | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | #			    @lastsemf=@{$document->getAnnotations->getSemanticFeaturesLevel->getElementFromIndex("refid_semantic_unit", $term->{'id'})}; | 
| 380 |  |  |  |  |  |  | # @lastsemf = keys %{$term->{'semanticTypes'}; | 
| 381 | 9 |  |  |  |  | 18 | %lastsemf = %{$term->{'semanticTypes'}}; | 
|  | 9 |  |  |  |  | 48 |  | 
| 382 | 9 |  |  |  |  | 19 | push @{$self->semFeaturesIndex->{$indexCat}->{'CAT'}}, $semCat; | 
|  | 9 |  |  |  |  | 44 |  | 
| 383 | 9 |  |  |  |  | 52 | $self->semFeaturesIndex->{$indexCat}->{'TERM'}->{$term->{'id'}} = $term; | 
| 384 |  |  |  |  |  |  | # warn "   " . $semf . "\n"; | 
| 385 | 9 |  |  |  |  | 72 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'}->{join("/", @$semCat)} = $semCat; | 
| 386 |  |  |  |  |  |  | #warn "=====> " . $indexCat . "\n"; | 
| 387 |  |  |  |  |  |  | } elsif ((scalar(@{$semCat}) == 1) && | 
| 388 | 21 |  |  |  |  | 131 | (exists $self->semanticCorrespondance->{$self->language}->{'CONST'}->{$semCat->[0]})) { | 
| 389 | 1 |  |  |  |  | 5 | $self->_printVerbose("\t\tCONST\n",2); | 
| 390 | 1 |  |  |  |  | 3 | $indexCat=$semCat->[0]; | 
| 391 | 1 | 50 |  |  |  | 7 | if (!exists($self->semFeaturesIndex->{$indexCat})) { | 
| 392 | 1 |  |  |  |  | 6 | $self->semFeaturesIndex->{$indexCat}->{'ROOT'} = "const"; | 
| 393 | 1 |  |  |  |  | 6 | $self->semFeaturesIndex->{$indexCat}->{'CAT'} = []; | 
| 394 | 1 |  |  |  |  | 4 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'} = {}; | 
| 395 | 1 |  |  |  |  | 3 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_S'} = -1; | 
| 396 | 1 |  |  |  |  | 4 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_O'} = -1; | 
| 397 |  |  |  |  |  |  | } | 
| 398 |  |  |  |  |  |  | # warn ">> $indexCat : " . join("/", @$semCat) . "\n"; | 
| 399 | 1 |  |  |  |  | 2 | push @{$self->semFeaturesIndex->{$indexCat}->{'CAT'}}, [$term->{'semanticUnit'}]; | 
|  | 1 |  |  |  |  | 6 |  | 
| 400 | 1 |  |  |  |  | 6 | $self->semFeaturesIndex->{$indexCat}->{'TERM'}->{$term->{'id'}} = $term; | 
| 401 |  |  |  |  |  |  | # warn "   " . $semf . "\n"; | 
| 402 | 1 |  |  |  |  | 10 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'}->{join("/", @$semCat)} = [$term->{'semanticUnit'}]; | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | } elsif ((scalar(@{$semCat}) == 1) && | 
| 405 | 20 |  |  |  |  | 125 | ($semCat->[0] eq "STRING")) { | 
| 406 | 1 |  |  |  |  | 5 | $self->_printVerbose("\t\tSTRING\n",2); | 
| 407 | 1 |  |  |  |  | 2 | $indexCat=$semCat->[0]; | 
| 408 | 1 | 50 |  |  |  | 5 | if (!exists($self->semFeaturesIndex->{$indexCat})) { | 
| 409 | 1 |  |  |  |  | 5 | $self->semFeaturesIndex->{$indexCat}->{'ROOT'} = "STRING"; | 
| 410 |  |  |  |  |  |  | # $self->semFeaturesIndex->{$indexCat}->{'TERM'} = []; | 
| 411 | 1 |  |  |  |  | 5 | $self->semFeaturesIndex->{$indexCat}->{'CAT'} = []; | 
| 412 | 1 |  |  |  |  | 6 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'} = {}; | 
| 413 | 1 |  |  |  |  | 4 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_S'} = -1; | 
| 414 | 1 |  |  |  |  | 5 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_O'} = -1; | 
| 415 |  |  |  |  |  |  | } | 
| 416 |  |  |  |  |  |  | # warn ">> $indexCat : " . join("/", @$semCat) . "\n"; | 
| 417 | 1 |  |  |  |  | 1 | push @{$self->semFeaturesIndex->{$indexCat}->{'CAT'}}, [$term->{'canonical_form'}]; | 
|  | 1 |  |  |  |  | 5 |  | 
| 418 | 1 |  |  |  |  | 6 | $self->semFeaturesIndex->{$indexCat}->{'TERM'}->{$term->{'id'}} = $term; | 
| 419 |  |  |  |  |  |  | # warn "   " . $semf . "\n"; | 
| 420 | 1 |  |  |  |  | 9 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'}->{join("/", @$semCat)} = [$term->{'canonical_form'}]; | 
| 421 |  |  |  |  |  |  |  | 
| 422 |  |  |  |  |  |  | } elsif ((scalar(@{$semCat}) == 1) && | 
| 423 | 19 |  |  |  |  | 177 | ($semCat->[0] eq "REGEX")) { | 
| 424 | 1 |  |  |  |  | 3 | $self->_printVerbose("\t\tREGEX\n",2); | 
| 425 | 1 | 50 |  |  |  | 4 | if ($conjunction2 eq 1) { | 
| 426 | 0 |  |  |  |  | 0 | my $semCat2; | 
| 427 |  |  |  |  |  |  | my $semf2; | 
| 428 | 0 |  |  |  |  | 0 | foreach $semf2 (keys %lastsemf) { | 
| 429 | 0 |  |  |  |  | 0 | $self->_printVerbose("\t\t\t -> " . $semf2 . "\n",2); | 
| 430 | 0 |  |  |  |  | 0 | $semCat2 = 	$lastsemf{$semf2}; # $semCat = $term->{'semanticTypes'}->{$semf}; | 
| 431 |  |  |  |  |  |  | #				    foreach $semCat2 (@{$semf2->semantic_category}) { | 
| 432 | 0 |  |  |  |  | 0 | $self->_printVerbose("\t\t\t---> " . join("/", @$semCat2) . "\n",2); | 
| 433 | 0 |  |  |  |  | 0 | my $indexCat2=$self->semanticCorrespondance->{$self->language}->{'RESOURCE'}->{$semCat2->[0]."/".$semCat2->[1]}->{'CORRESP'}->{$semCat2->[$#$semCat2-1]}; | 
| 434 | 0 |  |  |  |  | 0 | my @newSemCat = @$semCat2; | 
| 435 | 0 | 0 |  |  |  | 0 | if ($self->_regexForm == 1) { | 
| 436 | 0 |  |  |  |  | 0 | $newSemCat[$#newSemCat]=":NODEREGEX:".$term->{'semanticUnit'}; | 
| 437 |  |  |  |  |  |  | } else { | 
| 438 | 0 |  |  |  |  | 0 | $newSemCat[$#newSemCat]=":NODEREGEX:".$term->{'canonical_form'}; | 
| 439 |  |  |  |  |  |  | } | 
| 440 | 0 | 0 |  |  |  | 0 | if (!exists($self->semFeaturesIndex->{$indexCat2})) { | 
| 441 | 0 |  |  |  |  | 0 | $self->semFeaturesIndex->{$indexCat2}->{'ROOT'} = "REGEX"; | 
| 442 | 0 |  |  |  |  | 0 | $self->semFeaturesIndex->{$indexCat2}->{'CAT'} = []; | 
| 443 | 0 |  |  |  |  | 0 | $self->semFeaturesIndex->{$indexCat2}->{'CAT2'} = {}; | 
| 444 | 0 |  |  |  |  | 0 | $self->semFeaturesIndex->{$indexCat2}->{'SEEN_S'} = -1; | 
| 445 | 0 |  |  |  |  | 0 | $self->semFeaturesIndex->{$indexCat2}->{'SEEN_O'} = -1; | 
| 446 |  |  |  |  |  |  | } | 
| 447 |  |  |  |  |  |  |  | 
| 448 |  |  |  |  |  |  | # warn ">> indexCat2: $indexCat2 : " . join("/", @newSemCat) . "\n"; | 
| 449 | 0 |  |  |  |  | 0 | push @{$self->semFeaturesIndex->{$indexCat2}->{'CAT'}}, [@newSemCat]; | 
|  | 0 |  |  |  |  | 0 |  | 
| 450 | 0 |  |  |  |  | 0 | $self->semFeaturesIndex->{$indexCat2}->{'TERM'}->{$term->{'id'}} = $term; | 
| 451 |  |  |  |  |  |  | # warn "   " . $lastsemf{$semf2} . "\n"; | 
| 452 | 0 |  |  |  |  | 0 | $self->semFeaturesIndex->{$indexCat2}->{'CAT2'}->{join("/", @newSemCat)} = [@newSemCat]; | 
| 453 |  |  |  |  |  |  | #				    } | 
| 454 |  |  |  |  |  |  | } | 
| 455 |  |  |  |  |  |  | } else { | 
| 456 | 1 |  |  |  |  | 2 | $indexCat="STRINGREGEX"; | 
| 457 | 1 |  |  |  |  | 2 | my @newSemCat; | 
| 458 | 1 | 50 |  |  |  | 4 | if ($self->_regexForm == 1) { | 
| 459 | 1 |  |  |  |  | 5 | @newSemCat = (":LABELREGEX:".$term->{'semanticUnit'}); | 
| 460 |  |  |  |  |  |  | } else { | 
| 461 | 0 |  |  |  |  | 0 | @newSemCat = (":LABELREGEX:".$term->{'canonical_form'}); | 
| 462 |  |  |  |  |  |  | } | 
| 463 | 1 | 50 |  |  |  | 7 | if (!exists($self->semFeaturesIndex->{$indexCat})) { | 
| 464 | 1 |  |  |  |  | 5 | $self->semFeaturesIndex->{$indexCat}->{'ROOT'} = "REGEX"; | 
| 465 | 1 |  |  |  |  | 5 | $self->semFeaturesIndex->{$indexCat}->{'CAT'} = []; | 
| 466 | 1 |  |  |  |  | 5 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'} = {}; | 
| 467 | 1 |  |  |  |  | 4 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_S'} = -1; | 
| 468 | 1 |  |  |  |  | 4 | $self->semFeaturesIndex->{$indexCat}->{'SEEN_O'} = -1; | 
| 469 |  |  |  |  |  |  | } | 
| 470 | 1 |  |  |  |  | 2 | push @{$self->semFeaturesIndex->{$indexCat}->{'CAT'}}, [@newSemCat]; | 
|  | 1 |  |  |  |  | 6 |  | 
| 471 | 1 |  |  |  |  | 5 | $self->semFeaturesIndex->{$indexCat}->{'TERM'}->{$term->{'id'}} = $term; | 
| 472 | 1 |  |  |  |  | 8 | $self->semFeaturesIndex->{$indexCat}->{'CAT2'}->{join("/", @newSemCat)} = [@newSemCat]; | 
| 473 |  |  |  |  |  |  | } | 
| 474 | 1 |  |  |  |  | 3 | $conjunction2 = 0; | 
| 475 |  |  |  |  |  |  |  | 
| 476 |  |  |  |  |  |  | } elsif ((scalar(@{$semCat}) == 1) && | 
| 477 | 19 |  |  |  |  | 62 | ($semCat->[0] eq "conjunction")) { | 
| 478 | 0 |  |  |  |  | 0 | $conjunction2 = 1; | 
| 479 |  |  |  |  |  |  | } elsif (scalar(@{$semCat}) == 1) { | 
| 480 |  |  |  |  |  |  | # $self->_addPredicate('last', $self->semanticCorrespondance->{$self->language}->{'PREDICATE'}->{$indexCat}->{'NAME'}); | 
| 481 |  |  |  |  |  |  | # warn "Predicate: " . $semCat->[0] . "\n"; | 
| 482 | 19 | 100 | 66 |  |  | 210 | if ((!exists $viewedPredicates{$semCat->[0]}) && | 
| 483 |  |  |  |  |  |  | (exists $self->semanticCorrespondance->{$self->language}->{'PREDICATE'}->{$semCat->[0]})) { | 
| 484 |  |  |  |  |  |  | # warn "Predicate: " . $semCat->[0] . "\n"; | 
| 485 | 11 |  |  |  |  | 34 | $viewedPredicates{$semCat->[0]}++; | 
| 486 | 11 |  |  |  |  | 83 | $self->query->_newWherePartLine; | 
| 487 | 11 |  |  |  |  | 88 | $self->query->_addPredicate('last', $semCat->[0]); | 
| 488 | 11 | 50 |  |  |  | 53 | if (exists $self->negation->{$term->{'id'}}) { | 
| 489 | 0 |  |  |  |  | 0 | $self->negation->{$term->{'id'}} = scalar(@{$self->query->wherePart}) - 1; | 
|  | 0 |  |  |  |  | 0 |  | 
| 490 |  |  |  |  |  |  | } | 
| 491 | 11 |  |  |  |  | 19 | foreach $aggregOp (keys %{$self->aggregation->{'TERM'}}) { | 
|  | 11 |  |  |  |  | 56 |  | 
| 492 | 55 | 50 |  |  |  | 410 | if (exists $self->aggregation->{'TERM'}->{$aggregOp}->{$term->{'id'}}) { | 
| 493 | 0 |  |  |  |  | 0 | $self->aggregation->{'TERM'}->{$aggregOp}->{$term->{'id'}} = $semCat->[0]; | 
| 494 | 0 |  |  |  |  | 0 | $self->aggregation->{'PREDICATE'}->{$semCat->[0]} = $aggregOp; # = $term->{'id'}; | 
| 495 | 0 |  |  |  |  | 0 | $self->_printVerbose("aggregation ($aggregOp) for " . $term->{'semanticUnit'} . " at " . $self->aggregation->{'TERM'}->{$aggregOp}->{$term->{'id'}} . " (" . $semCat->[0] . ")\n",2); | 
| 496 |  |  |  |  |  |  | } | 
| 497 |  |  |  |  |  |  | } | 
| 498 |  |  |  |  |  |  | } | 
| 499 |  |  |  |  |  |  | # } | 
| 500 |  |  |  |  |  |  | } | 
| 501 |  |  |  |  |  |  | } | 
| 502 |  |  |  |  |  |  | } | 
| 503 |  |  |  |  |  |  |  | 
| 504 | 9 |  |  |  |  | 45 | $self->_printVerbose("[LOG] Remove Property from semFeaturesIndex\n"); | 
| 505 |  |  |  |  |  |  | # remove Property from semFeaturesIndex | 
| 506 | 9 |  |  |  |  | 13 | foreach $line (@{$self->query->wherePart}) { | 
|  | 9 |  |  |  |  | 92 |  | 
| 507 | 11 |  |  |  |  | 54 | delete $self->semFeaturesIndex->{$line->{'PREDICATE'}->[0]}; | 
| 508 |  |  |  |  |  |  | } | 
| 509 | 9 |  |  |  |  | 61 | $self->query->_internalPrintOfWherePart; # (\@wherePart); | 
| 510 |  |  |  |  |  |  |  | 
| 511 |  |  |  |  |  |  | } else { | 
| 512 | 0 |  |  |  |  | 0 | return(-1); | 
| 513 |  |  |  |  |  |  | } | 
| 514 | 9 |  |  |  |  | 46 | return(0); | 
| 515 |  |  |  |  |  |  | } | 
| 516 |  |  |  |  |  |  |  | 
| 517 |  |  |  |  |  |  |  | 
# Removes semantic units that have no semantic type when some other unit
# carrying at least one semantic type is reported by _getTerms for one of
# the offsets the untyped unit spans (start_offset .. end_offset).
#
# Removal is deferred until the scan is finished, so the list returned by
# semanticUnits is never altered while it is being traversed.
#
# Returns the number of units that were removed.
# NOTE(review): _getTerms is presumably an offset -> units lookup; confirm
# against its definition.
sub _removeLargerExtractedTerms {
    my ($self) = @_;

    my @untypedToDrop;

    $self->_printVerbose("[LOG] remove larger extracted terms\n");

    UNIT:
    foreach my $unit (@{$self->semanticUnits}) {
        # Only units without any semantic type are candidates for removal.
        next UNIT if (scalar(keys %{$unit->{'semanticTypes'}}) != 0);

        my $position = $unit->{'start_offset'};
        # The first offset is always inspected, even if end_offset happens
        # to be smaller than start_offset.
        while (1) {
            if ($self->_getTerms($position)) {
                foreach my $other (@{$self->_getTerms($position)}) {
                    next if ($other->{'id'} == $unit->{'id'});
                    next if (scalar(keys %{$other->{'semanticTypes'}}) <= 0);

                    # A typed unit shares this offset: schedule the untyped
                    # one for removal and stop scanning it.
                    push @untypedToDrop, $unit;
                    next UNIT;
                }
            }
            $position++;
            last if ($position > $unit->{'end_offset'});
        }
    }

    foreach my $unit (@untypedToDrop) {
        $self->_delSemanticUnit($unit);
        $self->_printVerbose("remove " . $unit->{'semanticUnit'} . "\n", 2);
    }
    $self->_printVerbose("done\n");
    return(scalar(@untypedToDrop));
}
| 562 |  |  |  |  |  |  |  | 
# Applies the contextual rewriting rules found under
# semanticCorrespondance->{language}->{'CTXTL_REWRITE'}->{'RULE'}.
#
# For every semantic type ($semf) of every sorted semantic unit, the rules
# registered for that type (a single hash or an array of hashes) are tried
# against the semantic types ($semf2) of the units that come later in the
# sorted list.  A rule fires when $semf2 equals the rule's CTXT, starts with
# "CTXT:" or "CTXT/", or contains ":CTXT" after its first character.
#
# When a rule fires:
#   * NEWCTXT "" deletes $semf2 from the context unit, "-" leaves it alone,
#     anything else rewrites $semf2 into NEWCTXT;
#   * NEWSF "" deletes $semf from the current unit, "-" leaves it alone,
#     anything else rewrites $semf into NEWSF and stops processing the
#     remaining rules and context units for this $semf.
sub _contextualRewriting {
    my ($self) = @_;

    $self->_printVerbose("[LOG] contextual rewriting\n");

    for (my $i = 0; $i < scalar(@{$self->_sortedSemanticUnits}); $i++) {
        foreach my $semf (keys %{$self->_sortedSemanticUnits->[$i]->{'semanticTypes'}}) {
            # Normalise the configured rule(s) for this type into an array ref.
            my $rules = [];
            if (exists $self->semanticCorrespondance->{$self->language}->{'CTXTL_REWRITE'}->{'RULE'}->{$semf}) {
                my $entry = $self->semanticCorrespondance->{$self->language}->{'CTXTL_REWRITE'}->{'RULE'}->{$semf};
                $rules = (ref($entry) eq "HASH") ? [$entry] : $entry;
            }

            RULE:
            for (my $k = 0; $k < scalar(@$rules); $k++) {
                my $rule = $rules->[$k];

                for (my $j = $i + 1; $j < scalar(@{$self->_sortedSemanticUnits}); $j++) {
                    foreach my $semf2 (keys %{$self->_sortedSemanticUnits->[$j]->{'semanticTypes'}}) {
                        # Skip context types the rule does not apply to.
                        next unless (($semf2 eq $rule->{'CTXT'}) ||
                                     (index($semf2, $rule->{'CTXT'} . ":") == 0) ||
                                     (index($semf2, $rule->{'CTXT'} . "/") == 0) ||
                                     (index($semf2, ":" . $rule->{'CTXT'}) > 0));

                        # 1. Update the context unit (position $j).
                        my $contextUnit = $self->_sortedSemanticUnits->[$j];
                        if ($rule->{'NEWCTXT'} eq "") {
                            $self->_printVerbose($contextUnit->{'semanticUnit'} . ": del semf " . $semf2 . "\n",2);
                            $self->_delSemanticType($contextUnit,$semf2);
                        } elsif ($rule->{'NEWCTXT'} ne "-") {
                            $self->_printVerbose($contextUnit->{'semanticUnit'} . ": rewrite ctxt " . $semf2 . " into " . $rule->{'NEWCTXT'} . "\n",2);
                            $self->_modifySemanticType($contextUnit, $semf2, $rule->{'NEWCTXT'});
                        } else {
                            $self->_printVerbose($contextUnit->{'semanticUnit'} . ": keep same ctxt (" . $semf2 . ")\n",2);
                        }

                        # 2. Update the current unit (position $i).  It is
                        # looked up again because the helpers above are
                        # opaque from here.
                        my $currentUnit = $self->_sortedSemanticUnits->[$i];
                        my $newsemf = $rule->{'NEWSF'};
                        if ($newsemf eq "") {
                            $self->_printVerbose($currentUnit->{'semanticUnit'} . ": del semf " . $semf . "\n",2);
                            $self->_delSemanticType($currentUnit,$semf);
                        } elsif ($rule->{'NEWSF'} ne "-") {
                            $self->_printVerbose($currentUnit->{'semanticUnit'} . ": rewrite " . $semf . " into " . $rule->{'NEWSF'} . "\n\n",2);
                            $self->_modifySemanticType($currentUnit, $semf, $rule->{'NEWSF'});
                            # $semf has been rewritten: no further rule or
                            # context unit is examined for it.
                            last RULE;
                        } else {
                            $self->_printVerbose($currentUnit->{'semanticUnit'} . ": keep same semf (" . $semf . ")\n\n",2);
                        }
                    }
                }
            }
        }
    }
    $self->_printVerbose("done\n\n");
}
| 654 |  |  |  |  |  |  |  | 
# Rebuilds the list stored in _sortedSemanticUnits so that only the largest
# semantic unit of every group of co-located units is kept.
#
# Steps:
#   1. Reset _sortedSemanticUnits (via _reset_sortedSemanticUnits) and walk
#      the resulting list.  A unit holding the semantic type "conjunction"
#      switches the query's conjunction flag on; a unit without any semantic
#      type receives the type "REGEX".
#   2. Consecutive units sharing the same start_offset are reduced to the
#      one selected by _getLargerTerm.
#   3. The survivors are ordered by start_offset and every unit lying inside
#      the span of an earlier unit is dropped in favour of that unit.
#
# The result is stored with $self->_sortedSemanticUnits([...]).
# NOTE(review): step 2 relies on the reset list already being ordered by
# start_offset -- confirm against _reset_sortedSemanticUnits.
sub _getSortedSemanticUnits {
    my ($self) = @_;

    $self->_printVerbose("[LOG] getSortedSemanticUnit\n");

    $self->_reset_sortedSemanticUnits;

    # Step 1: collect the units, flag conjunctions, type untyped units.
    my @candidates;
    foreach my $term (@{$self->_sortedSemanticUnits}) {
        $self->_printVerbose("-> " . $term->{'semanticUnit'} . " : " . $term->{'id'} .  "\n", 2);
        push @candidates, $term;
        if (scalar(keys %{$term->{'semanticTypes'}}) > 0) {
            foreach my $semf (keys %{$term->{'semanticTypes'}}) {
                if ($semf eq "conjunction") {
                    $self->query->conjunction(1);
                }
            }
        } else {
            # Untyped units are treated as free text to be matched by REGEX.
            $self->_addSemanticType($term, "REGEX");
        }
    }

    # Step 2: among consecutive units with the same start, keep the largest.
    my @sameStartReduced;
    my @group = ();
    foreach my $term (@candidates) {
        if ((scalar(@group) > 0) &&
            ($term->{'start_offset'} != $group[$#group]->{'start_offset'})) {
            push @sameStartReduced,
                ((scalar(@group) > 1) ? $self->_getLargerTerm(\@group) : @group);
            @group = ();
        }
        push @group, $term;
    }
    push @sameStartReduced,
        ((scalar(@group) > 1) ? $self->_getLargerTerm(\@group) : @group);

    # Step 3: drop units nested inside an earlier, larger unit.
    my @largestUnits = ();
    my $groupEnd;
    @group = ();
    foreach my $term (sort {$a->{'start_offset'} <=> $b->{'start_offset'}} @sameStartReduced) {
        # The end offset is compared with the end of the unit that opened
        # the group (the covering unit), not with the most recently added
        # unit: otherwise a unit nested in the covering unit but ending
        # after a shorter nested sibling would wrongly start a new group
        # and survive.
        if ((scalar(@group) > 0) && ($term->{'end_offset'} > $groupEnd)) {
            push @largestUnits, $self->_getLargerTerm(\@group);
            @group = ();
        }
        if (scalar(@group) == 0) {
            # Every unit admitted later ends at or before this offset, so it
            # stays the maximum end offset of the group.
            $groupEnd = $term->{'end_offset'};
        }
        push @group, $term;
    }
    push @largestUnits,
        ((scalar(@group) > 1) ? $self->_getLargerTerm(\@group) : @group);

    $self->_sortedSemanticUnits([@largestUnits]);

    $self->_printVerbose("done\n\n");
}
| 762 |  |  |  |  |  |  |  | 
| 763 |  |  |  |  |  |  |  | 
# Picks one term out of the array reference $terms.
#
# The first term is the initial pick; each following term whose id differs
# from the current pick replaces it when it starts before the pick or ends
# after it.  Returns the final pick (undef when $terms is empty).
sub _getLargerTerm {
    my ($self, $terms) = @_;

    my $widest = $terms->[0];

    for my $candidate (@$terms) {
        # A term is never compared with itself.
        next if ($candidate->{'id'} == $widest->{'id'});

        $widest = $candidate
            if (($candidate->{'start_offset'} < $widest->{'start_offset'}) ||
                ($widest->{'end_offset'} < $candidate->{'end_offset'}));
    }
    return($widest);
}
| 782 |  |  |  |  |  |  |  | 
# Marks the predicates that are under the scope of a negation.
#
# The sorted semantic units are scanned in order.  Meeting the semantic type
# "negation" arms a flag; the next semantic type that is a key of
# semanticCorrespondance->{language}->{'PREDICATE'} gets an entry (empty
# string) in $self->negation keyed by the id of its unit, and the flag is
# disarmed.
sub _detecteNegation {
    my ($self) = @_;

    $self->_printVerbose("[LOG] DetecteNegation\n");

    my $negationPending = 0;
    foreach my $unit (@{$self->_sortedSemanticUnits}) {
        foreach my $type (keys %{$unit->{'semanticTypes'}}) {
            if ((exists $self->semanticCorrespondance->{$self->language}->{'PREDICATE'}->{$type}) &&
                ($negationPending == 1)) {
                $self->negation->{$unit->{'id'}}="";
                $self->_printVerbose($unit->{'semanticUnit'} . " is negated\n",2);
                $negationPending = 0;
            }
            # Checked after the predicate test, so a negation never applies
            # to the very semantic type that introduces it.
            if ($type eq "negation") {
                $self->_printVerbose("found negation\n",2);
                $negationPending = 1;
            }
        }
    }
}
| 819 |  |  |  |  |  |  |  | 
# Associates aggregation operators with the semantic units they apply to.
#
# The sorted semantic units are scanned in order.  Every semantic type that
# is a key of $self->aggregation->{'TERM'} is remembered as a pending
# operator.  When a semantic type that is a key of
# semanticCorrespondance->{language}->{'VARIABLE'} is met, each pending
# operator gets an entry (empty string) for the id of that unit under
# aggregation->{'TERM'}->{operator}, and the pending list is emptied.
# The semantic type "exists" sets aggregation->{'ASK'} to 1.
sub _identifyAggregationOperator {
    my ($self) = @_;

    my @pendingOperators = ();

    $self->_printVerbose("[LOG] _identifyAggregationOperator\n");
    foreach my $unit (@{$self->_sortedSemanticUnits}) {
        foreach my $type (keys %{$unit->{'semanticTypes'}}) {
            $self->_printVerbose("==> " . $type . " (" . $unit->{'semanticUnit'} . ")\n",2);

            push @pendingOperators, $type
                if (exists $self->aggregation->{'TERM'}->{$type});

            if ((exists $self->semanticCorrespondance->{$self->language}->{'VARIABLE'}->{$type}) &&
                (scalar(@pendingOperators) != 0)) {
                foreach my $operator (@pendingOperators) {
                    $self->aggregation->{'TERM'}->{$operator}->{$unit->{'id'}}="";
                    $self->_printVerbose($unit->{'semanticUnit'} . " is aggregated ($operator)\n",2);
                }
                @pendingOperators = ();
            }

            $self->aggregation->{'ASK'} = 1 if ($type eq "exists");
        }
    }
}
| 858 |  |  |  |  |  |  |  | 
# Determines the question topic from the sorted semantic units.
#
# Units are examined in order.  For each semantic type of a unit, the
# elements stored for that type are joined with '/' and looked up in
# semanticCorrespondance->{language}->{'VARIABLE'}.  On a hit the mapped
# value is stored through $self->questionTopic, and every aggregation
# operator already attached to that unit (aggregation->{'TERM'}) is bound to
# the topic, both in aggregation->{'TERM'} and aggregation->{'QT'}.
# The scan stops after the first unit that produced a hit; the remaining
# semantic types of that unit are still examined.
#
# Returns the current value of $self->questionTopic.
sub _getQuestionTopic {
    my ($self) = @_;

    my $topicFound = 0;

    UNIT:
    for (my $position = 0; $position < scalar(@{$self->_sortedSemanticUnits}); $position++) {
        my $unit = $self->_sortedSemanticUnits->[$position];

        $self->_printVerbose("QT? " . $unit->{'semanticUnit'} . "\n",2);
        foreach my $semf (keys %{$unit->{'semanticTypes'}}) {
            $self->_printVerbose($semf . "\n",2);

            my $category = join('/', @{$unit->{'semanticTypes'}->{$semf}});
            $self->_printVerbose("questionTopicCat $category\n", 2);
            next unless (exists $self->semanticCorrespondance->{$self->language}->{'VARIABLE'}->{$category});

            $self->questionTopic($self->semanticCorrespondance->{$self->language}->{'VARIABLE'}->{$category});
            $self->_printVerbose("Question Topic: " . $category . " : " . $self->questionTopic . "\n",2);
            $topicFound = 1;

            # Bind the aggregation operators attached to this unit to the topic.
            foreach my $aggregOp (keys %{$self->aggregation->{'TERM'}}) {
                next unless (exists $self->aggregation->{'TERM'}->{$aggregOp}->{$unit->{'id'}});

                $self->aggregation->{'TERM'}->{$aggregOp}->{$unit->{'id'}} = $self->questionTopic;
                $self->aggregation->{'QT'}->{$aggregOp}->{$self->questionTopic} = $unit->{'id'};
                $self->_printVerbose("aggregation ($aggregOp) for " . $unit->{'semanticUnit'} . " at " . $self->aggregation->{'TERM'}->{$aggregOp}->{$unit->{'id'}} . " (" . $self->questionTopic . ")\n",2);
            }
        }
        last UNIT if ($topicFound);
    }

    $self->_printVerbose("questionTopic: " . $self->questionTopic . "\n", 2);

    return($self->questionTopic);
}
| 906 |  |  |  |  |  |  |  | 
# Emits $msg through warn when the verbosity of the object allows it.
#
# Parameters:
#   $msg   - text to emit
#   $level - minimal verbosity required to emit the message (defaults to 1)
#
# The message is emitted when the object's verbosity is greater than 0 and
# at least $level.  The 'verbose' attribute has no default value, so an
# object built without it returns undef here; that is treated as verbosity 0
# (silent) rather than compared as an uninitialized value, which raised a
# warning on every call.
sub _printVerbose {
    my($self, $msg, $level) = @_;

    if (!defined $level) {
        $level = 1;
    }

    my $verbosity = $self->verbose;
    if (!defined $verbosity) {
        $verbosity = 0;
    }

    if (($verbosity > 0) && ($verbosity >= $level)) {
        warn "$msg";
    }

    return;
}
| 919 |  |  |  |  |  |  |  | 
| 920 |  |  |  |  |  |  | 1; | 
| 921 |  |  |  |  |  |  |  | 
| 922 |  |  |  |  |  |  | __END__ |