| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Lingua::Ogmios::Annotations::SemanticUnit; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 16 |  |  | 16 |  | 73 | use Lingua::Ogmios::Annotations::Element; | 
|  | 16 |  |  |  |  | 26 |  | 
|  | 16 |  |  |  |  | 462 |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 16 |  |  | 16 |  | 73 | use strict; | 
|  | 16 |  |  |  |  | 24 |  | 
|  | 16 |  |  |  |  | 438 |  | 
| 6 | 16 |  |  | 16 |  | 62 | use warnings; | 
|  | 16 |  |  |  |  | 23 |  | 
|  | 16 |  |  |  |  | 38017 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | our @ISA = qw(Lingua::Ogmios::Annotations::Element); | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | # | 
| 11 |  |  |  |  |  |  | # | 
| 12 |  |  |  |  |  |  | #                         (named_entity | term | undefined)+ > | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | # | 
| 16 |  |  |  |  |  |  | # | 
| 17 |  |  |  |  |  |  | #                          | refid_word | list_refid_token), | 
| 18 |  |  |  |  |  |  | #                          weights, negation | 
| 19 |  |  |  |  |  |  | #                          form?, canonical_form?)           > | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | # | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | # | 
| 24 |  |  |  |  |  |  | # | 
| 25 |  |  |  |  |  |  | #                          | refid_word | list_refid_token), | 
| 26 |  |  |  |  |  |  | #                            form?, canonical_form?)         > | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | # | 
| 29 |  |  |  |  |  |  | # | 
| 30 |  |  |  |  |  |  | #                          refid_word | list_refid_token)? , | 
| 31 |  |  |  |  |  |  | #                          form?, canonical_form?, | 
| 32 |  |  |  |  |  |  | #                          named_entity_type)                > | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | # at the creation defined a type (named_netity, term, undefined) | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | sub new { | 
| 37 | 0 |  |  | 0 | 0 |  | my ($class, $fields) = @_; | 
| 38 |  |  |  |  |  |  |  | 
| 39 | 0 |  |  |  |  |  | my @refTypes = ('refid_phrase', 'refid_word', 'list_refid_token'); | 
| 40 |  |  |  |  |  |  |  | 
| 41 | 0 | 0 |  |  |  |  | if (!defined $fields->{'id'}) { | 
| 42 | 0 |  |  |  |  |  | $fields->{'id'} = -1; | 
| 43 |  |  |  |  |  |  | } | 
| 44 | 0 | 0 |  |  |  |  | if (!defined $fields->{'type'}) { # named_entity, term or undefined | 
| 45 | 0 |  |  |  |  |  | die("type is not defined"); | 
| 46 |  |  |  |  |  |  | } | 
| 47 | 0 |  |  |  |  |  | my $sem_unit = $class->SUPER::new({ | 
| 48 |  |  |  |  |  |  | 'id' => $fields->{'id'}, | 
| 49 |  |  |  |  |  |  | # 	'form' => $fields->{'form'}, | 
| 50 |  |  |  |  |  |  | } | 
| 51 |  |  |  |  |  |  | ); | 
| 52 |  |  |  |  |  |  |  | 
| 53 | 0 |  |  |  |  |  | my $i = 0; | 
| 54 | 0 |  |  |  |  |  | my $reference_name; | 
| 55 |  |  |  |  |  |  | my $ref; | 
| 56 | 0 |  |  |  |  |  | foreach $ref (@refTypes) { | 
| 57 | 0 | 0 |  |  |  |  | if (defined $fields->{$ref}) { | 
| 58 | 0 |  |  |  |  |  | $reference_name = $ref; | 
| 59 | 0 |  |  |  |  |  | last; | 
| 60 |  |  |  |  |  |  | } | 
| 61 | 0 |  |  |  |  |  | $i++; | 
| 62 |  |  |  |  |  |  | } | 
| 63 | 0 | 0 |  |  |  |  | if ($i == scalar(@refTypes)) { | 
| 64 | 0 |  |  |  |  |  | die("reference (list) is not defined"); | 
| 65 |  |  |  |  |  |  | } | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | #     if ((!defined $fields->{'refid_phrase'}) || | 
| 68 |  |  |  |  |  |  | # 	(!defined $fields->{'refid_word'}) || | 
| 69 |  |  |  |  |  |  | # 	(!defined $fields->{'list_refid_token'})) | 
| 70 |  |  |  |  |  |  | #     { | 
| 71 |  |  |  |  |  |  | # 	die("reference (list) is not defined"); | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | #     } | 
| 74 |  |  |  |  |  |  |  | 
| 75 | 0 |  |  |  |  |  | bless ($sem_unit,$class); | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | #     warn "=>>$reference_name\n"; | 
| 78 |  |  |  |  |  |  |  | 
| 79 | 0 |  |  |  |  |  | $sem_unit->reference($reference_name, $fields->{$reference_name}); | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 0 | 0 |  |  |  |  | if (defined $fields->{'form'}) { | 
| 82 | 0 |  |  |  |  |  | $sem_unit->setForm($fields->{'form'}); | 
| 83 |  |  |  |  |  |  | } | 
| 84 | 0 | 0 |  |  |  |  | if (defined $fields->{'weights'}) { | 
| 85 | 0 |  |  |  |  |  | $sem_unit->weights($fields->{'weights'}); | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 0 | 0 |  |  |  |  | if (defined $fields->{'negation'}) { | 
| 89 | 0 |  |  |  |  |  | $sem_unit->negation($fields->{'negation'}); | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 0 | 0 |  |  |  |  | if (defined $fields->{'log_id'}) { | 
| 93 | 0 |  |  |  |  |  | $sem_unit->setLogId($fields->{'log_id'}); | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 0 |  |  |  |  |  | $sem_unit->type($fields->{'type'}); | 
| 97 |  |  |  |  |  |  |  | 
| 98 | 0 | 0 |  |  |  |  | if (defined $fields->{'canonical_form'}) { | 
| 99 | 0 |  |  |  |  |  | $sem_unit->canonical_form($fields->{'canonical_form'}); | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  |  | 
| 102 | 0 | 0 |  |  |  |  | if ($sem_unit->isNamedEntity) { | 
| 103 | 0 |  |  |  |  |  | $sem_unit->NEtype($fields->{'named_entity_type'}); | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 0 |  |  |  |  |  | return($sem_unit); | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | sub negation { | 
| 110 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 0 | 0 |  |  |  |  | if (@_) { | 
| 113 | 0 |  |  |  |  |  | $self->{'negation'} = shift; | 
| 114 |  |  |  |  |  |  | } | 
| 115 | 0 |  |  |  |  |  | return($self->{'negation'}); | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  | sub printWeights { | 
| 118 | 0 |  |  | 0 | 0 |  | my ($self, $fh) = @_; | 
| 119 | 0 |  |  |  |  |  | my $weight_name; | 
| 120 |  |  |  |  |  |  |  | 
| 121 | 0 | 0 |  |  |  |  | if (!defined $fh) { | 
| 122 | 0 |  |  |  |  |  | $fh = \*STDERR; | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  |  | 
| 125 | 0 | 0 |  |  |  |  | if (defined $self->weights) { | 
| 126 | 0 |  |  |  |  |  | foreach $weight_name (keys %{$self->weights}) { | 
|  | 0 |  |  |  |  |  |  | 
| 127 | 0 |  |  |  |  |  | print $fh "$weight_name : " . $self->weight($weight_name) . "\n"; | 
| 128 |  |  |  |  |  |  | } | 
| 129 |  |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | sub weights { | 
| 133 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 0 | 0 |  |  |  |  | if (defined @_) { | 
| 136 | 0 |  |  |  |  |  | my $weights = shift; | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | # 	warn "$weights\n"; | 
| 139 |  |  |  |  |  |  |  | 
| 140 | 0 |  |  |  |  |  | my $weight_name; | 
| 141 | 0 |  |  |  |  |  | foreach $weight_name (keys %$weights) { | 
| 142 |  |  |  |  |  |  | # warn "$weight_name\n"; | 
| 143 | 0 |  |  |  |  |  | $self->{'weights'}->{$weight_name} = $weights->{$weight_name}; | 
| 144 |  |  |  |  |  |  | #  	    warn "$weight_name : " . $self->weights->{$weight_name} .  "\n"; | 
| 145 |  |  |  |  |  |  | } | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 0 |  |  |  |  |  | return($self->{'weights'}); | 
| 149 |  |  |  |  |  |  | } | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | sub sortedWeightValues { | 
| 152 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 153 |  |  |  |  |  |  |  | 
| 154 | 0 |  |  |  |  |  | my @w; | 
| 155 |  |  |  |  |  |  | my $weight_name; | 
| 156 |  |  |  |  |  |  |  | 
| 157 | 0 |  |  |  |  |  | foreach $weight_name (sort keys %{$self->weights}) { | 
|  | 0 |  |  |  |  |  |  | 
| 158 | 0 |  |  |  |  |  | push @w, $self->weight($weight_name); | 
| 159 |  |  |  |  |  |  | } | 
| 160 | 0 |  |  |  |  |  | return(@w); | 
| 161 |  |  |  |  |  |  | } | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | sub numberOfWeights { | 
| 164 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 0 |  |  |  |  |  | return(scalar(keys %{$self->weights})); | 
|  | 0 |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | sub existsWeight { | 
| 170 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 171 | 0 |  |  |  |  |  | my $weight_name = shift; | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 0 |  |  |  |  |  | return(exists($self->{'weights'}->{$weight_name})); | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | sub incr_weight { | 
| 177 | 0 |  |  | 0 | 0 |  | my ($self, $weight_name, $step) = @_; | 
| 178 |  |  |  |  |  |  |  | 
| 179 | 0 | 0 |  |  |  |  | if (!defined($self->weight($weight_name))) { | 
| 180 | 0 |  |  |  |  |  | $self->weight($weight_name, 0); | 
| 181 |  |  |  |  |  |  | } | 
| 182 | 0 |  |  |  |  |  | $self->weight($weight_name, $self->weight($weight_name) + 1); | 
| 183 | 0 |  |  |  |  |  | return($self->weight($weight_name)); | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | sub weight { | 
| 187 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 0 | 0 |  |  |  |  | if (!defined $self->weights) { | 
| 190 | 0 | 0 |  |  |  |  | if (scalar(@_) > 0) { | 
| 191 | 0 |  |  |  |  |  | $self->{'weights'} = {}; | 
| 192 |  |  |  |  |  |  | } else { | 
| 193 | 0 |  |  |  |  |  | return(undef); | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  | } | 
| 196 | 0 |  |  |  |  |  | my $weight_name = shift; | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | # warn "--> " .  $weight_name . "\n"; | 
| 199 |  |  |  |  |  |  | # warn "\tOK\n"; | 
| 200 |  |  |  |  |  |  | # warn scalar(@_) . "\n"; | 
| 201 | 0 | 0 |  |  |  |  | if (@_) { | 
| 202 |  |  |  |  |  |  | # warn "\t  In\n"; | 
| 203 | 0 |  |  |  |  |  | $self->{'weights'}->{$weight_name} = shift; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | # warn "\t\t" . $self->{'weights'}->{$weight_name} . "\n"; | 
| 206 | 0 |  |  |  |  |  | return($self->{'weights'}->{$weight_name}); | 
| 207 |  |  |  |  |  |  | } | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | sub newNamedEntity { | 
| 210 | 0 |  |  | 0 | 0 |  | my ($class, $fields) = @_; | 
| 211 |  |  |  |  |  |  |  | 
| 212 | 0 | 0 |  |  |  |  | if (!defined $fields->{'named_entity_type'}) { | 
| 213 | 0 |  |  |  |  |  | die("named_entity_type is not defined"); | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 | 0 |  |  |  |  |  | $fields->{'type'} = "named_entity"; | 
| 217 |  |  |  |  |  |  |  | 
| 218 | 0 |  |  |  |  |  | return($class->new($fields)); | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | sub newTerm { | 
| 223 | 0 |  |  | 0 | 0 |  | my ($class, $fields) = @_; | 
| 224 |  |  |  |  |  |  |  | 
| 225 | 0 |  |  |  |  |  | $fields->{'type'} = "term"; | 
| 226 |  |  |  |  |  |  |  | 
| 227 | 0 |  |  |  |  |  | return($class->new($fields)); | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | sub newUndefinedSemanticUnit { | 
| 231 | 0 |  |  | 0 | 0 |  | my ($class, $fields) = @_; | 
| 232 |  |  |  |  |  |  |  | 
| 233 | 0 |  |  |  |  |  | $fields->{'type'} = "undefined"; | 
| 234 |  |  |  |  |  |  |  | 
| 235 | 0 |  |  |  |  |  | return($class->new($fields)); | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | # type canonical_form reference NEtype isNamedEntoty | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | sub canonical_form { | 
| 241 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 0 | 0 |  |  |  |  | $self->{'canonical_form'} = shift if @_; | 
| 244 | 0 |  |  |  |  |  | return $self->{'canonical_form'}; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | sub exists_canonical_form { | 
| 248 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 249 |  |  |  |  |  |  |  | 
| 250 | 0 |  |  |  |  |  | return exists($self->{'canonical_form'}); | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | sub type { | 
| 254 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 255 |  |  |  |  |  |  |  | 
| 256 | 0 | 0 |  |  |  |  | $self->{'type'} = shift if @_; | 
| 257 | 0 |  |  |  |  |  | return $self->{'type'}; | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | sub reference_name { | 
| 261 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 262 |  |  |  |  |  |  |  | 
| 263 | 0 | 0 |  |  |  |  | $self->{'reference'} = shift if @_; | 
| 264 | 0 |  |  |  |  |  | return $self->{'reference'}; | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | sub reference { | 
| 269 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 270 | 0 |  |  |  |  |  | my $ref; | 
| 271 |  |  |  |  |  |  | my $elt; | 
| 272 |  |  |  |  |  |  |  | 
| 273 | 0 | 0 | 0 |  |  |  | if ((@_) && (scalar @_ == 2)) { | 
| 274 | 0 |  |  |  |  |  | $self->{'reference'} = shift; | 
| 275 | 0 |  |  |  |  |  | $ref = shift; | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | #	warn "term ref: " . ref($ref) . "\n"; | 
| 278 | 0 | 0 |  |  |  |  | if (ref($ref)  eq "ARRAY") { | 
| 279 | 0 |  |  |  |  |  | $self->{$self->{'reference'}} = []; | 
| 280 | 0 |  |  |  |  |  | foreach $elt (@$ref) { | 
| 281 | 0 |  |  |  |  |  | push @{$self->{$self->{'reference'}}}, $elt; | 
|  | 0 |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  | } else { # it's a single string | 
| 284 |  |  |  |  |  |  | # 	    warn $self->{'reference'}  . ": $ref\n"; | 
| 285 | 0 |  |  |  |  |  | $self->{$self->{'reference'}} = $ref; | 
| 286 |  |  |  |  |  |  |  | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  | } | 
| 289 | 0 |  |  |  |  |  | return($self->{$self->{'reference'}}); | 
| 290 |  |  |  |  |  |  | } | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | sub getReferenceSize { | 
| 294 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 295 |  |  |  |  |  |  |  | 
| 296 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "list_refid_token") { | 
| 297 | 0 |  |  |  |  |  | return(scalar(@{$self->{$self->{'reference'}}})); | 
|  | 0 |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | } | 
| 299 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_word") { | 
| 300 | 0 |  |  |  |  |  | return(1); | 
| 301 |  |  |  |  |  |  | } | 
| 302 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_phrase") { | 
| 303 | 0 |  |  |  |  |  | return(1); | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  | } | 
| 306 |  |  |  |  |  |  |  | 
| 307 |  |  |  |  |  |  | sub getReferenceWordSize { | 
| 308 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 309 |  |  |  |  |  |  |  | 
| 310 | 0 |  |  |  |  |  | my $elmt; | 
| 311 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "list_refid_token") { | 
| 312 | 0 |  |  |  |  |  | return(scalar(@{$self->{$self->{'reference'}}})); | 
|  | 0 |  |  |  |  |  |  | 
| 313 |  |  |  |  |  |  | } | 
| 314 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_word") { | 
| 315 | 0 |  |  |  |  |  | return(1); | 
| 316 |  |  |  |  |  |  | } | 
| 317 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_phrase") { | 
| 318 | 0 |  |  |  |  |  | my $wordCount = 0; | 
| 319 | 0 |  |  |  |  |  | foreach $elmt ($self->reference->getElementList) { | 
| 320 | 0 | 0 |  |  |  |  | if (ref($elmt) eq "Lingua::Ogmios::Annotations::Word") { | 
| 321 | 0 |  |  |  |  |  | $wordCount++; | 
| 322 |  |  |  |  |  |  | } | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  |  | 
| 325 | 0 |  |  |  |  |  | return($wordCount); | 
| 326 |  |  |  |  |  |  | } | 
| 327 |  |  |  |  |  |  | } | 
| 328 |  |  |  |  |  |  |  | 
| 329 |  |  |  |  |  |  | sub getReferenceTokenSize { | 
| 330 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 331 |  |  |  |  |  |  |  | 
| 332 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "list_refid_token") { | 
| 333 | 0 |  |  |  |  |  | return(scalar(@{$self->{$self->{'reference'}}})); | 
|  | 0 |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | } | 
| 335 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_word") { | 
| 336 | 0 |  |  |  |  |  | return($self->reference->getReferenceSize); | 
| 337 |  |  |  |  |  |  | } | 
| 338 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_phrase") { | 
| 339 | 0 |  |  |  |  |  | return($self->reference->getReferenceSize); | 
| 340 |  |  |  |  |  |  | } | 
| 341 |  |  |  |  |  |  | } | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | sub getReference { | 
| 344 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 345 |  |  |  |  |  |  |  | 
| 346 | 0 |  |  |  |  |  | return($self->{$self->{'reference'}}); | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | # sub equals_ref { | 
| 350 |  |  |  |  |  |  | #     my ($self, $element) = @_; | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | #     my $i; | 
| 353 |  |  |  |  |  |  | #     while(($i getReference)) && ($i getReference)) && | 
| 354 |  |  |  |  |  |  | # 	  ($self->getReference->[$i].equals($element->getReference->[$i]))) { | 
| 355 |  |  |  |  |  |  | # 	$i++ | 
| 356 |  |  |  |  |  |  | #     }; | 
| 357 |  |  |  |  |  |  | #     if ($i < scalar($self->getReference)) { | 
| 358 |  |  |  |  |  |  | # 	return(0); | 
| 359 |  |  |  |  |  |  | #     } else { | 
| 360 |  |  |  |  |  |  | # 	return(1); | 
| 361 |  |  |  |  |  |  | #     } | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | # } | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | sub getElementFormList { | 
| 366 | 0 |  |  | 0 | 0 |  | my ($self) = @_; | 
| 367 |  |  |  |  |  |  |  | 
| 368 | 0 |  |  |  |  |  | my $element; | 
| 369 |  |  |  |  |  |  | my @elements; | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  | #     warn ref($self->getReference) . "\n"; | 
| 373 |  |  |  |  |  |  |  | 
| 374 | 0 | 0 |  |  |  |  | if (ref($self->getReference) eq "ARRAY") { | 
| 375 | 0 |  |  |  |  |  | foreach $element (@{$self->getReference}) { | 
|  | 0 |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | # 	    warn "\t". $element->getElementFormList . "\n"; | 
| 377 | 0 |  |  |  |  |  | push @elements, $element->getElementFormList; | 
| 378 |  |  |  |  |  |  | } | 
| 379 |  |  |  |  |  |  | } else { | 
| 380 | 0 |  |  |  |  |  | push @elements, $self->getReference->getElementFormList; | 
| 381 |  |  |  |  |  |  | } | 
| 382 | 0 |  |  |  |  |  | return(@elements); | 
| 383 |  |  |  |  |  |  | } | 
| 384 |  |  |  |  |  |  |  | 
| 385 |  |  |  |  |  |  |  | 
| 386 |  |  |  |  |  |  | sub NEtype { | 
| 387 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 388 |  |  |  |  |  |  |  | 
| 389 | 0 | 0 |  |  |  |  | $self->{'named_entity_type'} = shift if @_; | 
| 390 | 0 |  |  |  |  |  | return $self->{'named_entity_type'}; | 
| 391 |  |  |  |  |  |  | } | 
| 392 |  |  |  |  |  |  |  | 
| 393 |  |  |  |  |  |  | sub isNamedEntity { | 
| 394 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 395 |  |  |  |  |  |  |  | 
| 396 | 0 | 0 | 0 |  |  |  | if ((exists $self->{'type'}) && ($self->{'type'} eq "named_entity")) { | 
| 397 | 0 |  |  |  |  |  | return 1; | 
| 398 |  |  |  |  |  |  | } else { | 
| 399 | 0 |  |  |  |  |  | return 0; | 
| 400 |  |  |  |  |  |  | } | 
| 401 |  |  |  |  |  |  | } | 
| 402 |  |  |  |  |  |  |  | 
| 403 |  |  |  |  |  |  | sub isTerm { | 
| 404 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 405 |  |  |  |  |  |  |  | 
| 406 | 0 | 0 | 0 |  |  |  | if ((exists $self->{'type'}) && ($self->{'type'} eq "term")) { | 
| 407 | 0 |  |  |  |  |  | return(1); | 
| 408 |  |  |  |  |  |  | } else { | 
| 409 | 0 |  |  |  |  |  | return(0); | 
| 410 |  |  |  |  |  |  | } | 
| 411 |  |  |  |  |  |  | } | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | sub end_token { | 
| 414 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 415 |  |  |  |  |  |  |  | 
| 416 |  |  |  |  |  |  | #    warn $self->reference_name . "\n"; | 
| 417 |  |  |  |  |  |  |  | 
| 418 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "list_refid_token") { | 
| 419 | 0 |  |  |  |  |  | return($self->reference->[$#{$self->reference}]); | 
|  | 0 |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | } | 
| 421 |  |  |  |  |  |  | #if list_refid token -> OK | 
| 422 |  |  |  |  |  |  | # if refid word -> word->start_token | 
| 423 |  |  |  |  |  |  | # OR | 
| 424 |  |  |  |  |  |  | # if refid phrase -> phrase->start_token | 
| 425 | 0 | 0 | 0 |  |  |  | if (($self->reference_name eq "refid_word") || | 
| 426 |  |  |  |  |  |  | ($self->reference_name eq "refid_phrase")){ | 
| 427 | 0 |  |  |  |  |  | return($self->reference->end_token); | 
| 428 |  |  |  |  |  |  | } | 
| 429 | 0 |  |  |  |  |  | return(undef); | 
| 430 |  |  |  |  |  |  | } | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | sub start_token { | 
| 433 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  |  | 
| 436 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "list_refid_token") { | 
| 437 | 0 |  |  |  |  |  | return($self->reference->[0]); | 
| 438 |  |  |  |  |  |  | } | 
| 439 |  |  |  |  |  |  | #if list_refid token -> OK | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | # if refid word -> word->start_token | 
| 442 |  |  |  |  |  |  | # OR | 
| 443 |  |  |  |  |  |  | # if refid phrase -> phrase->start_token | 
| 444 | 0 | 0 | 0 |  |  |  | if (($self->reference_name eq "refid_word") || | 
| 445 |  |  |  |  |  |  | ($self->reference_name eq "refid_phrase")){ | 
| 446 | 0 |  |  |  |  |  | return($self->reference->start_token); | 
| 447 |  |  |  |  |  |  | } | 
| 448 | 0 |  |  |  |  |  | return(undef); | 
| 449 |  |  |  |  |  |  | } | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | # Not check | 
| 452 |  |  |  |  |  |  | sub preceeds { | 
| 453 | 0 |  |  | 0 | 0 |  | my ($self, $elt, $wordLimit, $document) = @_; | 
| 454 |  |  |  |  |  |  |  | 
| 455 | 0 |  |  |  |  |  | my $token = $self->end_token; | 
| 456 |  |  |  |  |  |  |  | 
| 457 | 0 |  |  |  |  |  | my $wordcount = 0; | 
| 458 |  |  |  |  |  |  |  | 
| 459 | 0 |  | 0 |  |  |  | do { | 
| 460 | 0 |  |  |  |  |  | do { | 
| 461 | 0 |  |  |  |  |  | $token = $token->next; | 
| 462 |  |  |  |  |  |  | } while($token->isSep); | 
| 463 |  |  |  |  |  |  |  | 
| 464 | 0 | 0 | 0 |  |  |  | if ((!$token->equals($elt->end_token)) && ($wordcount < $wordLimit)){ | 
| 465 | 0 | 0 |  |  |  |  | if (scalar(@{$document->getAnnotations->getWordLevel->getElementByToken($token)}) > 0) { | 
|  | 0 |  |  |  |  |  |  | 
| 466 | 0 |  |  |  |  |  | $token = $document->getAnnotations->getWordLevel->getElementByToken($token)->end_token; | 
| 467 | 0 |  |  |  |  |  | $wordcount++; | 
| 468 |  |  |  |  |  |  | } | 
| 469 |  |  |  |  |  |  | } | 
| 470 |  |  |  |  |  |  | } while((!$token->equals($elt->end_token)) && ($wordcount < $wordLimit)); | 
| 471 |  |  |  |  |  |  |  | 
| 472 | 0 | 0 |  |  |  |  | if ($token->equals($elt->end_token)) { | 
| 473 | 0 |  |  |  |  |  | return(1); | 
| 474 |  |  |  |  |  |  | } else { | 
| 475 | 0 |  |  |  |  |  | return(0); | 
| 476 |  |  |  |  |  |  | } | 
| 477 |  |  |  |  |  |  | } | 
| 478 |  |  |  |  |  |  |  | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | sub getPreceedingTerm { | 
| 481 | 0 |  |  | 0 | 0 |  | my ($self, $wordLimit, $document) = @_; | 
| 482 |  |  |  |  |  |  |  | 
| 483 | 0 |  |  |  |  |  | my $token = $self->start_token; | 
| 484 |  |  |  |  |  |  |  | 
| 485 | 0 |  |  |  |  |  | my $wordcount = 0; | 
| 486 |  |  |  |  |  |  |  | 
| 487 | 0 |  |  |  |  |  | my $lastword; | 
| 488 |  |  |  |  |  |  |  | 
| 489 |  |  |  |  |  |  | #     warn "go in ($token)\n"; | 
| 490 | 0 |  |  |  |  |  | do { | 
| 491 | 0 |  |  |  |  |  | do { | 
| 492 | 0 |  |  |  |  |  | $token = $token->previous; | 
| 493 |  |  |  |  |  |  | } while($token->isSep); | 
| 494 |  |  |  |  |  |  |  | 
| 495 |  |  |  |  |  |  | # 	warn "not sep $token\n"; | 
| 496 | 0 | 0 |  |  |  |  | if (scalar(@{$document->getAnnotations->getSemanticUnitLevel->getElementByToken($token)}) > 0) { | 
|  | 0 |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  | #      warn "go out\n"; | 
| 498 | 0 |  |  |  |  |  | return($document->getAnnotations->getSemanticUnitLevel->getElementByToken($token)->[0]); | 
| 499 |  |  |  |  |  |  | } else { | 
| 500 |  |  |  |  |  |  | #  	    warn "Go\n"; | 
| 501 | 0 |  | 0 |  |  |  | while((defined $token->previous) && (scalar(@{$document->getAnnotations->getWordLevel->getElementByToken($token)})== 0)) { | 
|  | 0 |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  | # 	    warn "go\n"; | 
| 503 | 0 |  |  |  |  |  | $token = $token->previous; | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  | # 	    warn "GO\n"; | 
| 506 |  |  |  |  |  |  |  | 
| 507 | 0 | 0 | 0 |  |  |  | if ((defined $token->previous) && (scalar(@{$document->getAnnotations->getWordLevel->getElementByToken($token)}) > 0)) { | 
|  | 0 |  |  |  |  |  |  | 
| 508 |  |  |  |  |  |  | # 	    warn "GO2\n"; | 
| 509 | 0 |  |  |  |  |  | $token = $document->getAnnotations->getWordLevel->getElementByToken($token)->[0]->start_token; | 
| 510 | 0 |  |  |  |  |  | $lastword = $document->getAnnotations->getWordLevel->getElementByToken($token)->[0]; | 
| 511 | 0 | 0 |  |  |  |  | if (scalar(@{$document->getAnnotations->getSemanticUnitLevel->getElementByToken($token)}) > 0) { | 
|  | 0 |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | #      warn "go out\n"; | 
| 513 | 0 |  |  |  |  |  | return($document->getAnnotations->getSemanticUnitLevel->getElementByToken($token)->[0]); | 
| 514 |  |  |  |  |  |  | }#     warn "go out\n"; | 
| 515 |  |  |  |  |  |  | # 	    warn "GO3\n"; | 
| 516 | 0 |  |  |  |  |  | $wordcount++; | 
| 517 |  |  |  |  |  |  | } else { | 
| 518 |  |  |  |  |  |  | #     warn "go out\n"; | 
| 519 | 0 |  |  |  |  |  | return($lastword); | 
| 520 |  |  |  |  |  |  | } | 
| 521 |  |  |  |  |  |  | # 	    warn "GO4\n"; | 
| 522 |  |  |  |  |  |  |  | 
| 523 |  |  |  |  |  |  | } | 
| 524 |  |  |  |  |  |  | } while($wordcount < $wordLimit); | 
| 525 | 0 | 0 |  |  |  |  | if (scalar(@{$document->getAnnotations->getSemanticUnitLevel->getElementByToken($token)}) > 0) { | 
|  | 0 |  |  |  |  |  |  | 
| 526 |  |  |  |  |  |  | #      warn "go out\n"; | 
| 527 | 0 |  |  |  |  |  | return($document->getAnnotations->getSemanticUnitLevel->getElementByToken($token)->[0]); | 
| 528 |  |  |  |  |  |  | }#     warn "go out\n"; | 
| 529 | 0 |  |  |  |  |  | return($lastword); | 
| 530 |  |  |  |  |  |  |  | 
| 531 |  |  |  |  |  |  | } | 
| 532 |  |  |  |  |  |  |  | 
| 533 |  |  |  |  |  |  |  | 
| 534 |  |  |  |  |  |  | sub XMLout { | 
| 535 | 0 |  |  | 0 | 0 |  | my ($self, $order) = @_; | 
| 536 |  |  |  |  |  |  |  | 
| 537 | 0 |  |  |  |  |  | my $str; | 
| 538 |  |  |  |  |  |  |  | 
| 539 | 0 |  |  |  |  |  | $str = "\t\t\n"; | 
| 540 | 0 |  |  |  |  |  | $str .= $self->SUPER::XMLout($self->type, $order); | 
| 541 | 0 |  |  |  |  |  | $str .= "\t\n"; | 
| 542 |  |  |  |  |  |  |  | 
| 543 | 0 |  |  |  |  |  | return($str); | 
| 544 |  |  |  |  |  |  | } | 
| 545 |  |  |  |  |  |  |  | 
| 546 |  |  |  |  |  |  |  | 
| 547 |  |  |  |  |  |  | sub getSemanticFeatureFC { | 
| 548 | 0 |  |  | 0 | 0 |  | my ($self, $document) = @_; | 
| 549 |  |  |  |  |  |  |  | 
| 550 | 0 | 0 |  |  |  |  | if (defined ($document->getAnnotations->getSemanticFeaturesLevel->getElementFromIndex("refid_semantic_unit", $self->getId)->[0]))  { | 
| 551 | 0 |  |  |  |  |  | return($document->getAnnotations->getSemanticFeaturesLevel->getElementFromIndex("refid_semantic_unit", $self->getId)->[0]->first_node_first_semantic_category); | 
| 552 |  |  |  |  |  |  | } else { | 
| 553 | 0 |  |  |  |  |  | return(undef); | 
| 554 |  |  |  |  |  |  | } | 
| 555 |  |  |  |  |  |  | } | 
| 556 |  |  |  |  |  |  |  | 
| 557 |  |  |  |  |  |  | sub getSemanticFeature1 { | 
| 558 | 0 |  |  | 0 | 0 |  | my ($self, $document) = @_; | 
| 559 |  |  |  |  |  |  |  | 
| 560 | 0 | 0 |  |  |  |  | if (defined ($document->getAnnotations->getSemanticFeaturesLevel->getElementFromIndex("refid_semantic_unit", $self->getId)->[0]))  { | 
| 561 | 0 |  |  |  |  |  | return($document->getAnnotations->getSemanticFeaturesLevel->getElementFromIndex("refid_semantic_unit", $self->getId)->[0]); | 
| 562 |  |  |  |  |  |  | } else { | 
| 563 | 0 |  |  |  |  |  | return(undef); | 
| 564 |  |  |  |  |  |  | } | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  | sub SemanticFeatureFCEquals { | 
| 568 | 0 |  |  | 0 | 0 |  | my ($self, $document, $semTypeValue) = @_; | 
| 569 |  |  |  |  |  |  |  | 
| 570 |  |  |  |  |  |  | # warn $self->getForm . "\n"; | 
| 571 |  |  |  |  |  |  | # warn "semTypeValue: $semTypeValue\n"; | 
| 572 |  |  |  |  |  |  | # warn "semTypeValue: " . $self->getSemanticFeatureFC($document) ."\n"; | 
| 573 |  |  |  |  |  |  |  | 
| 574 | 0 | 0 | 0 |  |  |  | if ((defined ($self->getSemanticFeatureFC($document))) && | 
| 575 |  |  |  |  |  |  | ($self->getSemanticFeatureFC($document) eq $semTypeValue)) { | 
| 576 |  |  |  |  |  |  | # 	($document->getAnnotations->getSemanticFeaturesLevel->getElementFromIndex("refid_semantic_unit", $self->getId)->[0]->first_node_first_semantic_category eq $semTypeValue)) { | 
| 577 | 0 |  |  |  |  |  | return(1); | 
| 578 |  |  |  |  |  |  | } else { | 
| 579 | 0 |  |  |  |  |  | return(0); | 
| 580 |  |  |  |  |  |  | } | 
| 581 |  |  |  |  |  |  | } | 
| 582 |  |  |  |  |  |  |  | 
| 583 |  |  |  |  |  |  |  | 
| 584 |  |  |  |  |  |  | sub equalsAtTokenLevel { | 
| 585 | 0 |  |  | 0 | 0 |  | my ($self, $element) = @_; | 
| 586 |  |  |  |  |  |  |  | 
| 587 | 0 | 0 | 0 |  |  |  | if (($self->start_token->equals($element->start_token)) && | 
| 588 |  |  |  |  |  |  | ($self->end_token->equals($element->end_token))) | 
| 589 |  |  |  |  |  |  | { | 
| 590 | 0 |  |  |  |  |  | return(1); | 
| 591 |  |  |  |  |  |  | } else { | 
| 592 | 0 |  |  |  |  |  | return(0); | 
| 593 |  |  |  |  |  |  | } | 
| 594 |  |  |  |  |  |  | } | 
| 595 |  |  |  |  |  |  |  | 
| 596 |  |  |  |  |  |  | sub getLemmaString { | 
| 597 | 0 |  |  | 0 | 0 |  | my ($self, $document) = @_; | 
| 598 |  |  |  |  |  |  |  | 
| 599 | 0 |  |  |  |  |  | my $elmt; | 
| 600 |  |  |  |  |  |  | my $lemma; | 
| 601 |  |  |  |  |  |  |  | 
| 602 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "list_refid_token") { | 
| 603 |  |  |  |  |  |  | # warn "TOKEN: " . $self->{'reference'} . "\n"; | 
| 604 |  |  |  |  |  |  | # warn join(':', @{$self->{$self->{'reference'}}}) . "\n"; | 
| 605 |  |  |  |  |  |  | #     	return(scalar(@{$self->{$self->{'reference'}}})); | 
| 606 | 0 |  |  |  |  |  | foreach $elmt (@{$self->{$self->{'reference'}}}) { | 
|  | 0 |  |  |  |  |  |  | 
| 607 | 0 |  |  |  |  |  | $lemma .= $elmt->getContent; | 
| 608 |  |  |  |  |  |  | } | 
| 609 |  |  |  |  |  |  | } | 
| 610 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_word") { | 
| 611 |  |  |  |  |  |  | #	warn $self->reference . "\n"; | 
| 612 | 0 |  |  |  |  |  | $lemma .= $self->reference->getLemma($document)->canonical_form; | 
| 613 |  |  |  |  |  |  | } | 
| 614 | 0 | 0 |  |  |  |  | if ($self->reference_name eq "refid_phrase") { | 
| 615 |  |  |  |  |  |  | # warn "PHRASE: " . $self->reference . "\n"; | 
| 616 | 0 |  |  |  |  |  | $lemma .= $self->reference->getLemmaString($document); | 
| 617 |  |  |  |  |  |  | } | 
| 618 | 0 |  |  |  |  |  | return($lemma); | 
| 619 |  |  |  |  |  |  | } | 
| 620 |  |  |  |  |  |  |  | 
| 621 |  |  |  |  |  |  |  | 
| 622 |  |  |  |  |  |  | 1; | 
| 623 |  |  |  |  |  |  |  | 
| 624 |  |  |  |  |  |  | __END__ |