|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # TestifiedTermParser.yp  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  used to generate Lingua::YaTeA::TestifiedTermParser.pm  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  Use: yapp -m 'Lingua::YaTeA::TestifiedTermParser' -o lib/Lingua/YaTeA/TestifiedTermParser.pm lib/Lingua/YaTeA/TestifiedTermParser.yp  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Parse::Yapp input grammar for parsing the yatea testified terms  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 %{  | 
| 
13
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
1456
 | 
     use Lingua::YaTeA;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
    | 
| 
14
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
165
 | 
     use Data::Dumper;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
219
 | 
    | 
| 
15
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
36
 | 
     use warnings;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
112
 | 
    | 
| 
16
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
24
 | 
     use UNIVERSAL;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
    | 
| 
17
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
211
 | 
     use Scalar::Util qw(blessed);  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10241
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @words;  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $word;  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $item;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @infos;  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @IF;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @POS;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @LF;  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $src;  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @lex_items;  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $testified;  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $i;  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $tree;  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $node_set;  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $node;  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $edge;  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $index = 0;  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @uncomplete;  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $level = 0;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $num_line =1;  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 %}  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 %%  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 input:  #empty  | 
| 
40
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
         |   input line  {  #print STDERR "\n INPUT  \n";  | 
| 
 
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       }  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 line:     '\n' { # print "VIDE: " . $_[1]   | 
| 
45
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
           $num_line++;   | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | testified {   | 
| 
48
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 	      $num_line++;   | 
| 
49
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      @lex_items = ();  | 
| 
50
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      @words= ();  | 
| 
51
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      my $testified;  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      # print STDERR "=>$_[1]\n";  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      my $testified_infos;  | 
| 
54
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if($_[0]->YYData->{TTS}->getTestifiedInfos(\$testified_infos,\@IF,\@POS,\@LF,$src,\@lex_items,$_[0]->YYData->{MATCH},$_[0]->YYData->{FILTERING_LEXICON},$_[0]->YYData->{TAGSET}) == 1) {  | 
| 
55
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  if(scalar @lex_items > 1) {    | 
| 
56
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      $testified = Lingua::YaTeA::MultiWordTestifiedTerm->new($testified_infos->{"NUM_CONTENT_WORDS"},\@lex_items,$_[0]->YYData->{TAGSET},$src,$_[0]->YYData->{MATCH});  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  }  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      }                 | 
| 
59
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	      if ((blessed($testified)) && ($testified->isa('Lingua::YaTeA::TestifiedTerm'))) {  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  #print STDERR "ajout tt: " . $testified->getIF . "\n";  | 
| 
61
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  $_[0]->YYData->{TTS}->addTestified($testified);  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    | 
| 
63
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		  if ((blessed($testified)) && ($testified->isa('Lingua::YaTeA::MultiWordTestifiedTerm'))) {                 | 
| 
64
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      $tree =  Lingua::YaTeA::Tree->new;  | 
| 
65
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      $tree->setNodeSet($node_set);  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      # $tree->print($testified_infos->{"WORDS"});  | 
| 
67
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      $tree->setIndexSet($testified->getIndexSet);  | 
| 
68
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      $tree->setHead;  | 
| 
69
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      $testified->addTree($tree);  | 
| 
70
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      $testified->setParsingMethod("USER");  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  }  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      }  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      # print "fin creation :" . $testified->getIF . "\n";                 | 
| 
74
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      $level = 0;  | 
| 
75
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      $index = 0;  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }                             | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 testified: {  | 
| 
81
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
                 $node_set = Lingua::YaTeA::NodeSet->new;  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            }             | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
            OPEN parsing END infos { #print "trouve testified2 $_[1]\n";  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            }  | 
| 
85
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
            | error '\nTESTIFIED: ' { $_[0]->YYErrok }  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 infos: INFOS {  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # print "infos $_[1]\n";  | 
| 
90
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
            @infos = split /\t/, $_[1];  | 
| 
91
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            @IF = split / /, $infos[0];  | 
| 
92
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            @POS = split / /, $infos[1];  | 
| 
93
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            @LF = split / /, $infos[2];  | 
| 
94
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            $src = $infos[3];  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parsing: CANDIDATE PREP DET CANDIDATE  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | CANDIDATE PREP CANDIDATE  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | CANDIDATE PREP PREP CANDIDATE  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | CANDIDATE DET CANDIDATE  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
          | CANDIDATE CANDIDATE {   | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # print STDERR "PARSING $_[1]\n"  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            }  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          |   | 
| 
108
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
          | error '\nPARSING: ' { $_[0]->YYErrok }  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 PREP: WORD P_STATUS {   | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # print STDERR "PREP $_[1] $_[2]\n";  | 
| 
114
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
          $node->{"PREP"} = Lingua::YaTeA::TermLeaf->new($index);   | 
| 
115
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $index++;  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       };  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 DET: WORD D_STATUS {   | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # print STDERR "DET $_[1] $_[2]\n";  | 
| 
120
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
        $node->{"DET"} = Lingua::YaTeA::TermLeaf->new($index);  | 
| 
121
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        $index++;  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 CANDIDATE: WORD C_STATUS{   | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # print STDERR "CANDIDATE1 $_[1] $_[2]\n";  | 
| 
126
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
            $edge = Lingua::YaTeA::TermLeaf->new($index);  | 
| 
127
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            $node->addEdge($edge,$_[2]);  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # print "ajout du edge :" ;  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # print Dumper($edge) . "\n";  | 
| 
130
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            $index++;  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            }  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
            | OPEN parsing CLOSE {   | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              #print STDERR "CANDIDATE2 $_[1]\n";  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              }  | 
| 
135
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
            | error '\nCANDIDATE: ' { $_[0]->YYErrok }  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 OPEN: OPEN_TAG {   | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                # print STDERR "OPEN $_[1]\n";  | 
| 
140
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
                if ($level == 0)  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
142
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $node = Lingua::YaTeA::RootNode->new($level);  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
146
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $node = Lingua::YaTeA::InternalNode->new($level);  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
148
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $node_set->addNode($node);  | 
| 
149
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      push @uncomplete, $node;  | 
| 
150
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $level++;  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
152
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
              |        error '\nOPEN: ' { $_[0]->YYErrok }  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 END: END_TAG {   | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              # print STDERR "END $_[1]\n";   | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              }  | 
| 
159
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
              |        error '\nEND: ' { $_[0]->YYErrok }  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 CLOSE: END_TAG C_STATUS {   | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # print STDERR "CLOSE_TAG $_[1] $_[2]\n";  | 
| 
164
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
                 pop @uncomplete;  | 
| 
165
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $node->linkToFather(\@uncomplete,$_[2]);  | 
| 
166
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $node = $uncomplete[$#uncomplete];  | 
| 
167
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          $level--;          | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
169
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
         |        error '\nCLOSE: ' { $_[0]->YYErrok }  | 
| 
170
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ;  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
173
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 %%  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _Error {  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         exists $_[0]->YYData->{ERRMSG}  | 
| 
177
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     and do {  | 
| 
178
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print $_[0]->YYData->{ERRMSG};  | 
| 
179
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         delete $_[0]->YYData->{ERRMSG};  | 
| 
180
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return;  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
182
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print  "EXPECT: ";  | 
| 
183
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $_[0]->YYExpect . "\n";  | 
| 
184
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print  "CURTOK: ";  | 
| 
185
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print "-" . $_[0]->YYCurtok . "-\n";  | 
| 
186
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print  "CURVAL: ";  | 
| 
187
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $_[0]->YYCurval . "\n";  | 
| 
188
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print  "Lexer: ";  | 
| 
189
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print Dumper($_[0]->YYLexer) . "\n";  | 
| 
190
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    print "Syntax error.\n";  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _Lexer {  | 
| 
194
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my($parser)=shift;  | 
| 
195
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $fh = $parser->YYData->{FH};  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
198
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $open = '(\()';     | 
| 
199
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $word = $parser->YYData->{WORD};;  | 
| 
200
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $close = '(\)<=[MH]>)';  | 
| 
201
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $end = '(\))';  | 
| 
202
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $d_status = '<=(D)>';  | 
| 
203
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_status = '<=(P)>';  | 
| 
204
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $c_status = '<=([MH])>';  | 
| 
205
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $infos = '\t(.+)';  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      $parser->YYData->{INPUT}  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      or  $parser->YYData->{INPUT} = <$fh>  | 
| 
210
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     or  return('',undef);  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
212
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      $parser->YYData->{INPUT}=~s/^[ \t]*#.*//;  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for ($parser->YYData->{INPUT}) {  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #print "TEST-" .$parser->YYData->{INPUT}. "-\n";  | 
| 
217
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$open\s*// and return ('OPEN_TAG', $1);  | 
| 
218
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$end// and return('END_TAG', $1);  | 
| 
219
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$word\s*// and return ('WORD', $1);  | 
| 
220
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$c_status\s*// and return ('C_STATUS', $1);  | 
| 
221
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$d_status\s*// and return ('D_STATUS', $1);  | 
| 
222
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$p_status\s*// and return ('P_STATUS', $1);  | 
| 
223
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$close\s*// and return('CLOSE_TAG', $1);          | 
| 
224
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^$infos\s*// and return('INFOS', $1, $2);  | 
| 
225
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         s/^.+//s  and return($1,$1);  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Lingua::YaTeA::TestifiedTermParser - Perl extension for the parser of testified term file (based on Parse::Yapp)  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Lingua::YaTeA::TestifiedTermParser;  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $fh = FileHandle->new("<$file_path");  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $parser = Lingua::YaTeA::TestifiedTermParser->new();  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser->YYData->{TTS} = $this;  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser->YYData->{WORD} = $word_characters_regexp;  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser->YYData->{TAGSET} = $tag_set;  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser->YYData->{MATCH} = $match_type;  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser->YYData->{FH} = $fh;  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser->YYData->{FILTERING_LEXICON} = $filtering_lexicon_h;  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser->YYParse(yylex => \&Lingua::YaTeA::ParsingPatternParser::_Lexer, yyerror => \&Lingua::YaTeA::ParsingPatternParser::_Error);  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The module implements a parser for analysing testified term file.    | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The parser takes into account several information: the word character  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 list (field C) i.e. all the possible characters in a word, the  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Part-of-Speech tagset (field C), the type of matching (field  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C), the file handler to read (field C), and the lexicon of  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the corpus (field C).  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _Error()  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     _Error($error_objet);  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method is used to manage the parsing error and prints a message  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 explaining the error.  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 _Lexer()  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     _Lexer($parser_info);  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method applies the parser on the data contains in the structure  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<$parser_info> (field C).  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Sophie Aubin and Thierry Hamon. Improving Term Extraction with  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Terminological Resources. In Advances in Natural Language Processing  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (5th International Conference on NLP, FinTAL 2006). pages  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 380-387. Tapio Salakoski, Filip Ginter, Sampo Pyysalo, Tapio Pahikkala  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (Eds). August 2006. LNAI 4139.  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Thierry Hamon  and Sophie Aubin   | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT AND LICENSE  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright (C) 2005 by Thierry Hamon and Sophie Aubin  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This library is free software; you can redistribute it and/or modify  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it under the same terms as Perl itself, either Perl version 5.8.6 or,  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 at your option, any later version of Perl 5 you may have available.  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |