|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Lingua::YaTeA;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
219026
 | 
 use strict;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
163
 | 
    | 
| 
4
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
31
 | 
 use warnings;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
142
 | 
    | 
| 
5
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
3196
 | 
 use utf8;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =encoding utf8  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Lingua::YaTeA - Perl extension for extracting terms from a corpus and providing a syntactic analysis in a head-modifier format.  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use Lingua::YaTeA;  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %config = Lingua::YaTeA::load_config($rcfile);  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $yatea = Lingua::YaTeA->new($config{"OPTIONS"}, \%config);  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $corpus = Lingua::YaTeA::Corpus->new($corpus_path,$yatea->getOptionSet,$yatea->getMessageSet);  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $yatea->termExtraction($corpus);  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module is the main module of the software named YaTeA. It aims at  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 extracting noun phrases that look like terms from a corpus.  It  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 provides their syntactic analysis in a head-modifier representation.  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 As an input, the term extractor requires a corpus which has been  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 segmented into words and sentences, lemmatized and tagged with  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 part-of-speech (POS) information. The input file is encoded in  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 UTF-8. The implementation of this term extractor allows processing  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 large corpora.  Data provided with YaTeA allow extracting terms from  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 English and French texts.  But new linguistic features can be  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 integrated to extract terms from another language. Moreover,  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 linguistic features can be modified or created for a sub-language or  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 tagset.  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For the use of YaTeA, see the documentation with the script C.  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The main strategy of analysis of the term candidates is based on the  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 exploitation of simple parsing patterns and endogenous  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 disambiguation. Exogenous disambiguation is also made possible for the  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 identification and the analysis of term candidates by the use of  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 external resources, I<i.e.> lists of testified terms.  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 ANALYSIS: ENDOGENOUS AND EXOGENOUS DISAMBIGUATION  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Endogenous disambiguation consists in the exploitation of intermediate  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 chunking and parsing results for the parsing of a given Maximal Noun  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Phrase (MNP). This feature allows the parse of complex noun phrases  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 using a limited number of simple parsing patterns (80 patterns  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 containing a maximum of 3 content words in the experiments described  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 below). All the MNPs corresponding to parsing patterns are parsed  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 first. In a second step, remaining unparsed MNPs are processed using  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the results of the first step as I<islands of reliability>.  An  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I<island of reliability> is a subsequence (contiguous or not) of a MNP  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that corresponds to a shorter term candidate that was parsed during  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the first step of the parsing process. This subsequence along with its  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal analysis is used as an anchor in the parsing of the  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 MNP. Islands are used to simplify the POS sequence of the MNP for  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which no parsing pattern was found. The subsequence covered by the  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 island is reduced to its syntactic head. In addition, islands increase  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the degree of reliability of the parse. When no resource is provided  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and as there is no parsing pattern defined for the complete POS  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence "NN NN NN of NN" corresponding to the term candidate  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "Northern blot analysis of cwlH", the progressive method is  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 applied. In such a case, the TC is bracketed from the right to the  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 left, which results in a poor quality analysis. When considering the  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 island of reliability "northern blot analysis", the correct bracketing  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is found.  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 load_config()  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     load_config($rcfile);  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method loads the configuration of the NLP Platform by reading the  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 configuration file given in argument. It returns the hashtable  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 containing the configuration.  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new()  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     new($command_line_options_h,$system_config_h);  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method creates a new term extractor and sets options from the  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 command line (C<$command_line_options_h>) and options defined in the  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 hashtable (C<$system_config_h>) given by address. The method returns  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the created object.  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 termExtraction()  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     termExtraction($corpus);  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method applies an extraction process on the corpus C<$corpus>  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 given as parameter, and stores results in the directories specified in  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the configuration files.  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setOptions()  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setOptions($command_line_options_h);  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method creates an option set. It sets the options defined in the  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 hashtable C<$command_line_options_h> (given by reference) and checks  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 if the C parameter is defined in the configuration.  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setConfigFiles()  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setConfigFiles($this,$system_config_h);  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setLocaleFiles()  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setLocaleFiles($this,$system_config_h);  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 addOptionsFromFile()  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     addOptionsFromFile($this);  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setMessageSet()  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setMessageSet($this,$system_config_h);  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setTagSet()  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setTagSet($this);  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setParsingPatterns()  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setParsingPatterns($this);  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setChunkingDataSet()  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setChunkingDataSet($this);  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setForbiddenStructureSet()  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setForbiddenStructureSet($this);  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 loadTestifiedTerms()  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     loadTestifiedTerms($this,$process_counter_r,$corpus,$sentence_boundary,$document_boundary,$match_type,$message_set,$display_language);  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 setTestifiedTermSet()  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     setTestifiedTermSet($this,$filtering_lexicon_h,$sentence_boundary,$match_type);  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getTestifiedTermSet()  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getTestifiedTermSet($this);  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getFSSet()  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getFSSet($this);  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getConfigFileSet()  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getConfigFileSet($this);  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getLocaleFileSet()  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getLocaleFileSet($this);  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getResultFileSet()  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getResultFileSet($this);  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getOptionSet()  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getOptionSet($this);  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method returns the field C.  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getTagSet()  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getTagSet($this);  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getChunkingDataSet()  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getChunkingDataSet($this);  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getParsingPatternSet()  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getParsingPatternSet($this);  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getMessageSet()  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getMessageSet($this);  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 getTestifiedSet()  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     getTestifiedSet($this);  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 addMessageSetFile()  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     addMessageSetFile($this);  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 displayExtractionResults()  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     displayExtractionResults($this,$phrase_set,$corpus,$message_set,$display_language,$default_output);  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 CONFIGURATION  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The configuration file of YaTeA is divided into two sections:  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over   | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * Section C  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : directory containing the configuration files according to the language  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : directory containing the environment files according to the language  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : directory where the results are stored (probably not useful)  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * Section C  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C I : Definition of the language of the  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 corpus. Values are either C (French - TreeTagger output - TagSet  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ),  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C (French - output of Flemm analyser) or C (English -  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 TreeTagger or GeniaTagger output - PennTreeBank Tagset)  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C I : Specification of a name for the current version  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the analysis. Results are gathered in a specific directory of this  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 name and result files also carry this suffix  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : set the path to the directory that will contain the  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  results for the current corpus (default: working directory)  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C I : Name of a file containing a list of testified  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 terms. The testified terms have to be provided in the TreeTagger output  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 format.  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : all occurrences of monolexical phrases  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are considered as term candidates. The value is 0 or 1.  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : occurrences of monolexical term  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 candidates that appear in complex term candidates are also displayed. The value is 0 or 1.  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C [loose or strict] :  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : testified terms match either inflected or lemmatized forms of each word  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : testified terms match the combination of inflected form and POS tag of each word  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 unspecified option: testified terms match inflected forms of words  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : display of the parsed term candidates in XML format. The  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 value is 0 or 1.  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : display of a list of terms and sub-terms along with  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 their frequency. To display only term candidates containing more than  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 one word (multi-word term candidates), specify the value C.  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All term candidates will be displayed, monolexical and multi-word  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 term candidates with the value C, or if any value is specified.  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : display of the corpus marked with phrases in a  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HTML file along with the indication that they are term candidates or  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 not. The value is 0 or 1.  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : annotation of the corpus with term candidates in a  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 XML format compatible with the BioLG software. The value is 0 or 1.  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : annotation of the corpus with testified terms in a  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 XML format compatible with the BioLG software. The value is 0 or 1.  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (http://www.it.utu.fi/biolg/, biological tuned version of the Link  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Grammar Parser)  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : creation of a BioLG compatible XML version  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the corpus with PoS tags marked for each word. The value is 0 or 1.  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : displays information on parsed phrases (i.e. term  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 candidates) in a text format. The value is 0 or 1.  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : only annotate testified terms (no acquisition). The  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 value is 0 or 1.  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C : term candidates are displayed in  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 TreeTagger output format. Term separator is the sentence boundary tag  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C. To extract only term candidates containing more than one  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 word (multi-word term candidates), specify the option C.   | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All term candidates will be displayed, monolexical and multi-word  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 term candidates with the value C, or if any value is specified.  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 CONTRIBUTORS  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Charlotte Roze has defined the configuration files to process a corpus  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 tagged with Flemm  | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *   | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Wiktoria Golik, Robert Bossy and Claire Nédellec (MIG/INRA) have  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 corrected bugs and improved the mapping of testified terms.  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Sophie Aubin and Thierry Hamon. Improving Term Extraction with  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Terminological Resources. In Advances in Natural Language Processing  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (5th International Conference on NLP, FinTAL 2006). pages  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 380-387. Tapio Salakoski, Filip Ginter, Sampo Pyysalo, Tapio Pahikkala  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (Eds). August 2006. LNAI 4139.  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHORS  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Thierry Hamon  and Sophie Aubin   | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LICENSE  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright (C) 2005 by Thierry Hamon and Sophie Aubin  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This library is free software; you can redistribute it and/or modify  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it under the same terms as Perl itself, either Perl version 5.8.6 or,  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 at your option, any later version of Perl 5 you may have available.  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
425
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2828
 | 
 use Data::Dumper;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26507
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
323
 | 
    | 
| 
426
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2073
 | 
 use Lingua::YaTeA::ParsingPatternRecordSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
77
 | 
    | 
| 
427
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2475
 | 
 use Lingua::YaTeA::OptionSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
428
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2156
 | 
 use Lingua::YaTeA::Option;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
    | 
| 
429
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2171
 | 
 use Lingua::YaTeA::FileSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
430
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2205
 | 
 use Lingua::YaTeA::MessageSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
    | 
| 
431
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2078
 | 
 use Lingua::YaTeA::TagSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
    | 
| 
432
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
1782
 | 
 use Lingua::YaTeA::ChunkingDataSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
    | 
| 
433
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2216
 | 
 use Lingua::YaTeA::ForbiddenStructureSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
    | 
| 
434
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
3009
 | 
 use Lingua::YaTeA::PhraseSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
    | 
| 
435
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
2649
 | 
 use Lingua::YaTeA::TestifiedTermSet;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
87
 | 
    | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
437
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
 
 | 
3948
 | 
 use Config::General;  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
129145
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16778
 | 
    | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION='0.626';  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $process_counter = 1;  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub load_config   | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
446
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
431
 | 
     my ($rcfile) = @_;  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Read the configuration file  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
450
 | 
4
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
42
 | 
     if ((! defined $rcfile) || ($rcfile eq "")) {  | 
| 
451
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$rcfile = "/usr/etc/yatea/yatea.rc";      | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
454
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
     my $conf = new Config::General('-ConfigFile' => $rcfile,  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				   '-InterPolateVars' => 1,  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				   '-InterPolateEnv' => 1  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				   );  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
459
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22582
 | 
     my %config = $conf->getall;  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    `mkdir -p $config{'ALVISTMP'}`; # to put in a specific method  | 
| 
461
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
124
 | 
     return(%config);  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
467
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
692
 | 
     my ($class,$command_line_options_h,$system_config_h) = @_;  | 
| 
468
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my $this = {};  | 
| 
469
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     bless ($this,$class);  | 
| 
470
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $this->{OPTION_SET} = ();  | 
| 
471
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     $this->{CONFIG_FILE_SET} = ();  | 
| 
472
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $this->{LOCALE_FILE_SET} = ();  | 
| 
473
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $this->{MESSAGE_SET} = ();  | 
| 
474
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     $this->{TAG_SET} = ();  | 
| 
475
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $this->{PARSING_PATTERN_SET} = ();  | 
| 
476
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $this->{CHUNKING_DATA_SET} = ();  | 
| 
477
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     $this->{FS_SET} = ();  | 
| 
478
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $this->{TESTIFIED_SET} = ();  | 
| 
479
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     $this->setOptions($command_line_options_h);;  | 
| 
480
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     $this->setConfigFiles($system_config_h);  | 
| 
481
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     $this->addOptionsFromFile;  | 
| 
482
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     $this->setLocaleFiles($system_config_h);  | 
| 
483
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     $this->setMessageSet;  | 
| 
484
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     $this->getOptionSet->handleOptionDependencies($this->getMessageSet);  | 
| 
485
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     $this->getOptionSet->setDefaultOutputPath;  | 
| 
486
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     $this->setTagSet;  | 
| 
487
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     $this->setParsingPatterns;  | 
| 
488
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     $this->setChunkingDataSet;  | 
| 
489
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $this->setForbiddenStructureSet;  | 
| 
490
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     return $this;  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub termExtraction  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
498
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
19
 | 
     my ($this,$corpus) = @_;  | 
| 
499
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     my $sentence_boundary = $this->getOptionSet->getSentenceBoundary;  | 
| 
500
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $document_boundary = $this->getOptionSet->getDocumentBoundary;  | 
| 
501
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my $debug_fh = FileHandle->new(">".$corpus->getOutputFileSet->getFile('debug')->getPath);;  | 
| 
502
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
230
 | 
     binmode($debug_fh, ":utf8");  | 
| 
503
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     $this->loadTestifiedTerms(\$process_counter,$corpus,$sentence_boundary,$document_boundary,$this->getOptionSet->MatchTypeValue,$this->getMessageSet,$this->getOptionSet->getDisplayLanguage);  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
506
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     print STDERR $process_counter++ . ") " . ($this->getMessageSet->getMessage('LOAD_CORPUS')->getContent($this->getOptionSet->getDisplayLanguage)) . "\n";  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    warn "Language: " . $this->getOptionSet->getLanguage . "\n";  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
511
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     $corpus->read($sentence_boundary,$document_boundary,$this->getFSSet,$this->getTestifiedTermSet,$this->getOptionSet->MatchTypeValue,$this->getMessageSet,$this->getOptionSet->getDisplayLanguage, $this->getOptionSet->getLanguage,$debug_fh);  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
513
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
98
 | 
     my $phrase_set = Lingua::YaTeA::PhraseSet->new;  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
515
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     print STDERR $process_counter++ . ") " . ($this->getMessageSet->getMessage('CHUNKING')->getContent($this->getOptionSet->getDisplayLanguage)) . "\n";  | 
| 
516
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     $corpus->chunk($phrase_set,$sentence_boundary,$document_boundary,$this->getChunkingDataSet,$this->getFSSet,$this->getTagSet,$this->getParsingPatternSet,$this->getTestifiedTermSet,$this->getOptionSet,$debug_fh);  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
518
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     my $fh = FileHandle->new(">".$corpus->getOutputFileSet->getFile('unparsed')->getPath);  | 
| 
519
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
506
 | 
     binmode($fh, ":utf8");  | 
| 
520
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     $phrase_set->printPhrases($fh);  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     print STDERR Dumper($phrase_set);  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
524
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     $phrase_set->printChunkingStatistics($this->getMessageSet,$this->getOptionSet->getDisplayLanguage);  | 
| 
525
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
14
 | 
     if ((! defined $this->getOptionSet->getOption('annotate-only')) || ($this->getOptionSet->getOption('annotate-only')->getValue() == 0))  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
527
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	$phrase_set->sortUnparsed;  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
529
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	print STDERR $process_counter++ . ") " . ($this->getMessageSet->getMessage('PARSING')->getContent($this->getOptionSet->getDisplayLanguage)) . "\n";  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
531
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
 	$phrase_set->parseProgressively($this->getTagSet,$this->getOptionSet->getParsingDirection,$this->getParsingPatternSet,$this->getChunkingDataSet,$corpus->getLexicon,$corpus->getSentenceSet,$this->getMessageSet,$this->getOptionSet->getDisplayLanguage,$debug_fh);  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
533
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 	$phrase_set->printParsingStatistics($this->getMessageSet,$this->getOptionSet->getDisplayLanguage);  | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
535
 | 
2
 | 
  
  0
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
16
 | 
 	if(  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ((defined $this->getOptionSet->getOption('xmlout')) && ($this->getOptionSet->getOption('xmlout') == 1))  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ||  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ((defined $this->getOptionSet->getOption('termList')) && ($this->getOptionSet->getOption('termList') ne ""))  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ||  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ((defined $this->getOptionSet->getOption('printChunking')) && ($this->getOptionSet->getOption('printChunking')) == 1)   | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ||  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ((defined $this->getOptionSet->getOption('TC-for-BioLG')) && ($this->getOptionSet->getOption('TC-for-BioLG')) == 1)  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ||  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ((defined $this->getOptionSet->getOption('TTG-style-term-candidates')) && ($this->getOptionSet->getOption('TTG-style-term-candidates') ne ""))  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ||  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ($this->getOptionSet->getDefaultOutput == 1)  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    )  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
549
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	    $phrase_set->addTermCandidates($this->getOptionSet);  | 
| 
550
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	    $corpus->makeDDW($phrase_set->getTermCandidates,$debug_fh);  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
554
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     print STDERR $process_counter++ . ") " . ($this->getMessageSet->getMessage('RESULTS')->getContent($this->getOptionSet->getDisplayLanguage)) . "\n";  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
556
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $this->displayExtractionResults($phrase_set,$corpus,$this->getMessageSet,$this->getOptionSet->getDisplayLanguage,$this->getOptionSet->getDefaultOutput,$debug_fh);  | 
| 
557
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
833
 | 
     return(0);  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setOptions  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
564
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
14
 | 
     my ($this,$command_line_options_h) = @_;  | 
| 
565
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $options;  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
567
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     $this->{OPTION_SET} = Lingua::YaTeA::OptionSet->new;  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
569
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $this->getOptionSet->addOptionSet($command_line_options_h,$this->getMessageSet,"EN");  | 
| 
570
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     $this->getOptionSet->checkCompulsory("language");  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setConfigFiles  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
576
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
16
 | 
     my ($this,$system_config_h) = @_;  | 
| 
577
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     my $config_files;  | 
| 
578
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     my $language = $this->getOptionSet->getLanguage;  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    print STDERR Dumper(%$system_config_h);  | 
| 
580
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     my $repository = $system_config_h->{'DefaultConfig'}->{CONFIG_DIR} . "/" . $language;  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
582
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     my @file_names = ("Options","ForbiddenStructures","ChunkingFrontiers","ChunkingExceptions","CleaningFrontiers","CleaningExceptions","ParsingPatterns","TagSet","LGPmapping");  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
584
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     $this->{CONFIG_FILE_SET} = Lingua::YaTeA::FileSet->new($repository);  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
586
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $this->getConfigFileSet->checkRepositoryExists;  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
588
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
     $this->getConfigFileSet->addFiles($this->getConfigFileSet->getRepository,\@file_names);  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setLocaleFiles  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
594
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
11
 | 
     my ($this,$system_config_h) = @_;  | 
| 
595
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     my $config_files;  | 
| 
596
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     my $repository = $system_config_h->{'DefaultConfig'}->{LOCALE_DIR} . "/";  | 
| 
597
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my @file_names = ("Messages");  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
599
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     $this->{LOCALE_FILE_SET} = Lingua::YaTeA::FileSet->new($repository);  | 
| 
600
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     $this->getLocaleFileSet->checkRepositoryExists;  | 
| 
601
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
     $this->addMessageSetFile;  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub addOptionsFromFile  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
607
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
19
 | 
     my ($this) = @_;  | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
609
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     $this->getOptionSet->readFromFile($this->getConfigFileSet->getFile("Options"));  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setMessageSet  | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
614
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
15
 | 
     my ($this,$system_config_h) = @_;  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
617
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
147
 | 
    $this->{MESSAGE_SET} = Lingua::YaTeA::MessageSet->new($this->getLocaleFileSet->getFile("Messages"),$this->getOptionSet->getDisplayLanguage);  | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setTagSet  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
623
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
11
 | 
     my ($this) = @_;  | 
| 
624
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     $this->{TAG_SET} = Lingua::YaTeA::TagSet->new($this->getConfigFileSet->getFile("TagSet")->getPath);  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    print STDERR "Tagset loaded\n"  | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setParsingPatterns  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
630
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
13
 | 
     my ($this) = @_;  | 
| 
631
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     $this->{PARSING_PATTERN_SET} = Lingua::YaTeA::ParsingPatternRecordSet->new($this->getConfigFileSet->getFile("ParsingPatterns")->getPath,$this->getTagSet,$this->getMessageSet,$this->getOptionSet->getDisplayLanguage);  | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    print STDERR "Parsing Patterns loaded\n";  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setChunkingDataSet  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
637
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
17
 | 
     my ($this) = @_;  | 
| 
638
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     $this->{CHUNKING_DATA_SET} = Lingua::YaTeA::ChunkingDataSet->new($this->getConfigFileSet);  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    print STDERR "Chunking Data loaded\n";  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setForbiddenStructureSet  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
645
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
13
 | 
     my ($this) = @_;  | 
| 
646
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     $this->{FS_SET} = Lingua::YaTeA::ForbiddenStructureSet->new($this->getConfigFileSet->getFile("ForbiddenStructures")->getPath);   | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    print STDERR "Forbidden Structures loaded\n"  | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub loadTestifiedTerms  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
653
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
18
 | 
     my ($this,$process_counter_r,$corpus,$sentence_boundary,$document_boundary,$match_type,$message_set,$display_language) = @_;  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
655
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my $filtering_lexicon_h;  | 
| 
656
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     if	($this->getOptionSet->optionExists('termino'))  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
658
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	print STDERR "\n" . $$process_counter_r++ . ") " . $message_set->getMessage('LOADING_TESTIFIED')->getContent($display_language) . "\n";  | 
| 
659
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	$filtering_lexicon_h = $corpus->preLoadLexicon($sentence_boundary,$document_boundary,$match_type);  | 
| 
660
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	$this->setTestifiedTermSet($filtering_lexicon_h,$sentence_boundary,$match_type);  | 
| 
661
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	print STDERR "\t" . $Lingua::YaTeA::TestifiedTerm::id . ($message_set->getMessage('TESTIFIED_LOADED')->getContent($display_language)) . "\n";  | 
| 
662
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	$this->getTestifiedTermSet->changeKeyToID;  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # (($this->getOptionSet->getOption('TT-for-BioLG')->getValue() == 1) &&  | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  	# creation of an empty set of Testified Terms  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# TTforLGp can be used to build a XML version of the corpus compatible with BioLG, even if no testified terms are provided  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# if ($this->getOptionSet->optionExists('TT-for-BioLG'))  | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# {  | 
| 
671
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	    $this->{TESTIFIED_SET} = Lingua::YaTeA::TestifiedTermSet->new;   | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# }  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub setTestifiedTermSet  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
679
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
5
 | 
     my ($this,$filtering_lexicon_h,$sentence_boundary,$match_type) = @_;  | 
| 
680
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $file_path;  | 
| 
681
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     $this->{TESTIFIED_SET} = Lingua::YaTeA::TestifiedTermSet->new;   | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
683
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     $file_path = $this->getOptionSet->getOption('termino')->getValue; # modified by Thierry Hamon 05/02/2007  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     foreach $file_path (@{$this->getOptionSet->getOption('termino')->getValue})  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     {  | 
| 
686
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	$this->getTestifiedSet->addSubset($file_path,$filtering_lexicon_h,$sentence_boundary,$match_type,$this->getTagSet);    | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     }  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getTestifiedTermSet  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
693
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
22
 | 
     my ($this) = @_;  | 
| 
694
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     return $this->{TESTIFIED_SET};  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getFSSet  | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
701
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
12
 | 
     my ($this) = @_;  | 
| 
702
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     return $this->{FS_SET};  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getConfigFileSet  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
707
 | 
32
 | 
 
 | 
 
 | 
  
32
  
 | 
  
1
  
 | 
68
 | 
     my ($this) = @_;  | 
| 
708
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
174
 | 
     return $this->{CONFIG_FILE_SET};  | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getLocaleFileSet  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
713
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
  
1
  
 | 
32
 | 
     my ($this) = @_;  | 
| 
714
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
     return $this->{LOCALE_FILE_SET};  | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getResultFileSet  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
719
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this) = @_;  | 
| 
720
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $this->{RESULT_FILE_SET};  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getOptionSet  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
726
 | 
146
 | 
 
 | 
 
 | 
  
146
  
 | 
  
1
  
 | 
319
 | 
     my ($this) = @_;  | 
| 
727
 | 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
778
 | 
     return $this->{OPTION_SET};  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getTagSet  | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
732
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
  
1
  
 | 
27
 | 
     my ($this) = @_;  | 
| 
733
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
     return $this->{TAG_SET};  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getChunkingDataSet  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
738
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
21
 | 
     my ($this) = @_;  | 
| 
739
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     return $this->{CHUNKING_DATA_SET};  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getParsingPatternSet  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
744
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
15
 | 
     my ($this) = @_;  | 
| 
745
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     return $this->{PARSING_PATTERN_SET};  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getMessageSet  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
750
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
  
1
  
 | 
105
 | 
     my ($this) = @_;  | 
| 
751
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
236
 | 
     return $this->{MESSAGE_SET};  | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub getTestifiedSet  | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
756
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
     my ($this) = @_;  | 
| 
757
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     return $this->{TESTIFIED_SET};  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub addMessageSetFile  | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
762
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
13
 | 
     my ($this) = @_;  | 
| 
763
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my $repository = $this->getLocaleFileSet->getRepository;  | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
765
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my $display_language = $this->getOptionSet->getLanguage; # default  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # if the language of message display is different from that of the processed text, the Messages file is searched in a different sister repository  | 
| 
768
 | 
4
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
16
 | 
     if(  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	($this->getOptionSet->optionExists('MESSAGE_DISPLAY'))  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	&&  | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	($this->getOptionSet->getDisplayLanguage ne $this->getOptionSet->getLanguage)  | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	)  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
774
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$display_language = $this->getOptionSet->getDisplayLanguage;  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
777
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $repository .=  $display_language ;  | 
| 
778
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     $this->getLocaleFileSet->addFile($repository, 'Messages');  | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub displayExtractionResults  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
783
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
10
 | 
     my ($this,$phrase_set,$corpus,$message_set,$display_language,$default_output,$debug_fh) = @_;  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
787
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
15
 | 
     if ((defined $this->getOptionSet->getOption('debug')) && ($this->getOptionSet->getOption('debug')->getValue() == 1))  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
789
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('DISPLAY_RAW')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('debug')->getPath . "'\n";  | 
| 
790
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$phrase_set->printPhrases($debug_fh);  | 
| 
791
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$phrase_set->printUnparsable($corpus->getOutputFileSet->getFile('unparsable'));  | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	$phrase_set->printUnparsed($corpus->getOutputFileSet->getFile('unparsed'));  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
795
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
23
 | 
     if   | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	(  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 ((defined $this->getOptionSet->getOption('xmlout')) && ($this->getOptionSet->getOption('xmlout')->getValue() == 1))  | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 ||  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 ($default_output == 1)  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	)  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
802
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('DISPLAY_TC_XML')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('candidates')->getPath . "'\n";  | 
| 
803
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	$phrase_set->printTermCandidatesXML($corpus->getOutputFileSet->getFile("candidates"),$this->getTagSet);  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
806
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
28
 | 
     if ((defined $this->getOptionSet->getOption('printChunking')) && ($this->getOptionSet->getOption('printChunking')->getValue() == 1))  | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
808
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('DISPLAY_CORPUS_PHRASES')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('candidatesAndUnparsedInCorpus')->getPath . "'\n";  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
810
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$corpus->printCandidatesAndUnparsedInCorpus($phrase_set->getTermCandidates,$phrase_set->getUnparsable,$corpus->getOutputFileSet->getFile('candidatesAndUnparsedInCorpus'),$this->getOptionSet->getSentenceBoundary,$this->getOptionSet->getDocumentBoundary,$this->getOptionSet->getOption('COLOR_BLIND'));  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
813
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
14
 | 
     if ((defined $this->getOptionSet->getOption('TC-for-BioLG')) && ($this->getOptionSet->getOption('TC-for-BioLG')->getValue() == 1))   | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
815
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('TC_FOR_LGP')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('TCforBioLG')->getPath . "'\n";  | 
| 
816
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$corpus->printCorpusForLGPwithTCs($phrase_set->getTermCandidates,$corpus->getOutputFileSet->getFile('TCforBioLG'),$this->getOptionSet->getSentenceBoundary,$this->getOptionSet->getDocumentBoundary,$this->getConfigFileSet->getFile("LGPmapping"),$this->getOptionSet->getChainedLinks,$this->getTagSet);  | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
819
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
9
 | 
     if ((defined $this->getOptionSet->getOption('TT-for-BioLG')) && ($this->getOptionSet->getOption('TT-for-BioLG')->getValue() == 1))  | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
821
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('TT_FOR_LGP')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('TTforBioLG')->getPath . "'\n";  | 
| 
822
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$corpus->printCorpusForLGPwithTTs($this->getTestifiedTermSet->getTestifiedTerms,$corpus->getOutputFileSet->getFile('TTforBioLG'),$this->getOptionSet->getSentenceBoundary,$this->getOptionSet->getDocumentBoundary,$this->getConfigFileSet->getFile("LGPmapping"),$this->getOptionSet->getParsingDirection,$this->getOptionSet->getChainedLinks,$this->getTagSet);  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
825
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
12
 | 
     if ((defined $this->getOptionSet->getOption('XML-corpus-for-BioLG')) && ($this->getOptionSet->getOption('XML-corpus-for-BioLG')->getValue() == 1))  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
827
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('XML_FOR_BIOLG')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('corpusForBioLG')->getPath . "'\n";  | 
| 
828
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$corpus->printCorpusForBioLG($corpus->getOutputFileSet->getFile('corpusForBioLG'),$this->getOptionSet->getSentenceBoundary,$this->getOptionSet->getDocumentBoundary,$this->getOptionSet->getChainedLinks,$this->getTagSet);  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
831
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
346
 | 
     if (defined $this->getOptionSet->getOption('termList'))  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
833
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('DISPLAY_TERM_LIST')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('termList')->getPath . "'\n";  | 
| 
834
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	$phrase_set->printTermList($corpus->getOutputFileSet->getFile('termList'),$this->getOptionSet->getTermListStyle);  | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    warn $this->getOptionSet->getOption('TTG-style-term-candidates')->getValue() . "\n";  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
839
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     if (defined $this->getOptionSet->getOption('TTG-style-term-candidates'))  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
841
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('TTG_TERM_CANDIDATES')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('termCandidates')->getPath . "'\n";  | 
| 
842
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 	$phrase_set->printTermCandidatesTTG($corpus->getOutputFileSet->getFile("termCandidates"),$this->getOptionSet->getTTGStyle);  | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }   | 
| 
844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
845
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
      if (defined $this->getOptionSet->getOption('bootstrap'))  | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      {  | 
| 
847
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	 print STDERR "\t-" . ($this->getMessageSet->getMessage('DISPLAY_BOOTSTRAP')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('parsedTerms')->getPath . "'\n";  | 
| 
848
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$phrase_set->printBootstrapList($corpus->getOutputFileSet->getFile('parsedTerms'),$corpus->getName);  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      }  | 
| 
850
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
10
 | 
     if ((defined $this->getOptionSet->getOption('XML-corpus-raw')) && ($this->getOptionSet->getOption('XML-corpus-raw')->getValue() == 1))  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
852
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	print STDERR "\t-" . ($this->getMessageSet->getMessage('DISPLAY_CORPUS_RAW')->getContent($this->getOptionSet->getDisplayLanguage)) . "\'". $corpus->getOutputFileSet->getFile('corpusRaw')->getPath . "'\n";  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
854
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$corpus->printXMLRawCorpus($corpus->getOutputFileSet->getFile('corpusRaw'),$this->getOptionSet->getSentenceBoundary,$this->getOptionSet->getDocumentBoundary);  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
856
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     return(0);  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # To specify several files, repeat the -termino switch  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # for each  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |