| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Treex::Core::Scenario; |
|
2
|
|
|
|
|
|
|
$Treex::Core::Scenario::VERSION = '2.20210102'; |
|
3
|
12
|
|
|
12
|
|
347398
|
use Moose; |
|
|
12
|
|
|
|
|
1399128
|
|
|
|
12
|
|
|
|
|
117
|
|
|
4
|
12
|
|
|
12
|
|
84048
|
use Treex::Core::Common; |
|
|
12
|
|
|
|
|
48
|
|
|
|
12
|
|
|
|
|
135
|
|
|
5
|
12
|
|
|
12
|
|
67836
|
use File::Basename; |
|
|
12
|
|
|
|
|
35
|
|
|
|
12
|
|
|
|
|
930
|
|
|
6
|
12
|
|
|
12
|
|
92
|
use File::Slurp; |
|
|
12
|
|
|
|
|
30
|
|
|
|
12
|
|
|
|
|
751
|
|
|
7
|
12
|
|
|
12
|
|
6205
|
use File::chdir; |
|
|
12
|
|
|
|
|
19640
|
|
|
|
12
|
|
|
|
|
1437
|
|
|
8
|
12
|
|
|
12
|
|
107
|
use Digest::MD5 qw(md5_hex); |
|
|
12
|
|
|
|
|
33
|
|
|
|
12
|
|
|
|
|
45185
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#use Parse::RecDescent 1.967003; now using standalone version |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
has from_file => ( |
|
13
|
|
|
|
|
|
|
is => 'ro', |
|
14
|
|
|
|
|
|
|
isa => 'Str', |
|
15
|
|
|
|
|
|
|
predicate => '_has_from_file', |
|
16
|
|
|
|
|
|
|
documentation => q(Path to file with scenario), |
|
17
|
|
|
|
|
|
|
); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has from_string => ( |
|
20
|
|
|
|
|
|
|
is => 'ro', |
|
21
|
|
|
|
|
|
|
isa => 'Str', |
|
22
|
|
|
|
|
|
|
predicate => '_has_from_string', |
|
23
|
|
|
|
|
|
|
documentation => q(String with scenario), |
|
24
|
|
|
|
|
|
|
); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has scenario_string => ( |
|
27
|
|
|
|
|
|
|
is => 'ro', |
|
28
|
|
|
|
|
|
|
isa => 'Str', |
|
29
|
|
|
|
|
|
|
builder => '_build_scenario_string', |
|
30
|
|
|
|
|
|
|
lazy => 1, |
|
31
|
|
|
|
|
|
|
); |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has block_items => ( |
|
34
|
|
|
|
|
|
|
is => 'ro', |
|
35
|
|
|
|
|
|
|
isa => 'ArrayRef[HashRef]', |
|
36
|
|
|
|
|
|
|
builder => 'parse_scenario_string', |
|
37
|
|
|
|
|
|
|
init_arg => undef, |
|
38
|
|
|
|
|
|
|
lazy => 1, |
|
39
|
|
|
|
|
|
|
); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
has loaded_blocks => ( |
|
42
|
|
|
|
|
|
|
is => 'ro', |
|
43
|
|
|
|
|
|
|
isa => 'ArrayRef[Treex::Core::Block]', |
|
44
|
|
|
|
|
|
|
builder => '_build_loaded_blocks', |
|
45
|
|
|
|
|
|
|
predicate => 'is_initialized', |
|
46
|
|
|
|
|
|
|
lazy => 1, |
|
47
|
|
|
|
|
|
|
init_arg => undef, |
|
48
|
|
|
|
|
|
|
); |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
has document_reader => ( |
|
51
|
|
|
|
|
|
|
is => 'rw', |
|
52
|
|
|
|
|
|
|
does => 'Treex::Core::DocumentReader', |
|
53
|
|
|
|
|
|
|
predicate => '_has_document_reader', |
|
54
|
|
|
|
|
|
|
writer => '_set_document_reader', |
|
55
|
|
|
|
|
|
|
init_arg => undef, |
|
56
|
|
|
|
|
|
|
documentation => 'DocumentReader starts every scenario and reads a stream of documents.' |
|
57
|
|
|
|
|
|
|
); |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
has writers => ( |
|
60
|
|
|
|
|
|
|
is => 'rw', |
|
61
|
|
|
|
|
|
|
does => 'ArrayRef[Treex::Block::Write::BaseWriter]', |
|
62
|
|
|
|
|
|
|
default => sub { [] } |
|
63
|
|
|
|
|
|
|
); |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
has _global_params => ( |
|
66
|
|
|
|
|
|
|
is => 'ro', |
|
67
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
|
68
|
|
|
|
|
|
|
traits => ['Hash'], |
|
69
|
|
|
|
|
|
|
default => sub { {} }, |
|
70
|
|
|
|
|
|
|
handles => { |
|
71
|
|
|
|
|
|
|
get_global_param => 'get', |
|
72
|
|
|
|
|
|
|
set_global_param => 'set', |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
#get_global_param_names => 'keys', |
|
75
|
|
|
|
|
|
|
#set_verbose => [ set => 'verbose' ], |
|
76
|
|
|
|
|
|
|
#get_verbose => [ get => 'verbose' ], |
|
77
|
|
|
|
|
|
|
#set_language => [ set => 'language' ], |
|
78
|
|
|
|
|
|
|
#get_language => [ get => 'language' ], |
|
79
|
|
|
|
|
|
|
#... ? |
|
80
|
|
|
|
|
|
|
}, |
|
81
|
|
|
|
|
|
|
); |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
has parser => ( |
|
84
|
|
|
|
|
|
|
is => 'ro', |
|
85
|
|
|
|
|
|
|
isa => 'Parse::RecDescent::_Runtime', |
|
86
|
|
|
|
|
|
|
init_arg => undef, |
|
87
|
|
|
|
|
|
|
builder => '_build_parser', |
|
88
|
|
|
|
|
|
|
documentation => q{Parses treex scenarios} |
|
89
|
|
|
|
|
|
|
); |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
has runner => ( |
|
92
|
|
|
|
|
|
|
is => 'ro', |
|
93
|
|
|
|
|
|
|
isa => 'Treex::Core::Run', |
|
94
|
|
|
|
|
|
|
writer => '_set_runner', |
|
95
|
|
|
|
|
|
|
weak_ref => 1, |
|
96
|
|
|
|
|
|
|
documentation => 'Treex::Core::Run instance in which the scenario is running', |
|
97
|
|
|
|
|
|
|
); |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
has cache => ( |
|
100
|
|
|
|
|
|
|
is => 'rw', |
|
101
|
|
|
|
|
|
|
isa => 'Maybe[Cache::Memcached]', |
|
102
|
|
|
|
|
|
|
builder => '_build_cache', |
|
103
|
|
|
|
|
|
|
); |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub _build_scenario_string { |
|
106
|
15
|
|
|
15
|
|
319
|
my $self = shift; |
|
107
|
14
|
100
|
|
|
|
480
|
if ( $self->_has_from_file ) { |
|
|
|
50
|
|
|
|
|
|
|
108
|
10
|
|
|
|
|
289
|
return $self->_load_scenario_file( $self->from_file ); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
elsif ( $self->_has_from_string ) { |
|
111
|
5
|
|
|
|
|
135
|
return $self->from_string; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
1
|
|
|
|
|
3
|
log_fatal("You have to provide from_file or from_string attribute"); |
|
114
|
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
my %sequence = (); |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub _build_loaded_blocks { |
|
119
|
12
|
|
|
12
|
|
58
|
my $self = shift; |
|
120
|
12
|
|
|
|
|
34
|
my @block_items = @{ $self->block_items }; |
|
|
12
|
|
|
|
|
365
|
|
|
121
|
10
|
|
|
|
|
45
|
my $block_count = scalar @block_items; |
|
122
|
10
|
|
|
|
|
31
|
my $i = 0; |
|
123
|
10
|
|
|
|
|
25
|
my @loaded_blocks; |
|
124
|
|
|
|
|
|
|
|
|
125
|
10
|
|
|
|
|
39
|
my $sequence_from = 0; |
|
126
|
10
|
|
|
|
|
32
|
my $sequence_hash = ""; |
|
127
|
10
|
|
|
|
|
27
|
foreach my $block_item (@block_items) { |
|
128
|
21
|
|
|
|
|
63
|
$i++; |
|
129
|
21
|
|
|
|
|
55
|
my $params = ''; |
|
130
|
21
|
50
|
|
|
|
76
|
if ( $block_item->{block_parameters} ) { |
|
131
|
21
|
|
|
|
|
73
|
$params = join ' ', @{ $block_item->{block_parameters} }; |
|
|
21
|
|
|
|
|
83
|
|
|
132
|
|
|
|
|
|
|
} |
|
133
|
21
|
|
|
|
|
185
|
log_info("Loading block $block_item->{block_name} $params ($i/$block_count)"); |
|
134
|
21
|
|
|
|
|
129
|
my $new_block = $self->_load_block($block_item); |
|
135
|
|
|
|
|
|
|
|
|
136
|
19
|
100
|
|
|
|
123
|
if ( $new_block->does('Treex::Core::DocumentReader') ) { |
|
|
|
100
|
|
|
|
|
|
|
137
|
8
|
50
|
|
|
|
3852
|
log_fatal("Only one DocumentReader per scenario is permitted ($block_item->{block_name})") |
|
138
|
|
|
|
|
|
|
if $self->_has_document_reader; |
|
139
|
8
|
|
|
|
|
287
|
$self->_set_document_reader($new_block); |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
elsif ( $new_block->isa('Treex::Block::Write::BaseWriter') ) { |
|
142
|
1
|
|
|
|
|
552
|
push( @{ $self->writers }, $new_block ); |
|
|
1
|
|
|
|
|
31
|
|
|
143
|
1
|
|
|
|
|
4
|
push @loaded_blocks, $new_block; # duplicity |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
else { |
|
146
|
10
|
50
|
|
|
|
4328
|
if ( ref($new_block) eq "Treex::Core::CacheBlock" ) { |
|
|
|
50
|
|
|
|
|
|
|
147
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{from} = $sequence_from; |
|
148
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{to} = $i; |
|
149
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{hash} = $sequence_hash; |
|
150
|
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
0
|
$sequence{$i}{_from} = $sequence_from; |
|
152
|
0
|
|
|
|
|
0
|
$sequence_from = $i; |
|
153
|
0
|
|
|
|
|
0
|
push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() ); |
|
|
0
|
|
|
|
|
0
|
|
|
154
|
0
|
|
|
|
|
0
|
$sequence_hash = $new_block->get_hash(); |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
elsif ($self->cache) { |
|
157
|
0
|
|
|
|
|
0
|
$sequence_hash = md5_hex( $sequence_hash . $new_block->get_hash() ); |
|
158
|
0
|
0
|
|
|
|
0
|
if ( defined( $sequence{$sequence_from} ) ) { |
|
159
|
0
|
|
|
|
|
0
|
push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() ); |
|
|
0
|
|
|
|
|
0
|
|
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
|
|
163
|
10
|
|
|
|
|
44
|
push @loaded_blocks, $new_block; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
8
|
|
|
|
|
49
|
log_info('ALL BLOCKS SUCCESSFULLY LOADED.'); |
|
168
|
8
|
|
|
|
|
297
|
return \@loaded_blocks; |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _load_parser { |
|
172
|
14
|
|
|
15
|
|
35
|
my $self = shift; |
|
173
|
14
|
|
|
|
|
6500
|
require Treex::Core::ScenarioParser; |
|
174
|
14
|
|
|
|
|
131
|
return Treex::Core::ScenarioParser->new(); |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub _my_dir { |
|
178
|
0
|
|
|
1
|
|
0
|
return dirname( (caller)[1] ); |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub _build_parser { |
|
182
|
14
|
|
|
15
|
|
14712
|
my $self = shift; |
|
183
|
14
|
|
|
|
|
37
|
my $parser; |
|
184
|
14
|
50
|
|
|
|
39
|
eval { |
|
185
|
14
|
|
|
|
|
65
|
$parser = $self->_load_parser(); |
|
186
|
14
|
|
|
|
|
143
|
1; |
|
187
|
|
|
|
|
|
|
} and return $parser; |
|
188
|
0
|
|
|
|
|
0
|
log_info("Cannot find precompiled scenario parser, trying to build it from grammar"); |
|
189
|
0
|
|
|
|
|
0
|
my $dir = $self->_my_dir(); #get module's directory |
|
190
|
0
|
|
|
|
|
0
|
my $file = "$dir/ScenarioParser.rdg"; #find grammar file |
|
191
|
0
|
0
|
|
|
|
0
|
log_fatal("Cannot find grammar file") if !-e $file; |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
#in fact we should never reach this |
|
194
|
0
|
|
|
|
|
0
|
log_warn('We should NOT reach this place. Treex distribution may be corrupted.'); |
|
195
|
|
|
|
|
|
|
|
|
196
|
0
|
|
|
|
|
0
|
my $grammar = read_file($file); #load it |
|
197
|
|
|
|
|
|
|
eval { |
|
198
|
0
|
|
|
|
|
0
|
log_info("Trying to precompile it for you"); |
|
199
|
0
|
|
|
|
|
0
|
require Parse::RecDescent; |
|
200
|
0
|
|
|
|
|
0
|
local $CWD = $dir; |
|
201
|
0
|
|
|
|
|
0
|
Parse::RecDescent->Precompile( { -standalone => 1 }, $grammar, 'Treex::Core::ScenarioParser' ); |
|
202
|
0
|
|
|
|
|
0
|
$parser = $self->_load_parser(); |
|
203
|
0
|
|
|
|
|
0
|
1; |
|
204
|
0
|
0
|
0
|
|
|
0
|
} or eval { |
|
205
|
0
|
|
|
|
|
0
|
log_info("Cannot precompile, loading directly from grammar. Consider precompiling it manually"); |
|
206
|
0
|
|
|
|
|
0
|
require Parse::RecDescent; |
|
207
|
0
|
|
|
|
|
0
|
$parser = Parse::RecDescent->new($grammar); #create parser |
|
208
|
0
|
|
|
|
|
0
|
1; |
|
209
|
|
|
|
|
|
|
} or log_fatal("Cannot create Scenario parser"); |
|
210
|
0
|
|
|
|
|
0
|
return $parser; |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub _build_cache { |
|
214
|
14
|
|
|
15
|
|
30785
|
my $self = shift; |
|
215
|
|
|
|
|
|
|
|
|
216
|
14
|
50
|
33
|
|
|
433
|
if ( $self->runner && $self->runner->cache ) { |
|
217
|
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
0
|
require Treex::Core::CacheBlock; |
|
219
|
0
|
|
|
|
|
0
|
require Treex::Tool::Memcached::Memcached; |
|
220
|
|
|
|
|
|
|
|
|
221
|
0
|
|
|
|
|
0
|
return Treex::Tool::Memcached::Memcached::get_connection( |
|
222
|
|
|
|
|
|
|
"documents-cache" |
|
223
|
|
|
|
|
|
|
); |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
|
|
226
|
14
|
|
|
|
|
52
|
return; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub _load_scenario_file { |
|
230
|
10
|
|
|
11
|
|
31
|
my ( $self, $scenario_filename ) = @_; |
|
231
|
10
|
|
|
|
|
70
|
log_info "Loading scenario description $scenario_filename"; |
|
232
|
10
|
50
|
|
|
|
60
|
my $scenario_string = read_file( $scenario_filename, binmode => ':utf8', err_mode => 'quiet' ) |
|
233
|
|
|
|
|
|
|
or log_fatal "Can't open scenario file $scenario_filename"; |
|
234
|
10
|
|
|
|
|
1947
|
return $scenario_string; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
sub parse_scenario_string { |
|
238
|
14
|
|
|
14
|
1
|
36
|
my $self = shift; |
|
239
|
14
|
|
|
|
|
420
|
my $scenario_string = $self->scenario_string; |
|
240
|
14
|
|
|
|
|
417
|
my $from_file = $self->from_file; |
|
241
|
|
|
|
|
|
|
|
|
242
|
14
|
|
|
|
|
409
|
my $parsed = $self->parser->startrule( $scenario_string, 1, $from_file ); |
|
243
|
14
|
100
|
|
|
|
70
|
log_fatal("Cannot parse the scenario: $scenario_string") if !defined $parsed; |
|
244
|
12
|
|
|
|
|
610
|
return $parsed; |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# reverse of parse_scenario_string, used in Treex::Core::Run for treex --dump |
|
248
|
|
|
|
|
|
|
sub construct_scenario_string { |
|
249
|
3
|
|
|
3
|
1
|
3676
|
my $self = shift; |
|
250
|
3
|
|
|
|
|
8
|
my %args = @_; |
|
251
|
3
|
|
|
|
|
8
|
my $multiline = $args{multiline}; |
|
252
|
3
|
|
|
|
|
6
|
my @block_items = @{ $self->block_items }; |
|
|
3
|
|
|
|
|
92
|
|
|
253
|
3
|
100
|
|
|
|
10
|
my $delim = $multiline ? qq{\n} : q{ }; |
|
254
|
3
|
|
|
|
|
6
|
my @block_strings; |
|
255
|
3
|
|
|
|
|
10
|
foreach my $block_item (@block_items) { |
|
256
|
7
|
|
|
|
|
15
|
my $name = $block_item->{block_name}; |
|
257
|
7
|
|
|
|
|
11
|
my @parameters = map { _add_quotes($_) } @{ $block_item->{block_parameters} }; |
|
|
2
|
|
|
|
|
9
|
|
|
|
7
|
|
|
|
|
19
|
|
|
258
|
7
|
100
|
|
|
|
37
|
$name =~ s{^Treex::Block::}{} or $name = "::$name"; #strip leading Treex::Block:: or add leading :: |
|
259
|
7
|
|
|
|
|
12
|
my $params; |
|
260
|
7
|
100
|
|
|
|
18
|
if ( scalar @parameters ) { |
|
261
|
2
|
|
|
|
|
9
|
$params = q{ } . join q{ }, @parameters; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
else { |
|
264
|
5
|
|
|
|
|
9
|
$params = q{}; |
|
265
|
|
|
|
|
|
|
} |
|
266
|
7
|
|
|
|
|
21
|
push @block_strings, $name . $params; |
|
267
|
|
|
|
|
|
|
} |
|
268
|
3
|
|
|
|
|
49
|
return join $delim, @block_strings; |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub get_required_files { |
|
272
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
273
|
0
|
|
|
|
|
0
|
my @block_items = @{ $self->block_items }; |
|
|
0
|
|
|
|
|
0
|
|
|
274
|
0
|
|
|
|
|
0
|
my @required_files; |
|
275
|
0
|
|
|
|
|
0
|
foreach my $block_item (@block_items) { |
|
276
|
0
|
|
|
|
|
0
|
my $block = $self->_load_block($block_item); |
|
277
|
|
|
|
|
|
|
push @required_files, |
|
278
|
|
|
|
|
|
|
map { |
|
279
|
0
|
|
|
|
|
0
|
$block_item->{block_name} . "\t" . $_; |
|
|
0
|
|
|
|
|
0
|
|
|
280
|
|
|
|
|
|
|
} $block->get_required_share_files(); |
|
281
|
|
|
|
|
|
|
} |
|
282
|
0
|
|
|
|
|
0
|
return @required_files; |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub _add_quotes { # adding quotes only if param. value contains a space |
|
286
|
2
|
|
|
2
|
|
7
|
my ($block_parameter) = @_; |
|
287
|
2
|
|
|
|
|
14
|
my ( $name, $value ) = split /=/, $block_parameter, 2; |
|
288
|
2
|
50
|
|
|
|
12
|
if ( $value =~ /\s/ ) { |
|
289
|
0
|
|
|
|
|
0
|
my $res_string = "$name="; |
|
290
|
|
|
|
|
|
|
|
|
291
|
0
|
0
|
0
|
|
|
0
|
if ( $value =~ /'/ && $value !~ /"/ ) { |
|
292
|
0
|
|
|
|
|
0
|
$res_string .= '"' . $value . '"'; |
|
293
|
|
|
|
|
|
|
} else { |
|
294
|
0
|
|
|
|
|
0
|
$value =~ s/'/\\'/g; |
|
295
|
0
|
|
|
|
|
0
|
$res_string .= "'" . $value . "'"; |
|
296
|
|
|
|
|
|
|
} |
|
297
|
0
|
|
|
|
|
0
|
return $res_string; |
|
298
|
|
|
|
|
|
|
} |
|
299
|
2
|
|
|
|
|
9
|
return $block_parameter; |
|
300
|
|
|
|
|
|
|
} |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub load_blocks { |
|
303
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
304
|
0
|
|
|
|
|
0
|
$self->loaded_blocks; #just access lazy attribute |
|
305
|
0
|
|
|
|
|
0
|
return; |
|
306
|
|
|
|
|
|
|
} |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub init { |
|
309
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
310
|
0
|
|
|
|
|
0
|
$self->load_blocks(); |
|
311
|
0
|
|
|
|
|
0
|
return; |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub _load_block { |
|
315
|
20
|
|
|
20
|
|
61
|
my ( $self, $block_item ) = @_; |
|
316
|
20
|
|
|
|
|
54
|
my $block_name = $block_item->{block_name}; |
|
317
|
20
|
|
|
|
|
32
|
my $new_block; |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Initialize with global (scenario) parameters |
|
320
|
20
|
|
|
|
|
36
|
my %params = ( %{ $self->_global_params }, scenario => $self ); |
|
|
20
|
|
|
|
|
745
|
|
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# which can be overriden by (local) block parameters. |
|
323
|
20
|
|
|
|
|
46
|
foreach my $param ( @{ $block_item->{block_parameters} } ) { |
|
|
20
|
|
|
|
|
65
|
|
|
324
|
12
|
|
|
|
|
65
|
my ( $name, $value ) = split /=/, $param, 2; |
|
325
|
12
|
|
|
|
|
49
|
$params{$name} = $value; |
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
|
|
328
|
20
|
100
|
|
2
|
|
2126
|
eval "use $block_name; 1;" or log_fatal "Can't use block $block_name !\n$@\n"; |
|
|
2
|
|
|
2
|
|
1387
|
|
|
|
2
|
|
|
2
|
|
12
|
|
|
|
2
|
|
|
|
|
50
|
|
|
|
2
|
|
|
|
|
649
|
|
|
|
2
|
|
|
|
|
9
|
|
|
|
2
|
|
|
|
|
61
|
|
|
|
2
|
|
|
|
|
20
|
|
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
53
|
|
|
329
|
19
|
50
|
|
|
|
101
|
eval { |
|
330
|
19
|
|
|
|
|
153
|
$new_block = $block_name->new( \%params ); |
|
331
|
19
|
|
|
|
|
237
|
1; |
|
332
|
|
|
|
|
|
|
} or log_fatal "Treex::Core::Scenario->new: error when initializing block $block_name\n\nEVAL ERROR:\t$@"; |
|
333
|
|
|
|
|
|
|
|
|
334
|
19
|
0
|
33
|
|
|
598
|
if ( $self->cache && $params{'use_cache'} ) { |
|
335
|
0
|
|
|
|
|
0
|
$new_block = Treex::Core::CacheBlock->new( { block => $new_block, cache => $self->cache } ); |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
|
|
338
|
19
|
|
|
|
|
81
|
return $new_block; |
|
339
|
|
|
|
|
|
|
} |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub run { |
|
342
|
1
|
|
|
1
|
1
|
1864
|
my ($self) = @_; |
|
343
|
1
|
|
|
|
|
3
|
my $number_of_blocks = @{ $self->loaded_blocks }; |
|
|
1
|
|
|
|
|
40
|
|
|
344
|
1
|
50
|
|
|
|
42
|
log_fatal('No DocumentReader supplied') if !$self->_has_document_reader; |
|
345
|
1
|
|
|
|
|
32
|
my $reader = $self->document_reader; |
|
346
|
1
|
|
50
|
|
|
16
|
my $number_of_documents = $reader->number_of_documents_per_this_job() || '?'; |
|
347
|
1
|
|
|
|
|
3
|
my $document_number = 0; |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
#if ( $self->cache ) { |
|
350
|
|
|
|
|
|
|
# $document_number = $self->_run_with_cache( $reader, $number_of_blocks, $number_of_documents ); |
|
351
|
|
|
|
|
|
|
#} |
|
352
|
|
|
|
|
|
|
#else { |
|
353
|
1
|
|
|
|
|
8
|
$document_number = $self->_run_without_cache( $reader, $number_of_blocks, $number_of_documents ); |
|
354
|
|
|
|
|
|
|
#} |
|
355
|
|
|
|
|
|
|
|
|
356
|
1
|
50
|
|
|
|
15
|
log_info "Processed $document_number document" |
|
357
|
|
|
|
|
|
|
. ( $document_number == 1 ? '' : 's' ); |
|
358
|
1
|
|
|
|
|
10
|
return 1; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
sub _run_with_cache { |
|
362
|
|
|
|
|
|
|
|
|
363
|
0
|
|
|
0
|
|
0
|
my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_; |
|
364
|
0
|
|
|
|
|
0
|
my $document_number = 0; |
|
365
|
|
|
|
|
|
|
|
|
366
|
0
|
|
|
|
|
0
|
while ( my $document = $reader->next_document_for_this_job() ) { |
|
367
|
0
|
|
|
|
|
0
|
$document_number++; |
|
368
|
0
|
|
|
|
|
0
|
my $doc_name = $document->full_filename; |
|
369
|
0
|
|
|
|
|
0
|
my $doc_from = $document->loaded_from; |
|
370
|
0
|
|
|
|
|
0
|
log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from"; |
|
371
|
0
|
|
|
|
|
0
|
my $block_number = 0; |
|
372
|
0
|
|
|
|
|
0
|
my $skip_to = 0; |
|
373
|
0
|
|
|
|
|
0
|
my $process = 0; |
|
374
|
0
|
|
|
|
|
0
|
my $skip_from = 0; |
|
375
|
0
|
|
|
|
|
0
|
my $from_hash = ""; |
|
376
|
0
|
|
|
|
|
0
|
my $document_last_hash = ""; |
|
377
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
378
|
0
|
|
|
|
|
0
|
$block_number++; |
|
379
|
0
|
|
|
|
|
0
|
$process = 1; |
|
380
|
0
|
0
|
|
|
|
0
|
if ( $block_number < $skip_to ) { |
|
|
|
0
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# we know that there are identical, so we can skip them |
|
383
|
0
|
|
|
|
|
0
|
log_info "Skipping block $block_number/$number_of_blocks " . ref($block); |
|
384
|
0
|
|
|
|
|
0
|
$process = 0; |
|
385
|
|
|
|
|
|
|
} |
|
386
|
|
|
|
|
|
|
elsif ( $block_number == $skip_to ) { |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# this is border Cache block -> we have to check whether next sequence is also same |
|
389
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# following sequence is same => we can continue with skipping |
|
392
|
0
|
0
|
0
|
|
|
0
|
if ($sequence{$skip_from}{'to'} |
|
393
|
|
|
|
|
|
|
&& |
|
394
|
|
|
|
|
|
|
$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() ) |
|
395
|
|
|
|
|
|
|
) |
|
396
|
|
|
|
|
|
|
{ |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to}); |
|
399
|
0
|
|
|
|
|
0
|
$skip_to = $sequence{$skip_from}{to} - 1; |
|
400
|
0
|
|
|
|
|
0
|
$from_hash = $document->get_hash(); |
|
401
|
0
|
|
|
|
|
0
|
$process = 0; |
|
402
|
|
|
|
|
|
|
} |
|
403
|
|
|
|
|
|
|
else { |
|
404
|
0
|
|
|
|
|
0
|
$document_last_hash = $document->get_hash(); |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
#$document->set_hash(md5_hex($document->get_hash() . $block->get_hash())); |
|
407
|
0
|
|
|
|
|
0
|
my $full_hash = $document->get_hash(); |
|
408
|
0
|
|
|
|
|
0
|
$document = $self->cache->get($full_hash); |
|
409
|
|
|
|
|
|
|
|
|
410
|
0
|
0
|
|
|
|
0
|
if ( !$document ) { |
|
411
|
0
|
|
|
|
|
0
|
log_fatal("Document - $full_hash is missing!!!"); |
|
412
|
|
|
|
|
|
|
} |
|
413
|
0
|
|
|
|
|
0
|
$process = 2; |
|
414
|
|
|
|
|
|
|
} |
|
415
|
|
|
|
|
|
|
} |
|
416
|
|
|
|
|
|
|
|
|
417
|
0
|
0
|
|
|
|
0
|
if ( $process == 1 ) { |
|
418
|
0
|
|
|
|
|
0
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
|
419
|
|
|
|
|
|
|
|
|
420
|
0
|
0
|
|
|
|
0
|
$block->process_start if !$block->is_started; |
|
421
|
0
|
|
|
|
|
0
|
$block->_set_is_started(1); |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
#log_info("Document-hash: " . $document->get_hash()); |
|
424
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
|
425
|
0
|
|
|
|
|
0
|
my $status = $block->process_document($document); |
|
426
|
0
|
0
|
0
|
|
|
0
|
if (defined($status) |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
427
|
|
|
|
|
|
|
&& |
|
428
|
|
|
|
|
|
|
$status == $Treex::Core::Block::DOCUMENT_FROM_CACHE && |
|
429
|
|
|
|
|
|
|
$sequence{$skip_from}{'to'} && |
|
430
|
|
|
|
|
|
|
$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() ) |
|
431
|
|
|
|
|
|
|
) |
|
432
|
|
|
|
|
|
|
{ |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to}); |
|
435
|
0
|
|
|
|
|
0
|
$skip_to = $sequence{$skip_from}{to} - 1; |
|
436
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
|
437
|
0
|
|
|
|
|
0
|
$from_hash = $document->get_hash(); |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
} |
|
440
|
|
|
|
|
|
|
|
|
441
|
0
|
|
|
|
|
0
|
$document_last_hash = $document->get_hash(); |
|
442
|
0
|
|
|
|
|
0
|
$document->set_hash( md5_hex( $document->get_hash() . $block->get_hash() ) ); |
|
443
|
|
|
|
|
|
|
|
|
444
|
0
|
0
|
|
|
|
0
|
if ( ref($block) eq "Treex::Core::CacheBlock" ) { |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# cache block => mark this path as known |
|
447
|
0
|
|
|
|
|
0
|
my $id = $block_number + 1; |
|
448
|
0
|
|
|
|
|
0
|
my $from = $sequence{$id}{'_from'}; |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# the first sequence has no document |
|
451
|
0
|
0
|
|
|
|
0
|
if ( defined( $sequence{$from}{'document'} ) ) { |
|
452
|
0
|
|
|
|
|
0
|
$self->_set_known_sequence( $sequence{$from}{'hash'}, $sequence{$from}{'document'} ); |
|
453
|
|
|
|
|
|
|
} |
|
454
|
|
|
|
|
|
|
|
|
455
|
0
|
|
|
|
|
0
|
$sequence{$id}{'document'} = $document_last_hash; |
|
456
|
|
|
|
|
|
|
} |
|
457
|
|
|
|
|
|
|
} |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# this actually marks the document as successfully done in parallel processing (if this line |
|
460
|
|
|
|
|
|
|
# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel, |
|
461
|
|
|
|
|
|
|
# therefore not using log_info or similiar) |
|
462
|
0
|
0
|
|
|
|
0
|
if ( $self->document_reader->jobindex ) { |
|
463
|
0
|
|
|
|
|
0
|
print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n"; |
|
464
|
|
|
|
|
|
|
} |
|
465
|
|
|
|
|
|
|
} |
|
466
|
|
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
0
|
log_info "Applying process_end"; |
|
468
|
|
|
|
|
|
|
|
|
469
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
470
|
0
|
0
|
|
|
|
0
|
$block->process_end() if ( $block->is_started ); |
|
471
|
|
|
|
|
|
|
} |
|
472
|
|
|
|
|
|
|
|
|
473
|
0
|
|
|
|
|
0
|
return $document_number; |
|
474
|
|
|
|
|
|
|
} |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub _is_known_sequence { |
|
477
|
0
|
|
|
0
|
|
0
|
my ( $self, $sequence_hash, $document_hash ) = @_; |
|
478
|
0
|
|
|
|
|
0
|
my $hash = md5_hex( $sequence_hash, $document_hash ); |
|
479
|
0
|
|
|
|
|
0
|
return $self->cache->get($hash); |
|
480
|
|
|
|
|
|
|
} |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub _set_known_sequence { |
|
483
|
0
|
|
|
0
|
|
0
|
my ( $self, $sequence_hash, $document_hash ) = @_; |
|
484
|
0
|
|
|
|
|
0
|
my $hash = md5_hex( $sequence_hash, $document_hash ); |
|
485
|
0
|
|
|
|
|
0
|
$self->cache->set( $hash, 1 ); |
|
486
|
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
0
|
return; |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
sub _run_without_cache { |
|
491
|
|
|
|
|
|
|
|
|
492
|
1
|
|
|
1
|
|
4
|
my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_; |
|
493
|
1
|
|
|
|
|
2
|
my $document_number = 0; |
|
494
|
|
|
|
|
|
|
|
|
495
|
1
|
|
|
|
|
6
|
$self->start(); |
|
496
|
|
|
|
|
|
|
|
|
497
|
1
|
|
|
|
|
16
|
while ( my $document = $reader->next_document_for_this_job() ) { |
|
498
|
1
|
|
|
|
|
3
|
$document_number++; |
|
499
|
1
|
|
|
|
|
14
|
my $doc_name = $document->full_filename; |
|
500
|
1
|
|
|
|
|
30
|
my $doc_from = $document->loaded_from; |
|
501
|
1
|
|
|
|
|
10
|
log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from"; |
|
502
|
1
|
|
|
|
|
3
|
my $block_number = 0; |
|
503
|
1
|
|
|
|
|
5
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
|
1
|
|
|
|
|
32
|
|
|
504
|
2
|
|
|
|
|
6
|
$block_number++; |
|
505
|
2
|
|
|
|
|
11
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
|
506
|
2
|
|
|
|
|
32
|
$block->process_document($document); |
|
507
|
|
|
|
|
|
|
} |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# this actually marks the document as successfully done in parallel processing (if this line |
|
510
|
|
|
|
|
|
|
# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel, |
|
511
|
|
|
|
|
|
|
# therefore not using log_info or similiar) |
|
512
|
1
|
50
|
|
|
|
33
|
if ( $self->document_reader->jobindex ) { |
|
513
|
0
|
|
|
|
|
0
|
print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n"; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
|
|
|
|
|
|
} |
|
516
|
|
|
|
|
|
|
|
|
517
|
1
|
|
|
|
|
9
|
$self->end(); |
|
518
|
|
|
|
|
|
|
|
|
519
|
1
|
50
|
|
|
|
10
|
log_info "Processed $document_number document" |
|
520
|
|
|
|
|
|
|
. ( $document_number == 1 ? '' : 's' ); |
|
521
|
|
|
|
|
|
|
|
|
522
|
1
|
|
|
|
|
3
|
return $document_number; |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# Apply process_start to all blocks for which this has not yet been applied |
|
526
|
|
|
|
|
|
|
sub start { |
|
527
|
1
|
|
|
1
|
1
|
4
|
my ($self) = @_; |
|
528
|
|
|
|
|
|
|
|
|
529
|
1
|
|
|
|
|
16
|
log_info "Applying process_start"; |
|
530
|
1
|
|
|
|
|
4
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
|
1
|
|
|
|
|
35
|
|
|
531
|
2
|
50
|
|
|
|
91
|
$block->process_start() if !$block->is_started; |
|
532
|
2
|
|
|
|
|
73
|
$block->_set_is_started(1); |
|
533
|
|
|
|
|
|
|
} |
|
534
|
|
|
|
|
|
|
|
|
535
|
1
|
|
|
|
|
10
|
return; |
|
536
|
|
|
|
|
|
|
} |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
# Apply the scenario to documents given in parameter |
|
539
|
|
|
|
|
|
|
sub apply_to_documents { |
|
540
|
|
|
|
|
|
|
|
|
541
|
0
|
|
|
0
|
1
|
0
|
my ( $self, @documents ) = @_; |
|
542
|
|
|
|
|
|
|
|
|
543
|
0
|
|
|
|
|
0
|
my $number_of_blocks = @{ $self->loaded_blocks }; |
|
|
0
|
|
|
|
|
0
|
|
|
544
|
0
|
|
|
|
|
0
|
my $block_number = 0; |
|
545
|
|
|
|
|
|
|
|
|
546
|
0
|
|
|
|
|
0
|
foreach my $document (@documents){ |
|
547
|
0
|
|
|
|
|
0
|
log_info "Processing document" . $document->full_filename; |
|
548
|
|
|
|
|
|
|
|
|
549
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
550
|
0
|
|
|
|
|
0
|
$block_number++; |
|
551
|
0
|
|
|
|
|
0
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
|
552
|
0
|
|
|
|
|
0
|
$block->process_document($document); |
|
553
|
|
|
|
|
|
|
} |
|
554
|
|
|
|
|
|
|
} |
|
555
|
|
|
|
|
|
|
|
|
556
|
0
|
|
|
|
|
0
|
return; |
|
557
|
|
|
|
|
|
|
} |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
# Apply process_end to all blocks for which this has not yet been applied |
|
560
|
|
|
|
|
|
|
sub end { |
|
561
|
1
|
|
|
1
|
1
|
4
|
my ($self) = @_; |
|
562
|
|
|
|
|
|
|
|
|
563
|
1
|
|
|
|
|
5
|
log_info "Applying process_end"; |
|
564
|
1
|
|
|
|
|
2
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
|
1
|
|
|
|
|
39
|
|
|
565
|
2
|
50
|
|
|
|
64
|
$block->process_end() if ( $block->is_started ); |
|
566
|
|
|
|
|
|
|
} |
|
567
|
|
|
|
|
|
|
|
|
568
|
1
|
|
|
|
|
4
|
return; |
|
569
|
|
|
|
|
|
|
} |
|
570
|
|
|
|
|
|
|
|
|
571
|
12
|
|
|
12
|
|
5717
|
use Module::Reload; |
|
|
12
|
|
|
|
|
5755
|
|
|
|
12
|
|
|
|
|
1452
|
|
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
sub restart { |
|
574
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
575
|
0
|
|
|
|
|
0
|
my $changed_modules = Module::Reload->check; |
|
576
|
0
|
|
|
|
|
0
|
log_info "Number of reloaded modules = $changed_modules"; |
|
577
|
0
|
|
|
|
|
0
|
log_info "reseting the document reader\n"; |
|
578
|
0
|
|
|
|
|
0
|
$self->document_reader->restart(); |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
# TODO rebuild the reloaded blocks |
|
581
|
0
|
|
|
|
|
0
|
return; |
|
582
|
|
|
|
|
|
|
} |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
1; |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
__END__ |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=for Pod::Coverage BUILD |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=encoding utf-8 |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=head1 NAME |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
Treex::Core::Scenario - a larger Treex processing unit, composed of blocks |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=head1 VERSION |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
version 2.20210102 |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
use Treex::Core; |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
my $doc1, $doc2; |
|
605
|
|
|
|
|
|
|
my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' ); |
|
606
|
|
|
|
|
|
|
$scenario->run; |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
$scenario = Treex::Core::Scenario->new(from_string => 'W2A::EN::Segment language=en'); |
|
610
|
|
|
|
|
|
|
$scenario->start(); |
|
611
|
|
|
|
|
|
|
$scenario->apply_to_documents($doc1, $doc2); |
|
612
|
|
|
|
|
|
|
$scenario->end(); |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
616
|
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
A Treex scenario consists of a sequence of (possibly parametrized) Treex blocks. |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Scenarios can be described by a simple textual format, which is either passed |
|
621
|
|
|
|
|
|
|
directly to the scenario construction, or is contained in a text file whose |
|
622
|
|
|
|
|
|
|
name is passed. |
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
The string description of scenarios looks as follows. |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
1) It contains a list of block names from which their 'C<Treex::Block::>' |
|
627
|
|
|
|
|
|
|
prefixes were removed. |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
2) The block names are separated by one or more white spaces. |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
3) The block names are listed in the same order in which they should be |
|
632
|
|
|
|
|
|
|
applied on data. |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
4) For each block, there can be one or more parameters specified, using the |
|
635
|
|
|
|
|
|
|
C<attribute=value> form. |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
5) Comments start with 'C<#>' and end with the nearest newline character. |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
Scenario example: |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
# morphological analysis of an English text |
|
643
|
|
|
|
|
|
|
Util::SetGlobal language=en selector=src |
|
644
|
|
|
|
|
|
|
Read::Text |
|
645
|
|
|
|
|
|
|
W2A::ResegmentSentences |
|
646
|
|
|
|
|
|
|
W2A::EN::Tokenize |
|
647
|
|
|
|
|
|
|
W2A::EN::NormalizeForms |
|
648
|
|
|
|
|
|
|
W2A::EN::FixTokenization |
|
649
|
|
|
|
|
|
|
W2A::EN::TagMorce |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head1 METHODS |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head2 Constructor |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=over 4 |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item my $scenario = Treex::Core::Scenario->new(from_string => 'W2A::Tokenize language=en W2A::Lemmatize' ); |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Constructor parameter C<from_string> specifies the names of blocks which are |
|
661
|
|
|
|
|
|
|
to be executed (in the specified order) when the scenario is applied on a |
|
662
|
|
|
|
|
|
|
L<Treex::Core::Document> object. |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=item my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' ); |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
The scenario description is loaded from the file. |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=back |
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=head2 Running the scenario |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=over 4 |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=item $scenario->run(); |
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
Run the scenario. |
|
678
|
|
|
|
|
|
|
One of the blocks (usually the first one) must be the document reader (see |
|
679
|
|
|
|
|
|
|
L<Treex::Core::DocumentReader>) that produces the |
|
680
|
|
|
|
|
|
|
documents on which this scenario is applied. |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=item $scenario->apply_to_documents($treex_doc); |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
Apply this scenario to a L<Treex::Core::Document> instance obtained from elsewhere. |
|
685
|
|
|
|
|
|
|
Please note that C<start()> must be called before the first call to this method and C<end()> |
|
686
|
|
|
|
|
|
|
after the last call to this method. |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
The scenario does not need to contain a document reader if documents are given |
|
689
|
|
|
|
|
|
|
explicitly. |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
=item $scenario->start(); |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Apply C<process_start()> to all blocks in the scenario. |
|
694
|
|
|
|
|
|
|
This is called automatically by C<run()>, but must be called before C<apply_to_documents()>. |
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=item $scenario->end(); |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
Apply C<process_end()> to all blocks in the scenario. |
|
699
|
|
|
|
|
|
|
This is called automatically by C<run()>, but must be called after calls to C<apply_to_documents()>. |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=back |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=head2 Internal methods for loading scenarios |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=over 4 |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=item _load_scenario_file($filename) |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
loads a scenario description from a file |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=item parse_scenario_string |
|
713
|
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
parses a textual description of a scenario |
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=item construct_scenario_string |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
constructs a scenario textual description from an existing scenario instance |
|
719
|
|
|
|
|
|
|
accepts named parameter multiline - when set, blocks are separated by newline instead of space |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=item load_blocks |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
use blocks and call their constructors |
|
724
|
|
|
|
|
|
|
can be used for preloading blocks for e.g. server applications |
|
725
|
|
|
|
|
|
|
when running scenario blocks are loaded automatically |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=item init |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
do all initialization so after this method scenario is ready to run |
|
730
|
|
|
|
|
|
|
currently just load blocks |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=item restart |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
resets the document reader, in future it will rebuild reloaded blocks |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=back |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
L<Treex::Core::Block> |
|
742
|
|
|
|
|
|
|
L<Treex::Core> |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
=head1 AUTHORS |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz> |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Martin Popel <popel@ufal.mff.cuni.cz> |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
David Mareček <marecek@ufal.mff.cuni.cz> |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Tomáš Kraut <kraut@ufal.mff.cuni.cz> |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Martin Majliš <majlis@ufal.mff.cuni.cz> |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
Ondřej Dušek <odusek@ufal.mff.cuni.cz> |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |