File Coverage

blib/lib/Treex/Core/Scenario.pm

Criterion	Covered	Total	%
statement	132	333	39.6
branch	23	80	28.7
condition	2	26	7.6
subroutine	32	45	71.1
pod	9	10	90.0
total	198	494	40.0

line	stmt	bran	cond	sub	pod	time	code
1							package Treex::Core::Scenario;
2							$Treex::Core::Scenario::VERSION = '2.20150928';
3	3			3		148856	use Moose;
	3					1285828
	3					21
4	3			3		22224	use Treex::Core::Common;
	3					12
	3					16
5	3			3		18027	use File::Basename;
	3					8
	3					221
6	3			3		16	use File::Slurp;
	3					7
	3					201
7	3			3		2727	use File::chdir;
	3					5017
	3					359
8	3			3		19	use Digest::MD5 qw(md5_hex);
	3					7
	3					11725
9
10							#use Parse::RecDescent 1.967003; now using standalone version
11
12							has from_file => (
13							is => 'ro',
14							isa => 'Str',
15							predicate => '_has_from_file',
16							documentation => q(Path to file with scenario),
17							);
18
19							has from_string => (
20							is => 'ro',
21							isa => 'Str',
22							predicate => '_has_from_string',
23							documentation => q(String with scenario),
24							);
25
26							has scenario_string => (
27							is => 'ro',
28							isa => 'Str',
29							builder => '_build_scenario_string',
30							lazy => 1,
31							);
32
33							has block_items => (
34							is => 'ro',
35							isa => 'ArrayRef[HashRef]',
36							builder => 'parse_scenario_string',
37							init_arg => undef,
38							lazy => 1,
39							);
40
41							has loaded_blocks => (
42							is => 'ro',
43							isa => 'ArrayRef[Treex::Core::Block]',
44							builder => '_build_loaded_blocks',
45							predicate => 'is_initialized',
46							lazy => 1,
47							init_arg => undef,
48							);
49
50							has document_reader => (
51							is => 'rw',
52							does => 'Treex::Core::DocumentReader',
53							predicate => '_has_document_reader',
54							writer => '_set_document_reader',
55							init_arg => undef,
56							documentation => 'DocumentReader starts every scenario and reads a stream of documents.'
57							);
58
59							has writers => (
60							is => 'rw',
61							does => 'ArrayRef[Treex::Block::Write::BaseWriter]',
62							default => sub { [] }
63							);
64
65							has _global_params => (
66							is => 'ro',
67							isa => 'HashRef[Str]',
68							traits => ['Hash'],
69							default => sub { {} },
70							handles => {
71							get_global_param => 'get',
72							set_global_param => 'set',
73
74							#get_global_param_names => 'keys',
75							#set_verbose => [ set => 'verbose' ],
76							#get_verbose => [ get => 'verbose' ],
77							#set_language => [ set => 'language' ],
78							#get_language => [ get => 'language' ],
79							#... ?
80							},
81							);
82
83							has parser => (
84							is => 'ro',
85							isa => 'Parse::RecDescent::_Runtime',
86							init_arg => undef,
87							builder => '_build_parser',
88							documentation => q{Parses treex scenarios}
89							);
90
91							has runner => (
92							is => 'ro',
93							isa => 'Treex::Core::Run',
94							writer => '_set_runner',
95							weak_ref => 1,
96							documentation => 'Treex::Core::Run instance in which the scenario is running',
97							);
98
99							has cache => (
100							is => 'rw',
101							isa => 'Maybe[Cache::Memcached]',
102							builder => '_build_cache',
103							);
104
105							sub _build_scenario_string {
106	13			13		26	my $self = shift;
107	13	100				550	if ( $self->_has_from_file ) {
		50
108	10					362	return $self->_load_scenario_file( $self->from_file );
109							}
110							elsif ( $self->_has_from_string ) {
111	3					102	return $self->from_string;
112							}
113	0					0	log_fatal("You have to provide from_file or from_string attribute");
114							}
115
116							my %sequence = ();
117
118							sub _build_loaded_blocks {
119	10			10		25	my $self = shift;
120	10					22	my @block_items = @{ $self->block_items };
	10					369
121	8					16	my $block_count = scalar @block_items;
122	8					10	my $i = 0;
123	8					12	my @loaded_blocks;
124
125	8					13	my $sequence_from = 0;
126	8					18	my $sequence_hash = "";
127	8					20	foreach my $block_item (@block_items) {
128	15					29	$i++;
129	15					26	my $params = '';
130	15	50				111	if ( $block_item->{block_parameters} ) {
131	15					21	$params = join ' ', @{ $block_item->{block_parameters} };
	15					60
132							}
133	15					139	log_info("Loading block $block_item->{block_name} $params ($i/$block_count)");
134	15					68	my $new_block = $self->_load_block($block_item);
135
136	7	50				45	if ( $new_block->does('Treex::Core::DocumentReader') ) {
		50
137	0	0				0	log_fatal("Only one DocumentReader per scenario is permitted ($block_item->{block_name})")
138							if $self->_has_document_reader;
139	0					0	$self->_set_document_reader($new_block);
140							}
141							elsif ( $new_block->isa('Treex::Block::Write::BaseWriter') ) {
142	0					0	push( @{ $self->writers }, $new_block );
	0					0
143	0					0	push @loaded_blocks, $new_block; # duplicity
144							}
145							else {
146	7	50				2653	if ( ref($new_block) eq "Treex::Core::CacheBlock" ) {
		50
147	0					0	$sequence{$sequence_from}{from} = $sequence_from;
148	0					0	$sequence{$sequence_from}{to} = $i;
149	0					0	$sequence{$sequence_from}{hash} = $sequence_hash;
150
151	0					0	$sequence{$i}{_from} = $sequence_from;
152	0					0	$sequence_from = $i;
153	0					0	push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() );
	0					0
154	0					0	$sequence_hash = $new_block->get_hash();
155							}
156							elsif ($self->cache) {
157	0					0	$sequence_hash = md5_hex( $sequence_hash . $new_block->get_hash() );
158	0	0				0	if ( defined( $sequence{$sequence_from} ) ) {
159	0					0	push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() );
	0					0
160							}
161							}
162
163	7					27	push @loaded_blocks, $new_block;
164							}
165							}
166
167	0					0	log_info('ALL BLOCKS SUCCESSFULLY LOADED.');
168	0					0	return \@loaded_blocks;
169							}
170
171							sub _load_parser {
172	13			13		32	my $self = shift;
173	13					6194	require Treex::Core::ScenarioParser;
174	13					133	return Treex::Core::ScenarioParser->new();
175							}
176
177							sub _my_dir {
178	0			0		0	return dirname( (caller)[1] );
179							}
180
181							sub _build_parser {
182	13			13		26055	my $self = shift;
183	13					29	my $parser;
184	13	50				38	eval {
185	13					68	$parser = $self->_load_parser();
186	13					245	1;
187							} and return $parser;
188	0					0	log_info("Cannot find precompiled scenario parser, trying to build it from grammar");
189	0					0	my $dir = $self->_my_dir(); #get module's directory
190	0					0	my $file = "$dir/ScenarioParser.rdg"; #find grammar file
191	0	0				0	log_fatal("Cannot find grammar file") if !-e $file;
192
193							#in fact we should never reach this
194	0					0	log_warn('We should NOT reach this place. Treex distribution may be corrupted.');
195
196	0					0	my $grammar = read_file($file); #load it
197							eval {
198	0					0	log_info("Trying to precompile it for you");
199	0					0	require Parse::RecDescent;
200	0					0	local $CWD = $dir;
201	0					0	Parse::RecDescent->Precompile( { -standalone => 1 }, $grammar, 'Treex::Core::ScenarioParser' );
202	0					0	$parser = $self->_load_parser();
203	0					0	1;
204	0	0	0			0	} or eval {
205	0					0	log_info("Cannot precompile, loading directly from grammar. Consider precompiling it manually");
206	0					0	require Parse::RecDescent;
207	0					0	$parser = Parse::RecDescent->new($grammar); #create parser
208	0					0	1;
209							} or log_fatal("Cannot create Scenario parser");
210	0					0	return $parser;
211							}
212
213							sub _build_cache {
214	13			13		20761	my $self = shift;
215
216	13	50	33			515	if ( $self->runner && $self->runner->cache ) {
217
218	0					0	require Treex::Core::CacheBlock;
219	0					0	require Treex::Tool::Memcached::Memcached;
220
221	0					0	return Treex::Tool::Memcached::Memcached::get_connection(
222							"documents-cache"
223							);
224							}
225
226	13					43	return;
227							}
228
229							sub _load_scenario_file {
230	10			10		27	my ( $self, $scenario_filename ) = @_;
231	10					93	log_info "Loading scenario description $scenario_filename";
232	10	50				65	my $scenario_string = read_file( $scenario_filename, binmode => ':utf8', err_mode => 'quiet' )
233							or log_fatal "Can't open scenario file $scenario_filename";
234	10					1993	return $scenario_string;
235							}
236
237							sub parse_scenario_string {
238	13			13	1	25	my $self = shift;
239	13					503	my $scenario_string = $self->scenario_string;
240	13					465	my $from_file = $self->from_file;
241
242	13					477	my $parsed = $self->parser->startrule( $scenario_string, 1, $from_file );
243	13	100				65	log_fatal("Cannot parse the scenario: $scenario_string") if !defined $parsed;
244	11					720	return $parsed;
245							}
246
247							# reverse of parse_scenario_string, used in Treex::Core::Run for treex --dump
248							sub construct_scenario_string {
249	3			3	1	2860	my $self = shift;
250	3					8	my %args = @_;
251	3					9	my $multiline = $args{multiline};
252	3					5	my @block_items = @{ $self->block_items };
	3					104
253	3	100				11	my $delim = $multiline ? qq{\n} : q{ };
254	3					5	my @block_strings;
255	3					8	foreach my $block_item (@block_items) {
256	7					15	my $name = $block_item->{block_name};
257	7					9	my @parameters = map { _add_quotes($_) } @{ $block_item->{block_parameters} };
	2					10
	7					17
258	7	100				33	$name =~ s{^Treex::Block::}{} or $name = "::$name"; #strip leading Treex::Block:: or add leading ::
259	7					11	my $params;
260	7	100				16	if ( scalar @parameters ) {
261	2					8	$params = q{ } . join q{ }, @parameters;
262							}
263							else {
264	5					6	$params = q{};
265							}
266	7					21	push @block_strings, $name . $params;
267							}
268	3					45	return join $delim, @block_strings;
269							}
270
271							sub get_required_files {
272	0			0	0	0	my $self = shift;
273	0					0	my @block_items = @{ $self->block_items };
	0					0
274	0					0	my @required_files;
275	0					0	foreach my $block_item (@block_items) {
276	0					0	my $block = $self->_load_block($block_item);
277							push @required_files,
278							map {
279	0					0	$block_item->{block_name} . "\t" . $_;
	0					0
280							} $block->get_required_share_files();
281							}
282	0					0	return @required_files;
283							}
284
285							sub _add_quotes { # adding quotes only if param. value contains a space
286	2			2		4	my ($block_parameter) = @_;
287	2					12	my ( $name, $value ) = split /=/, $block_parameter, 2;
288	2	50				14	if ( $value =~ /\s/ ) {
289	0					0	my $res_string = "$name=";
290
291	0	0	0			0	if ( $value =~ /'/ && $value !~ /"/ ) {
292	0					0	$res_string .= '"' . $value . '"';
293							} else {
294	0					0	$value =~ s/'/\\'/g;
295	0					0	$res_string .= "'" . $value . "'";
296							}
297	0					0	return $res_string;
298							}
299	2					10	return $block_parameter;
300							}
301
302							sub load_blocks {
303	0			0	1	0	my $self = shift;
304	0					0	$self->loaded_blocks; #just access lazy attribute
305	0					0	return;
306							}
307
308							sub init {
309	0			0	1	0	my $self = shift;
310	0					0	$self->load_blocks();
311	0					0	return;
312							}
313
314							sub _load_block {
315	15			15		24	my ( $self, $block_item ) = @_;
316	15					39	my $block_name = $block_item->{block_name};
317	15					24	my $new_block;
318
319							# Initialize with global (scenario) parameters
320	15					24	my %params = ( %{ $self->_global_params }, scenario => $self );
	15					566
321
322							# which can be overriden by (local) block parameters.
323	15					21	foreach my $param ( @{ $block_item->{block_parameters} } ) {
	15					47
324	8					45	my ( $name, $value ) = split /=/, $param, 2;
325	8					34	$params{$name} = $value;
326							}
327
328	15	100		1		1762	eval "use $block_name; 1;" or log_fatal "Can't use block $block_name !\n$@\n";
	1			1		823
	1			1		8
	1			1		23
	1			1		858
	0			1		0
	0			1		0
	1			1		10
	1			1		2
	1			1		28
	1			1		35
	0			1		0
	0			1		0
	1			1		470
	0			1		0
	0					0
	1					13
	1					2
	1					28
	1					35
	0					0
	0					0
	1					10
	1					2
	1					30
	1					33
	0					0
	0					0
	1					11
	1					2
	1					28
	1					33
	0					0
	0					0
	1					11
	1					3
	1					29
	1					32
	0					0
	0					0
	1					13
	1					2
	1					31
	1					39
	0
	0
329	7	50				34	eval {
330	7					60	$new_block = $block_name->new( \%params );
331	7					82	1;
332							} or log_fatal "Treex::Core::Scenario->new: error when initializing block $block_name\n\nEVAL ERROR:\t$@";
333
334	7	0	33			256	if ( $self->cache && $params{'use_cache'} ) {
335	0					0	$new_block = Treex::Core::CacheBlock->new( { block => $new_block, cache => $self->cache } );
336							}
337
338	7					31	return $new_block;
339							}
340
341							sub run {
342	0			0	1	0	my ($self) = @_;
343	0					0	my $number_of_blocks = @{ $self->loaded_blocks };
	0					0
344	0	0				0	log_fatal('No DocumentReader supplied') if !$self->_has_document_reader;
345	0					0	my $reader = $self->document_reader;
346	0		0			0	my $number_of_documents = $reader->number_of_documents_per_this_job() || '?';
347	0					0	my $document_number = 0;
348
349							#if ( $self->cache ) {
350							# $document_number = $self->_run_with_cache( $reader, $number_of_blocks, $number_of_documents );
351							#}
352							#else {
353	0					0	$document_number = $self->_run_without_cache( $reader, $number_of_blocks, $number_of_documents );
354							#}
355
356	0	0				0	log_info "Processed $document_number document"
357							. ( $document_number == 1 ? '' : 's' );
358	0					0	return 1;
359							}
360
361							sub _run_with_cache {
362
363	0			0		0	my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_;
364	0					0	my $document_number = 0;
365
366	0					0	while ( my $document = $reader->next_document_for_this_job() ) {
367	0					0	$document_number++;
368	0					0	my $doc_name = $document->full_filename;
369	0					0	my $doc_from = $document->loaded_from;
370	0					0	log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from";
371	0					0	my $block_number = 0;
372	0					0	my $skip_to = 0;
373	0					0	my $process = 0;
374	0					0	my $skip_from = 0;
375	0					0	my $from_hash = "";
376	0					0	my $document_last_hash = "";
377	0					0	foreach my $block ( @{ $self->loaded_blocks } ) {
	0					0
378	0					0	$block_number++;
379	0					0	$process = 1;
380	0	0				0	if ( $block_number < $skip_to ) {
		0
381
382							# we know that there are identical, so we can skip them
383	0					0	log_info "Skipping block $block_number/$number_of_blocks " . ref($block);
384	0					0	$process = 0;
385							}
386							elsif ( $block_number == $skip_to ) {
387
388							# this is border Cache block -> we have to check whether next sequence is also same
389	0					0	$skip_from = $block_number + 1;
390
391							# following sequence is same => we can continue with skipping
392	0	0	0			0	if ($sequence{$skip_from}{'to'}
393							&&
394							$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() )
395							)
396							{
397
398							#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to});
399	0					0	$skip_to = $sequence{$skip_from}{to} - 1;
400	0					0	$from_hash = $document->get_hash();
401	0					0	$process = 0;
402							}
403							else {
404	0					0	$document_last_hash = $document->get_hash();
405
406							#$document->set_hash(md5_hex($document->get_hash() . $block->get_hash()));
407	0					0	my $full_hash = $document->get_hash();
408	0					0	$document = $self->cache->get($full_hash);
409
410	0	0				0	if ( !$document ) {
411	0					0	log_fatal("Document - $full_hash is missing!!!");
412							}
413	0					0	$process = 2;
414							}
415							}
416
417	0	0				0	if ( $process == 1 ) {
418	0					0	log_info "Applying block $block_number/$number_of_blocks " . ref($block);
419
420	0	0				0	$block->process_start if ( !$block->is_started );
421
422							#log_info("Document-hash: " . $document->get_hash());
423	0					0	$skip_from = $block_number + 1;
424	0					0	my $status = $block->process_document($document);
425	0	0	0			0	if (defined($status)
			0
			0
426							&&
427							$status == $Treex::Core::Block::DOCUMENT_FROM_CACHE &&
428							$sequence{$skip_from}{'to'} &&
429							$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() )
430							)
431							{
432
433							#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to});
434	0					0	$skip_to = $sequence{$skip_from}{to} - 1;
435	0					0	$skip_from = $block_number + 1;
436	0					0	$from_hash = $document->get_hash();
437							}
438							}
439
440	0					0	$document_last_hash = $document->get_hash();
441	0					0	$document->set_hash( md5_hex( $document->get_hash() . $block->get_hash() ) );
442
443	0	0				0	if ( ref($block) eq "Treex::Core::CacheBlock" ) {
444
445							# cache block => mark this path as known
446	0					0	my $id = $block_number + 1;
447	0					0	my $from = $sequence{$id}{'_from'};
448
449							# the first sequence has no document
450	0	0				0	if ( defined( $sequence{$from}{'document'} ) ) {
451	0					0	$self->_set_known_sequence( $sequence{$from}{'hash'}, $sequence{$from}{'document'} );
452							}
453
454	0					0	$sequence{$id}{'document'} = $document_last_hash;
455							}
456							}
457
458							# this actually marks the document as successfully done in parallel processing (if this line
459							# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel,
460							# therefore not using log_info or similiar)
461	0	0				0	if ( $self->document_reader->jobindex ) {
462	0					0	print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n";
463							}
464							}
465
466	0					0	log_info "Applying process_end";
467
468	0					0	foreach my $block ( @{ $self->loaded_blocks } ) {
	0					0
469	0	0				0	$block->process_end() if ( $block->is_started );
470							}
471
472	0					0	return $document_number;
473							}
474
475							sub _is_known_sequence {
476	0			0		0	my ( $self, $sequence_hash, $document_hash ) = @_;
477	0					0	my $hash = md5_hex( $sequence_hash, $document_hash );
478	0					0	return $self->cache->get($hash);
479							}
480
481							sub _set_known_sequence {
482	0			0		0	my ( $self, $sequence_hash, $document_hash ) = @_;
483	0					0	my $hash = md5_hex( $sequence_hash, $document_hash );
484	0					0	$self->cache->set( $hash, 1 );
485
486	0					0	return;
487							}
488
489							sub _run_without_cache {
490
491	0			0		0	my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_;
492	0					0	my $document_number = 0;
493
494	0					0	$self->start();
495
496	0					0	while ( my $document = $reader->next_document_for_this_job() ) {
497	0					0	$document_number++;
498	0					0	my $doc_name = $document->full_filename;
499	0					0	my $doc_from = $document->loaded_from;
500	0					0	log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from";
501	0					0	my $block_number = 0;
502	0					0	foreach my $block ( @{ $self->loaded_blocks } ) {
	0					0
503	0					0	$block_number++;
504	0					0	log_info "Applying block $block_number/$number_of_blocks " . ref($block);
505	0					0	$block->process_document($document);
506							}
507
508							# this actually marks the document as successfully done in parallel processing (if this line
509							# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel,
510							# therefore not using log_info or similiar)
511	0	0				0	if ( $self->document_reader->jobindex ) {
512	0					0	print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n";
513							}
514							}
515
516	0					0	$self->end();
517
518	0	0				0	log_info "Processed $document_number document"
519							. ( $document_number == 1 ? '' : 's' );
520
521	0					0	return $document_number;
522							}
523
524							# Apply process_start to all blocks for which this has not yet been applied
525							sub start {
526	0			0	1	0	my ($self) = @_;
527
528	0					0	log_info "Applying process_start";
529	0					0	foreach my $block ( @{ $self->loaded_blocks } ) {
	0					0
530	0	0				0	$block->process_start() if ( !$block->is_started );
531							}
532
533	0					0	return;
534							}
535
536							# Apply the scenario to documents given in parameter
537							sub apply_to_documents {
538
539	0			0	1	0	my ( $self, @documents ) = @_;
540
541	0					0	my $number_of_blocks = @{ $self->loaded_blocks };
	0					0
542	0					0	my $block_number = 0;
543
544	0					0	foreach my $document (@documents){
545	0					0	log_info "Processing document" . $document->full_filename;
546
547	0					0	foreach my $block ( @{ $self->loaded_blocks } ) {
	0					0
548	0					0	$block_number++;
549	0					0	log_info "Applying block $block_number/$number_of_blocks " . ref($block);
550	0					0	$block->process_document($document);
551							}
552							}
553
554	0					0	return;
555							}
556
557							# Apply process_end to all blocks for which this has not yet been applied
558							sub end {
559	0			0	1	0	my ($self) = @_;
560
561	0					0	log_info "Applying process_end";
562	0					0	foreach my $block ( @{ $self->loaded_blocks } ) {
	0					0
563	0	0				0	$block->process_end() if ( $block->is_started );
564							}
565
566	0					0	return;
567							}
568
569	3			3		2133	use Module::Reload;
	3					1317
	3					367
570
571							sub restart {
572	0			0	1	0	my ($self) = @_;
573	0					0	my $changed_modules = Module::Reload->check;
574	0					0	log_info "Number of reloaded modules = $changed_modules";
575	0					0	log_info "reseting the document reader\n";
576	0					0	$self->document_reader->restart();
577
578							# TODO rebuild the reloaded blocks
579	0					0	return;
580							}
581
582							1;
583
584							__END__
585
586							=for Pod::Coverage BUILD
587
588							=encoding utf-8
589
590							=head1 NAME
591
592							Treex::Core::Scenario - a larger Treex processing unit, composed of blocks
593
594							=head1 VERSION
595
596							version 2.20150928
597
598							=head1 SYNOPSIS
599
600							use Treex::Core;
601
602							my ($doc1, $doc2);
603							my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' );
604							$scenario->run;
605
606
607							$scenario = Treex::Core::Scenario->new(from_string => 'W2A::EN::Segment language=en');
608							$scenario->start();
609							$scenario->apply_to_documents($doc1, $doc2);
610							$scenario->end();
611
612
613							=head1 DESCRIPTION
614
615
616							A Treex scenario consists of a sequence of (possibly parametrized) Treex blocks.
617
618							Scenarios can be described by a simple textual format, which is either passed
619							directly to the scenario construction, or is contained in a text file whose
620							name is passed.
621
622							The string description of scenarios looks as follows.
623
624							1) It contains a list of block names from which their 'C<Treex::Block::>'
625							prefixes were removed.
626
627							2) The block names are separated by one or more white spaces.
628
629							3) The block names are listed in the same order in which they should be
630							applied on data.
631
632							4) For each block, there can be one or more parameters specified, using the
633							C<attribute=value> form.
634
635							5) Comments start with 'C<#>' and end with the nearest newline character.
636
637
638							Scenario example:
639
640							# morphological analysis of an English text
641							Util::SetGlobal language=en selector=src
642							Read::Text
643							W2A::ResegmentSentences
644							W2A::EN::Tokenize
645							W2A::EN::NormalizeForms
646							W2A::EN::FixTokenization
647							W2A::EN::TagMorce
648
649
650							=head1 METHODS
651
652							=head2 Constructor
653
654							=over 4
655
656							=item my $scenario = Treex::Core::Scenario->new(from_string => 'W2A::Tokenize language=en W2A::Lemmatize' );
657
658							Constructor parameter C<from_string> specifies the names of blocks which are
659							to be executed (in the specified order) when the scenario is applied to a
660							L<Treex::Core::Document> object.
661
662							=item my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' );
663
664							The scenario description is loaded from the file.
665
666							=back
667
668
669							=head2 Running the scenario
670
671							=over 4
672
673							=item $scenario->run();
674
675							Run the scenario.
676							One of the blocks (usually the first one) must be the document reader (see
677							L<Treex::Core::DocumentReader>) that produces the
678							documents on which this scenario is applied.
679
680							=item $scenario->apply_to_documents($treex_doc);
681
682							Apply this scenario to a L<Treex::Core::Document> instance obtained from elsewhere.
683							Please note that C<start()> must be called before the first call to this method and C<end()>
684							after the last call to this method.
685
686							The scenario does not need to contain a document reader if documents are given
687							explicitly.
688
689							=item $scenario->start();
690
691							Apply C<process_start()> to all blocks in the scenario.
692							This is called automatically by C<run()>, but must be called before C<apply_to_documents()>.
693
694							=item $scenario->end();
695
696							Apply C<process_end()> to all blocks in the scenario.
697							This is called automatically by C<run()>, but must be called after calls to C<apply_to_documents()>.
698
699
700							=back
701
702							=head2 Internal methods for loading scenarios
703
704							=over 4
705
706							=item _load_scenario_file($filename)
707
708							loads a scenario description from a file
709
710							=item parse_scenario_string
711
712							parses a textual description of a scenario
713
714							=item construct_scenario_string
715
716							constructs a textual scenario description from an existing scenario instance;
717							accepts the named parameter C<multiline> - when set, blocks are separated by newlines instead of spaces
718
719							=item load_blocks
720
721							loads (C<use>s) the blocks and calls their constructors;
722							can be used for preloading blocks, e.g. in server applications;
723							when a scenario is run, the blocks are loaded automatically
724
725							=item init
726
727							does all initialization, so that the scenario is ready to run after this method returns;
728							currently it just loads the blocks
729
730							=item restart
731
732							resets the document reader; in the future it will also rebuild the reloaded blocks
733
734							=back
735
736
737							=head1 SEE ALSO
738
739							L<Treex::Core::Block>
740							L<Treex::Core>
741
742							=head1 AUTHORS
743
744							Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
745
746							Martin Popel <popel@ufal.mff.cuni.cz>
747
748							David Mareček <marecek@ufal.mff.cuni.cz>
749
750							Tomáš Kraut <kraut@ufal.mff.cuni.cz>
751
752							Martin Majliš <majlis@ufal.mff.cuni.cz>
753
754							Ondřej Dušek <odusek@ufal.mff.cuni.cz>
755
756							=head1 COPYRIGHT AND LICENSE
757
758							Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
759
760							This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.