File Coverage

blib/lib/Treex/Tool/Parser/MSTperl/Labeller.pm

Criterion	Covered	Total	%
statement	1	3	33.3
branch			n/a
condition			n/a
subroutine	1	1	100.0
pod			n/a
total	2	4	50.0

line	stmt	sub	time	code
1				package Treex::Tool::Parser::MSTperl::Labeller;
2				{
3				$Treex::Tool::Parser::MSTperl::Labeller::VERSION = '0.11949';
4				}
5
6	1	1	2552	use Moose;
	0
	0
7				use Carp;
8
9				use Treex::Tool::Parser::MSTperl::Sentence;
10				use Treex::Tool::Parser::MSTperl::Edge;
11				use Treex::Tool::Parser::MSTperl::ModelLabelling;
12
13				has config => (
14				isa => 'Treex::Tool::Parser::MSTperl::Config',
15				is => 'ro',
16				required => '1',
17				);
18
19				has model => (
20				isa => 'Maybe[Treex::Tool::Parser::MSTperl::ModelLabelling]',
21				is => 'rw',
22				);
23
24				sub BUILD {
25				my ($self) = @_;
26
27				$self->model(
28				Treex::Tool::Parser::MSTperl::ModelLabelling->new(
29				config => $self->config,
30				)
31				);
32
33				return;
34				}
35
36				sub load_model {
37
38				# (Str $filename)
39				my ( $self, $filename ) = @_;
40
41				return $self->model->load($filename);
42				}
43
44				sub load_model_tsv {
45
46				# (Str $filename)
47				my ( $self, $filename ) = @_;
48
49				return $self->model->load_tsv($filename);
50				}
51
52				sub label_sentence {
53
54				# (Treex::Tool::Parser::MSTperl::Sentence $sentence)
55				my ( $self, $sentence ) = @_;
56
57				# parse sentence (does not modify $sentence)
58				my $sentence_labelled = $self->label_sentence_internal($sentence);
59
60				return $sentence_labelled->toLabelsArray();
61				}
62
63				sub label_sentence_internal {
64
65				# (Treex::Tool::Parser::MSTperl::Sentence $sentence)
66				my ( $self, $sentence ) = @_;
67
68				if ( !$self->model ) {
69				croak "MSTperl parser error: There is no model for labelling!";
70				}
71
72				# copy the sentence (do not modify $sentence directly)
73				my $sentence_working_copy = $sentence->copy_nonlabelled();
74				$sentence_working_copy->fill_fields_before_labelling();
75
76				if ( $self->config->DEBUG >= 2 ) {
77				print "Labelling sentence: "
78				. ( join ' ', map { $_->fields->[1] } @{ $sentence->nodes } )
79				. " \n";
80				}
81
82				# take root's children, find best scoring labelling sequence, recursion
83				my $root = $sentence_working_copy->getNodeByOrd(0);
84				$self->label_subtree($root);
85
86				return $sentence_working_copy;
87				}
88
89				# assign labels to edges going from the $parent node to its children
90				# (and recurse on the children)
91				# directly modifies the sentence (or better: the nodes within the sentence)
92				sub label_subtree {
93
94				# (Treex::Tool::Parser::MSTperl::Node $parent)
95				my ( $self, $parent ) = @_;
96
97				my $ALGORITHM = $self->config->labeller_algorithm;
98
99				if ( $self->config->DEBUG >= 2 ) {
100				print "Label subtree of node number " . $parent->ord . ' '
101				. $parent->fields->[1] . "\n";
102				}
103
104				my @edges = @{ $parent->children };
105				if ( $self->config->DEBUG >= 3 ) {
106				print "There are " . scalar(@edges) . " children edges \n";
107				}
108
109				if ( @edges == 0 ) {
110				return;
111				}
112
113				# label the nodes using Viterbi algorithm
114				# (this is my own implementation of Viterbi, fitted for this task)
115
116				# States structure: for each state (its key being a label)
117				# there is an array with the current best path to it as 'path' (append-only)
118				# and its current score 'score' (computed somehow,
119				# i.e. differently for different algorithms)
120				my $states = {};
121				my $starting_state_key = $self->config->SEQUENCE_BOUNDARY_LABEL;
122
123				# correspond to algorithms
124				my @starting_scores = (
125
126				# 0 1 2 3 4 5 6 7 8 9
127				1e300, 1, 1, 1e300, 1e300, 1e300, -1, -1, 0, 0,
128
129				# 10 11 12 13 14 15 16 17 18 19 20 21
130				1e300, 1e300, 1e300, 1e300, 1e300, 1e300, 0, 0, 0, 1, 0, 0,
131				);
132
133				# path could be constructed by backpointers
134				# but one would have to keep all the states the whole time
135				$states->{$starting_state_key} = {
136				'path' => [ $self->config->SEQUENCE_BOUNDARY_LABEL ],
137				'score' => $starting_scores[$ALGORITHM],
138				};
139
140				# run Viterbi
141				# In each cycle generates %new_states and sets them as %states,
142				# so at the end it suffices to find the state with the best score in %states
143				# and use its path as the result.
144				my $prev_edge = undef;
145				foreach my $edge (@edges) {
146
147				# only progress and/or debug info
148				if ( $self->config->DEBUG >= 3 ) {
149				print " Labelling edge to node "
150				. $edge->child->ord . ' ' . $edge->child->fields->[1] . "\n";
151				print " Currently there are "
152				. ( keys %$states ) . " states\n";
153				}
154
155				# do one Viterbi step - assign possible labels to $edge
156				# (including appropriate scores of course)
157				$states = $self->label_edge( $edge, $states, $prev_edge );
158
159				if ( $ALGORITHM == 20 ) {
160
161				# set the best label
162				my $best_state_label = $self->find_best_state_label($states);
163				$edge->child->label($best_state_label);
164				}
165
166				$prev_edge = $edge;
167				}
168
169				# TODO: foreach last state multiply its score
170				# by the label->sequence_boundary probability
171
172				if ( $ALGORITHM != 20 ) {
173
174				# End - find the state with the best score - this is the result
175				my $best_state_label = $self->find_best_state_label($states);
176
177				if ($best_state_label) {
178
179				my @labels = @{ $states->{$best_state_label}->{'path'} };
180
181				# get rid of SEQUENCE_BOUNDARY_LABEL
182				shift @labels;
183
184				# only progress and/or debug info
185				if ( $self->config->DEBUG >= 2 ) {
186				print "best state $best_state_label score: " . "\n";
187				print "best path: "
188				. ( join ' ', @labels )
189				. "\n";
190				}
191
192				foreach my $edge (@edges) {
193				my $label = shift @labels;
194				$edge->child->label($label)
195				}
196
197				} else {
198
199				# TODO do not die, provide some backoff instead
200				# (do some smoothing, at least when no states are generated)
201				print "No best state generated, cannot label the sentence!"
202				. " (This is weird.)\n";
203				}
204				} # else: edges are labelled in each step
205
206				# end of Viterbi
207
208				# recursion
209				foreach my $edge (@edges) {
210				$self->label_subtree( $edge->child );
211				}
212
213				return;
214				}
215
216				sub find_best_state_label {
217
218				my ( $self, $states ) = @_;
219
220				# "negative infinity" (works both with real probs and with their logs)
221				my $best_state_score = -999999999;
222				my $best_state_label = undef;
223
224				foreach my $state_label ( keys %$states ) {
225				if ( $self->config->DEBUG >= 4 ) {
226				print "state $state_label score: "
227				. $states->{$state_label}->{'score'} . "\n";
228				}
229				if ( $states->{$state_label}->{'score'} > $best_state_score ) {
230				$best_state_label = $state_label;
231				$best_state_score = $states->{$state_label}->{'score'};
232				}
233				}
234
235				# only progress and/or debug info
236				if ( $self->config->DEBUG >= 2 ) {
237				print "best state $best_state_label score: "
238				. $best_state_score . "\n";
239				}
240
241				return $best_state_label;
242				}
243
244				# used as an internal part of label_subtree
245				# to get all probable labels for an edge
246				# i.e. make one step of the Viterbi algorithm
247				sub label_edge {
248
249				my ( $self, $edge, $states, $prev_edge ) = @_;
250
251				my $ALGORITHM = $self->config->labeller_algorithm;
252				my $new_states = {};
253				foreach my $last_state ( keys %$states ) {
254
255				if ( $ALGORITHM == 21 && defined $prev_edge ) {
256
257				# set last label
258				my $best_prev_state_label = $self->find_best_state_label($states);
259				$prev_edge->child->label($best_prev_state_label);
260				}
261
262				# only progress and/or debug info
263				if ( $self->config->DEBUG >= 4 ) {
264				print " Processing state $last_state (score "
265				. $states->{$last_state}->{'score'} . ")\n";
266				}
267
268				# compute the possible labels scores (typically probabilities),
269				# i.e. products of emission and transition probs
270				# possible_labels{label} = score
271				my %possible_labels = %{
272				$self->get_possible_labels(
273				$edge,
274				$last_state,
275				$states->{$last_state}->{'score'},
276				)
277				};
278
279				# only progress and/or debug info
280				if ( $self->config->DEBUG >= 4 ) {
281				print " " . scalar( keys %possible_labels )
282				. " possible labels are "
283				. ( join ' ', keys %possible_labels )
284				. "\n";
285				}
286
287				foreach my $new_label ( keys %possible_labels ) {
288				my $new_state_score = $possible_labels{$new_label};
289
290				# only progress and/or debug info
291				if ( $self->config->DEBUG >= 5 ) {
292				print " Trying label $new_label, "
293				. "score $new_state_score\n";
294				print " Old state path "
295				. ( join ' ', @{ $states->{$last_state}->{'path'} } )
296				. " \n";
297				print " Old states: "
298				. (
299				join ' ',
300				map {
301				"$_ (" . (
302				join ' ', @{ $states->{$_}->{'path'} }
303				) . ")"
304				} keys %$states
305				)
306				. " \n";
307				print " New states: "
308				. (
309				join ' ',
310				map {
311				"$_ (" . (
312				join ' ', @{ $new_states->{$_}->{'path'} }
313				) . ")"
314				} keys %$new_states
315				)
316				. " \n";
317				}
318
319				# test if this is the best
320				if (defined $new_states->{$new_label}
321				&& $new_states->{$new_label}->{'score'} > $new_state_score
322				)
323				{
324
325				# there is already the same state state
326				# with higher score
327				next;
328				}
329
330				# else such a state is not yet there resp. it is but its score
331				# is lower than $new_state_score -> set it (resp. replace it)
332
333				my @new_state_path = @{ $states->{$last_state}->{'path'} };
334				push @new_state_path, $new_label;
335				$new_states->{$new_label} = {
336				'path' => \@new_state_path,
337				'score' => $new_state_score,
338				};
339
340				# only progress and/or debug info
341				if ( $self->config->DEBUG >= 5 ) {
342				print " New state path "
343				. ( join ' ', @new_state_path )
344				. " \n";
345				print " Old states: "
346				. (
347				join ' ',
348				map {
349				"$_ (" . (
350				join ' ', @{ $states->{$_}->{'path'} }
351				) . ")"
352				} keys %$states
353				)
354				. " \n";
355				print " New states: "
356				. (
357				join ' ',
358				map {
359				"$_ (" . (
360				join ' ', @{ $new_states->{$_}->{'path'} }
361				) . ")"
362				} keys %$new_states
363				)
364				. " \n";
365				}
366				} # foreach $new_label
367
368				# only progress and/or debug info
369				if ( $self->config->DEBUG >= 4 ) {
370				print " Now there are "
371				. ( keys %$new_states ) . " new states\n";
372				}
373				} # foreach $last_state
374
375				$new_states = $self->prune($new_states);
376
377				return $new_states;
378				}
379
380				# prune the states (keep only some of the best)
381				sub prune {
382
383				my ( $self, $states ) = @_;
384
385				# pruning
386
387				# pruning type 1 commented out because emission scores are not probs
388				# (they do not sum up to 1)
389				#
390				# pruning: keep as many best states so that their normed prob
391				# sums up to at least $VITERBI_STATES_PROB_SUM_THRESHOLD
392				# %states = ();
393				# my @best_states = sort {
394				# $new_states{$b}->{'score'}
395				# <=> $new_states{$a}->{'score'}
396				# } keys %new_states;
397				# my $prob_sum = 0;
398				# foreach my $state (@best_states) {
399				# $prob_sum += $new_states{$state}->{'score'};
400				# }
401				# my $threshold = $prob_sum
402				# * $VITERBI_STATES_PROB_SUM_THRESHOLD;
403				# my $best_prob_sum = 0;
404				# while ($best_prob_sum < $threshold) {
405				# my $state = shift @best_states;
406				# $states{$state} = $new_states{$state};
407				# $best_prob_sum += $new_states{$state}->{'score'};
408				# }
409
410				# states going form pruning phase 1 to pruning phase 2
411				# %new_states = %states;
412
413				# simple pruning: keep n best states
414				my $new_states = {};
415				my @best_states
416				= sort {
417				$states->{$b}->{'score'} <=> $states->{$a}->{'score'}
418				} keys %$states;
419				for (
420				my $i = 0;
421				@best_states && $i < $self->config->VITERBI_STATES_NUM_THRESHOLD;
422				$i++
423				)
424				{
425				my $state = shift @best_states;
426				$new_states->{$state} = $states->{$state};
427
428				# only progress and/or debug info
429				if ( $self->config->DEBUG >= 5 ) {
430				print " Pruning let thrgough the state $state"
431				. "\n";
432				}
433				}
434
435				# only progress and/or debug info
436				if ( $self->config->DEBUG >= 4 ) {
437				print " After pruning there are "
438				. ( keys %$new_states ) . " states\n";
439				}
440
441				return $new_states;
442				}
443
444				# computes possible labels for an edge, using info about
445				# the emission scores, transition scores and last state's score
446				sub get_possible_labels {
447				my ( $self, $edge, $previous_label, $previous_label_score ) = @_;
448
449				my $ALGORITHM = $self->config->labeller_algorithm;
450
451				if ($ALGORITHM == 8
452				\|\| $ALGORITHM == 9
453				\|\| $ALGORITHM == 16
454				\|\| $ALGORITHM == 17
455				\|\| $ALGORITHM == 18
456				\|\| $ALGORITHM == 19
457				\|\| $ALGORITHM >= 20
458				)
459				{
460
461				my $result = {};
462				my $all_labels = $self->model->get_all_labels();
463
464				# foreach possible label (actually foreach existing label)
465				foreach my $label ( @{$all_labels} ) {
466
467				# score = previous score + new score
468
469				if ( $ALGORITHM == 20 ) {
470				if ( $self->config->DEBUG >= 4 ) {
471				print " Score for label $label: "
472				. (
473				$self->model->get_label_score(
474				$label, $previous_label, $edge->features_all_labeller()
475				)
476				) . "\n";
477				}
478				$result->{$label} =
479				$self->model->get_label_score(
480				$label, $previous_label, $edge->features_all_labeller()
481				)
482				;
483				} elsif ( $ALGORITHM == 19 ) {
484				if ( $self->config->DEBUG >= 4 ) {
485				print " Score for label $label: $previous_label_score + "
486				. (
487				$self->model->get_label_score(
488				$label, $previous_label, $edge->features_all_labeller()
489				)
490				) . "\n";
491				}
492				$result->{$label} =
493				$previous_label_score
494				* $self->model->get_label_score(
495				$label, $previous_label, $edge->features
496				)
497				;
498				} elsif ( $ALGORITHM == 21 ) {
499				if ( $self->config->DEBUG >= 4 ) {
500				print " Score for label $label: $previous_label_score + "
501				. (
502				$self->model->get_label_score(
503				$label, $previous_label, $edge->features_all_labeller()
504				)
505				) . "\n";
506				}
507				$result->{$label} =
508				$previous_label_score
509				+ $self->model->get_label_score(
510				$label, $previous_label, $edge->features_all_labeller()
511				)
512				;
513				} elsif ( $ALGORITHM == 18 ) {
514				$result->{$label} =
515				$self->model->get_label_score(
516				$label, $previous_label, $edge->features
517				)
518				;
519				} else {
520				if ( $self->config->DEBUG >= 4 ) {
521				print " Score for label $label: $previous_label_score + "
522				. (
523				$self->model->get_label_score(
524				$label, $previous_label, $edge->features
525				)
526				) . "\n";
527				}
528				$result->{$label} =
529				$previous_label_score
530				+ $self->model->get_label_score(
531				$label, $previous_label, $edge->features
532				)
533				;
534				}
535
536				} # end foreach $label
537
538				return $result;
539
540				} else { # $ALGORITHM not in 8, 9, >16
541
542				my $emission_scores =
543				$self->model->get_emission_scores( $edge->features );
544
545				my $transition_scores = {};
546				foreach my $possible_label ( keys %$emission_scores ) {
547				$transition_scores->{$possible_label} =
548				$self->model->get_transition_score(
549				$possible_label, $previous_label
550				);
551				}
552
553				my $possible_labels = $self->get_possible_labels_internal(
554				$emission_scores,
555				$transition_scores,
556				$previous_label_score,
557				);
558
559				if ( scalar( keys %$possible_labels ) > 0 ) {
560				return $possible_labels;
561				} else {
562
563				# no possible states generated -> backoff
564				my %unigrams_copy = %{ $self->model->unigrams };
565				my $blind_scores = \%unigrams_copy;
566
567				# TODO: smoothing (now a very stupid way is used
568				# just to lower the scores below the unblind scores)
569				foreach my $label ( keys %$blind_scores ) {
570				$blind_scores->{$label} *= 0.01;
571				}
572
573				# fall back to unigram distribution for transitions
574				# TODO: smoothing of bigram, unigram and uniform distros
575				# for transitions
576				# (this fallback should then become obsolete)
577				$possible_labels = $self->get_possible_labels_internal(
578				$emission_scores,
579				$blind_scores,
580				$previous_label_score,
581				);
582
583				if ( scalar( keys %$possible_labels ) > 0 ) {
584				return $possible_labels;
585				} else {
586				warn "Based on the training data, no possible label was found"
587				. " for an edge. This usually means that either"
588				. " your training data are not big enough or that"
589				. " the set of features you are using"
590				. " is not well constructed - either it is too small"
591				. " or it lacks features that would be general enough"
592				. " to cover all possible sentences."
593				. " Using blind emission probabilities instead.\n"
594				. " get_possible_labels ($edge, $previous_label,"
595				. " $previous_label_score ) \n"
596				. $edge->sentence->id
597				. ': '
598				. $edge->parent->fields->[1]
599				. " -> "
600				. $edge->child->fields->[1]
601				. "\n";
602
603				# TODO: these are more or less probabilities, which might
604				# be unappropriate for some of the
605				# algorithms -> recompute somehow;
606				# also they do contain the SEQUENCE_BOUNDARY_LABEL prob
607
608				$possible_labels = $self->get_possible_labels_internal(
609				$blind_scores,
610				$blind_scores,
611				$previous_label_score,
612				);
613
614				if ( scalar( keys %$possible_labels ) > 0 ) {
615				return $possible_labels;
616				} else {
617				warn "no possible labels generated, no fallback helped:"
618				. " probably there is a bug in the code!"
619				. " get_possible_labels ($edge, $previous_label,"
620				. " $previous_label_score ) "
621				. $edge->parent->fields->[1]
622				. " -> "
623				. $edge->child->fields->[1]
624				. "\n";
625				return {};
626				} # end no backoff helped
627				} # end backoff by unigrams for emission and transition scores
628				} # end backoff by unigrams for transition scores
629				} # end $ALGORITHM not in 8,9
630				}
631
632				sub get_possible_labels_internal {
633				my ( $self, $emission_scores, $transition_scores, $last_state_score ) = @_;
634
635				# $emission_scores are often not really probs but scores
636
637				my $ALGORITHM = $self->config->labeller_algorithm;
638
639				if ($ALGORITHM == 8
640				\|\| $ALGORITHM == 9
641				\|\| $ALGORITHM == 16
642				\|\| $ALGORITHM == 17
643				\|\| $ALGORITHM == 18
644				\|\| $ALGORITHM == 19
645				)
646				{
647
648				# these algorithms have such a simple way of computing possible labels
649				# they they do not need to have it split into two subroutines
650				croak "Labeller->get_possible_labels_internal not implemented"
651				. " for algorithm no $ALGORITHM!";
652				}
653
654				my %possible_labels;
655				foreach my $possible_label ( keys %$emission_scores ) {
656				my $emission_score = $emission_scores->{$possible_label};
657				my $transition_score = $transition_scores->{$possible_label};
658				if ( !defined $transition_score ) {
659				next;
660				}
661
662				my $possible_state_score;
663				if ( $ALGORITHM == 2 ) {
664				$possible_state_score
665				= $last_state_score + $emission_score * $transition_score;
666				} else {
667				$possible_state_score
668				= $last_state_score * $emission_score * $transition_score;
669				}
670
671				# if no state like that yet or better than current max,
672				# use this new state
673				if ($possible_state_score > 0
674				&& (!$possible_labels{$possible_label}
675				\|\| $possible_labels{$possible_label}->{'score'}
676				< $possible_state_score
677				)
678				)
679				{
680				$possible_labels{$possible_label} = $possible_state_score;
681				}
682
683				# else we already have a state with the same key but higher score
684				} # end foreach $possible_label
685
686				return \%possible_labels;
687				}
688
689				1;
690
691				__END__
692
693
694				=pod
695
696				=for Pod::Coverage BUILD
697
698				=encoding utf-8
699
700				=head1 NAME
701
702
703				=head1 VERSION
704
705				version 0.11949
706				Treex::Tool::Parser::MSTperl::Labeller - pure Perl implementation
707				of a dependency tree labeller for the MST parser
708
709				=head1 DESCRIPTION
710
711				This is a Perl implementation of the labeller for MST Parser
712				which is (most probably) described in
713				McDonald, Ryan: Discriminative Learning And Spanning Tree Algorithms
714				For Dependency Parsing, 2006 (chapter 3.3.3 Two-Stage Labelling).
715
716				For a dependency parse tree - presumably, but not necessarily, obtained using
717				the MST parser (L<Treex::Tool::Parser::MSTperl::Parser>), possibly
718				non-projective - assigns the most probable labels to the edges of the tree,
719				using the given model (L<Treex::Tool::Parser::MSTperl::ModelLabelling>).
720
721				Assigning labels is implemented as sequence labelling, where the sequence to
722				be labelled is each sequence of edges between a parent node and its children.
723				First-order Markov factorization is used, but because we do the labelling as a
724				separate second stage to the parsing, many non-local features can be used,
725				exploiting the knowledge of the structure of the whole tree; also we go from
726				to root downwards and are therefore able to use the knowledge of labels
727				assigned to ancestor edges (especially the edge leading to the common parent
728				node).
729
730				Each label is technically stored with the child node of the edge it belongs to,
731				so sometimes I will talk about a node label, which actually means the label
732				of the edge between the node and its parent.
733
734				A variant of the Viterbi algorithm is used to find the best scoring sequence
735				of labels. However, instead of probabilities we use (real-valued) scores and
736				therefore instead of multiplication addition is used in Viterbi.
737
738				For detail on the features and training see
739				L<Treex::Tool::Parser::MSTperl::TrainerLabelling>.
740
741
742				I have used several sources of information to implement it, especially:
743
744				Kevin Gimpel and Shay Cohen:
745				Discriminative Online Algorithms for Sequence Labeling - A Comparative Study
746				(2007)
747
748				And also parts of these:
749
750				Junâ€™ichi Kazama and Kentaro Torisawa:
751				A New Perceptron Algorithm for Sequence Labeling with Non-local Features
752				(2007)
753
754				Wenliang Chen, Yujie Zhang, Hitoshi Isahara:
755				A Two-stage Parser for Multilingual Dependency Parsing
756				(2007)
757
758				Binyamin Rozenfeld, Ronen Feldman, Moshe Fresko:
759				A Systematic Cross-Comparison of Sequence Classifiers
760				(2004?)
761
762				=head1 FIELDS
763
764				=over 4
765
766				=item config
767
768				Instance of L<Treex::Tool::Parser::MSTperl::Config> containing settings to be
769				used with labeller.
770
771				Currently the settings most relevant to the Labeller are the following:
772
773				=over 8
774
775				=item VITERBI_STATES_NUM_THRESHOLD
776
777				See L<Treex::Tool::Parser::MSTperl::Config/VITERBI_STATES_NUM_THRESHOLD>.
778
779				=item labeller_algorithm
780
781				See L<Treex::Tool::Parser::MSTperl::Config/labeller_algorithm>.
782
783				=item labelledFeaturesControl
784
785				See L<Treex::Tool::Parser::MSTperl::Config/labelledFeaturesControl>.
786
787				=item SEQUENCE_BOUNDARY_LABEL
788
789				See L<Treex::Tool::Parser::MSTperl::Config/SEQUENCE_BOUNDARY_LABEL>.
790
791				=back
792
793				=item model
794
795				An instance of L<Treex::Tool::Parser::MSTperl::ModelLabelling>
796				used to label the trees. It can be changed if needed (it is usually needed
797				when training the labeller).
798
799				=back
800
801				=head1 METHODS
802
803				=over 4
804
805				=item my $labeller = Treex::Tool::Parser::MSTperl::Labeller->new(
806				config => $self->config);
807
808				Creates an instance of the labeller using the given configuration, also
809				initializing the model.
810
811				=item $labeller->load_model('modelfile.lmodel');
812
813				=item $labeller->load_model_tsv('modelfile.tsv');
814
815				Loads a labelled model
816				using L<Treex::Tool::Parser::MSTperl::ModelBase/load>
817				or L<Treex::Tool::Parser::MSTperl::ModelBase/load_tsv>.
818
819				A model has to be loaded or created in an other way
820				before sentences can be labelled.
821
822				=item $labeller->label_sentence($sentence);
823
824				Labels a sentence (instance of L<Treex::Tool::Parser::MSTperl::Sentence>). It
825				finds the best labels for the sentence and returns them as an array reference.
826
827				The given sentence is left intact and any labelling information already
828				contained in the sentence is disregarded.
829
830				=item $labeller->label_sentence_internal($sentence);
831
832				Does the actual labelling, returning a labelled instance of
833				L<Treex::Tool::Parser::MSTperl::Sentence>. The C<label_sentence> sub is
834				actually only a wrapper for this method which extracts the labels of the
835				nodes and returns these. If you are only interested in getting the labels,
836				use C<label_sentence>, but if it is handy for you to get a whole instance of
837				L<Treex::Tool::Parser::MSTperl::Sentence> (which is labelled), use
838				C<label_sentence_internal>. The subroutines are otherwise equivalent.
839
840				The given sentence is left intact and any labelling information already
841				contained in the sentence is disregarded. All other information from the
842				given sentence is copied to the returned sentence, the sentence only differ
843				in the labels assigned.
844
845				=item $labeller->label_subtree($parent);
846
847				Assign labels to edges going from the $parent node to its children
848				(and recurse on the children).
849				Directly modifies the sentence
850				(or more precisely: the nodes within the sentence).
851
852				=item $labeller->label_edge($edge, $states);
853
854				Used as an internal part of label_subtree
855				to get all probable labels for an edge $edge
856				i.e. make one step of the Viterbi algorithm.
857				The $states is a hash reference of all currently active Viterbi states.
858
859				=item $labeller->prune($states);
860
861				Prune the states (passed as a hash ref), currently does n-best pruning
862				with n = C<config->VITERBI_STATES_NUM_THRESHOLD>, keeping always n best scoring
863				states at maximum (all states if there are less than n). Is called after
864				each Viterbi step, i.e. at the end of each call of C<label_edge>.
865				Returns the new pruned states (does not modify its input).
866
867				=item $labeller->get_possible_labels($edge, $previous_label,
868				$previous_label_score);
869
870				Computes possible labels for an edge, using info about the emission scores,
871				transition scores and last state's score. (Because of the first order Markov
872				factorization used, the states correspond to labels assigned to the edges they
873				corresond to.)
874
875				=item $labeller->get_possible_labels_internal
876				($emission_scores, $transition_scores, $last_state_score);
877
878				Does the actual generation of possible labels for an edge, where the emission
879				and transition scores are already computed for this particular edge.
880
881				=back
882
883				=head1 AUTHORS
884
885				Rudolf Rosa <rosa@ufal.mff.cuni.cz>
886
887				=head1 COPYRIGHT AND LICENSE
888
889				Copyright Â© 2011 by Institute of Formal and Applied Linguistics, Charles
890				University in Prague
891
892				This module is free software; you can redistribute it and/or modify it under
893				the same terms as Perl itself.