line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Block::Read::AlignedSentences; |
2
|
|
|
|
|
|
|
BEGIN { |
3
|
1
|
|
|
1
|
|
457999
|
$Treex::Block::Read::AlignedSentences::VERSION = '0.08170'; |
4
|
|
|
|
|
|
|
} |
5
|
1
|
|
|
1
|
|
1980
|
use Moose; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
use Treex::Core::Common; |
7
|
|
|
|
|
|
|
extends 'Treex::Block::Read::BaseAlignedTextReader'; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Build the next document from a set of line-aligned plain texts.
#
# Calls $self->next_document_texts(), which is used here as a hash reference
# mapping each zone label to that zone's whole text. Every text is split on
# newlines; line number i of every text ends up in bundle number i of the new
# document, one zone per zone label.
#
# A zone label containing an underscore is split into language and selector
# (e.g. "cs_ref" -> language "cs", selector "ref"); otherwise the label is the
# language and the reader's own selector ($self->selector) is used.
#
# Returns: the newly created document, or nothing (bare return) when
#          next_document_texts() returns undef.
# Dies:    via log_fatal when the texts do not all have the same number of
#          lines.
sub next_document {
    my ($self) = @_;

    my $texts_ref = $self->next_document_texts();
    return if !defined $texts_ref;

    # zone label => array ref with the lines of that zone's text
    my %sentences =
        map { $_ => [ split /\n/, $texts_ref->{$_} ] } keys %{$texts_ref};

    # All texts must have the same number of lines. The reference count is
    # kept undefined until the first zone has been seen, so that a zone with
    # zero lines is compared like any other instead of being mistaken for
    # "no count recorded yet".
    my $n;
    for my $zone_label ( keys %sentences ) {
        my $line_count = scalar @{ $sentences{$zone_label} };
        $n = $line_count if !defined $n;
        log_fatal("Different number of lines in aligned documents")
            if $n != $line_count;
    }

    # No zones at all: produce a document without bundles.
    $n = 0 if !defined $n;

    my $doc = $self->new_document();
    for my $i ( 0 .. $n - 1 ) {
        my $bundle = $doc->create_bundle();
        for my $zone_label ( keys %sentences ) {

            # Default: the label is the language, selector comes from the reader.
            my ( $lang, $selector ) = ( $zone_label, $self->selector );
            if ( $zone_label =~ /_/ ) {
                ( $lang, $selector ) = split /_/, $zone_label;
            }
            my $zone = $bundle->create_zone( $lang, $selector );
            $zone->set_sentence( $sentences{$zone_label}[$i] );
        }
    }

    return $doc;
}
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
1; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
__END__ |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 NAME |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Treex::Block::Read::AlignedSentences |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 VERSION |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
version 0.08170 |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 SYNOPSIS |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# in scenarios |
58
|
|
|
|
|
|
|
# Read::AlignedSentences en=en1.txt,en2.txt cs_ref=cs1.txt,cs2.txt |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 DESCRIPTION |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Document reader for plain text format, one sentence per line. |
63
|
|
|
|
|
|
|
Aligned sentences (usually in different languages) are loaded at once into respective zones. |
64
|
|
|
|
|
|
|
The sentences are stored into L<bundles|Treex::Core::Bundle> in the |
65
|
|
|
|
|
|
|
L<document|Treex::Core::Document>. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=over |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item any parameter in the form of a valid I<zone_label> |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
A space- or comma-separated list of filenames, or C<-> for STDIN. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=back |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 METHODS |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=over |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=item next_document |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
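Loads the next document: line I<i> of every aligned text is stored as the sentence of the corresponding zone in bundle I<i>. Returns nothing if C<next_document_texts> returns no texts. |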
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=back |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head1 SEE ALSO |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
L<Treex::Block::Read::BaseAlignedReader> |
90
|
|
|
|
|
|
|
L<Treex::Block::Read::BaseAlignedTextReader> |
91
|
|
|
|
|
|
|
L<Treex::Core::Document> |
92
|
|
|
|
|
|
|
L<Treex::Core::Bundle> |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head1 AUTHOR |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Martin Popel |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |