line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Block::Read::AlignedSentences; |
2
|
|
|
|
|
|
|
$Treex::Block::Read::AlignedSentences::VERSION = '2.20151102'; |
3
|
1
|
|
|
1
|
|
75048
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
5
|
1
|
|
|
1
|
|
845
|
use Moose; |
|
1
|
|
|
|
|
450508
|
|
|
1
|
|
|
|
|
7
|
|
6
|
1
|
|
|
1
|
|
7764
|
use Treex::Core::Common; |
|
1
|
|
|
|
|
590821
|
|
|
1
|
|
|
|
|
7
|
|
7
|
|
|
|
|
|
|
extends 'Treex::Block::Read::BaseAlignedTextReader'; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Build the next document from the aligned plain-text inputs.
#
# Fetches the texts of all zones via next_document_texts() (used here as a
# hash ref keyed by zone label), splits every text into lines and creates one
# bundle per line index. Each bundle gets one zone per zone label, holding
# the corresponding line as its sentence.
#
# Returns the new document, or nothing when next_document_texts() returns
# undef. Calls log_fatal when the zones do not all have the same number of
# lines.
sub next_document {
    my ($self) = @_;

    my $texts_ref = $self->next_document_texts();
    return if !defined $texts_ref;

    # zone label => array ref with the lines (sentences) of that zone
    my %sentences =
        map { $_ => [ split /\n/, $texts_ref->{$_} ] } keys %{$texts_ref};

    # All zones must contain the same number of lines. The reference count
    # stays undefined until the first zone has been inspected, so a zone with
    # zero lines is a valid reference value; a truth test on a counter
    # initialised to 0 would let an empty zone slip through unchecked
    # whenever it happened to be visited first.
    my $n;
    for my $zone_label ( keys %sentences ) {
        my $line_count = scalar @{ $sentences{$zone_label} };
        if ( !defined $n ) {
            $n = $line_count;
        }
        log_fatal("Different number of lines in aligned documents")
            if $n != $line_count;
    }

    # No zones at all: fall through and return a document without bundles.
    $n = 0 if !defined $n;

    my $doc = $self->new_document();
    for my $i ( 0 .. $n - 1 ) {
        my $bundle = $doc->create_bundle();
        for my $zone_label ( keys %sentences ) {

            # A label is either "lang" (the selector then comes from the
            # block's own selector attribute) or "lang_selector".
            my ( $lang, $selector ) = ( $zone_label, $self->selector );
            if ( $zone_label =~ /_/ ) {
                ( $lang, $selector ) = split /_/, $zone_label;
            }
            my $zone = $bundle->create_zone( $lang, $selector );
            $zone->set_sentence( $sentences{$zone_label}[$i] );
        }
    }

    return $doc;
}
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
1; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
__END__ |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 NAME |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
Treex::Block::Read::AlignedSentences - reader for aligned plain-text files, one sentence per line
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 VERSION |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
version 2.20151102 |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head1 SYNOPSIS |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# in scenarios |
60
|
|
|
|
|
|
|
# Read::AlignedSentences en=en1.txt,en2.txt cs_ref=cs1.txt,cs2.txt |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head1 DESCRIPTION |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Document reader for plain text format, one sentence per line. |
65
|
|
|
|
|
|
|
Aligned sentences (usually in different languages) are loaded at once into respective zones. |
66
|
|
|
|
|
|
|
The sentences are stored into L<bundles|Treex::Core::Bundle> in the |
67
|
|
|
|
|
|
|
L<document|Treex::Core::Document>. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=over |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=item any parameter in a form of a valid I<zone_label> |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
space or comma separated list of filenames, or C<-> for STDIN. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=back |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 METHODS |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=over |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item next_document |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
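Loads the next document. The text of every zone is split into lines, and one
bundle is created per line, containing one zone for each zone label with that
line as its sentence. Returns nothing when C<next_document_texts> yields no
more texts. It is a fatal error if the aligned texts differ in their number
of lines.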
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=back |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head1 SEE ALSO |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
L<Treex::Block::Read::BaseAlignedReader> |
92
|
|
|
|
|
|
|
L<Treex::Block::Read::BaseAlignedTextReader> |
93
|
|
|
|
|
|
|
L<Treex::Core::Document> |
94
|
|
|
|
|
|
|
L<Treex::Core::Bundle> |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 AUTHOR |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Martin Popel |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |