line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Wiktionary::Parser::Document; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
688
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
285
|
|
4
|
3
|
|
|
3
|
|
20
|
use warnings; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
108
|
|
5
|
3
|
|
|
3
|
|
18
|
use Data::Dumper; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
203
|
|
6
|
3
|
|
|
3
|
|
1227
|
use Wiktionary::Parser::Section; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
116
|
|
7
|
3
|
|
|
3
|
|
1536
|
use Wiktionary::Parser::Section::Translations; |
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
124
|
|
8
|
3
|
|
|
3
|
|
2119
|
use Wiktionary::Parser::Section::PartofSpeech; |
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
90
|
|
9
|
3
|
|
|
3
|
|
2021
|
use Wiktionary::Parser::Section::Etymology; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
77
|
|
10
|
3
|
|
|
3
|
|
1683
|
use Wiktionary::Parser::Section::Synonym; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
91
|
|
11
|
3
|
|
|
3
|
|
20082
|
use Wiktionary::Parser::Section::Hyponym; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
85
|
|
12
|
3
|
|
|
3
|
|
1811
|
use Wiktionary::Parser::Section::Hypernym; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
80
|
|
13
|
3
|
|
|
3
|
|
1756
|
use Wiktionary::Parser::Section::Antonym; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
92
|
|
14
|
3
|
|
|
3
|
|
21
|
use Wiktionary::Parser::Section::Etymology; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
245
|
|
15
|
3
|
|
|
3
|
|
2085
|
use Wiktionary::Parser::Section::Pronunciation; |
|
3
|
|
|
|
|
13
|
|
|
3
|
|
|
|
|
122
|
|
16
|
3
|
|
|
3
|
|
2080
|
use Wiktionary::Parser::Section::DerivedTerms; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
87
|
|
17
|
3
|
|
|
3
|
|
1978
|
use Wiktionary::Parser::Section::AlternativeForms; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
96
|
|
18
|
3
|
|
|
3
|
|
1675
|
use Wiktionary::Parser::Section::WikisaurusSection; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
79
|
|
19
|
3
|
|
|
3
|
|
49
|
use Wiktionary::Parser::Language; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
44410
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
sub new { |
22
|
21
|
|
|
21
|
0
|
296
|
my $class = shift; |
23
|
21
|
|
|
|
|
76
|
my %args = @_; |
24
|
|
|
|
|
|
|
|
25
|
21
|
|
|
|
|
67
|
my $sections = delete $args{sections}; |
26
|
|
|
|
|
|
|
|
27
|
21
|
|
|
|
|
80
|
my $self = bless \%args, $class; |
28
|
|
|
|
|
|
|
|
29
|
21
|
|
50
|
|
|
122
|
$self->{verbose} ||= 0; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# if a document is instantiated with existing section objects |
32
|
|
|
|
|
|
|
# add them one by one so that they get indexed |
33
|
21
|
100
|
66
|
|
|
120
|
if ($sections && @$sections) { |
34
|
20
|
|
|
|
|
46
|
for my $section (@$sections) { |
35
|
40
|
|
|
|
|
108
|
$self->add_section($section); |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
21
|
|
|
|
|
66
|
return $self; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# return the title of this document |
43
|
|
|
|
|
|
|
sub get_title { |
44
|
41
|
|
|
41
|
0
|
47
|
my $self = shift; |
45
|
41
|
|
|
|
|
416
|
return $self->{title}; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# add a section object to the document |
49
|
|
|
|
|
|
|
sub add_section { |
50
|
81
|
|
|
81
|
0
|
133
|
my $self = shift; |
51
|
81
|
|
|
|
|
196
|
my $section = shift; |
52
|
|
|
|
|
|
|
|
53
|
81
|
50
|
|
|
|
381
|
unless ($section->isa('Wiktionary::Parser::Section')) { |
54
|
0
|
|
|
|
|
0
|
die sprintf( |
55
|
|
|
|
|
|
|
'given value (%s) is not of type Wiktionary::Parser::Section', |
56
|
|
|
|
|
|
|
ref($section) |
57
|
|
|
|
|
|
|
); |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# link section to document |
61
|
81
|
100
|
|
|
|
371
|
unless ($section->get_document()) { |
62
|
40
|
|
|
|
|
120
|
$section->set_document($self); |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
66
|
81
|
|
|
|
|
283
|
my $section_number = $section->get_section_number(); |
67
|
81
|
|
|
|
|
362
|
$self->{sections}{$section_number} = $section; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# by default return a list of all sections |
71
|
|
|
|
|
|
|
# if title is given, return all sections matching that title |
72
|
|
|
|
|
|
|
# title may be a string or regex |
73
|
|
|
|
|
|
|
sub get_sections { |
74
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
75
|
0
|
|
|
|
|
0
|
my %args = @_; |
76
|
0
|
|
|
|
|
0
|
my $title = $args{title}; |
77
|
|
|
|
|
|
|
|
78
|
0
|
0
|
|
|
|
0
|
if ($title) { |
79
|
0
|
|
|
|
|
0
|
my @sections; |
80
|
0
|
|
|
|
|
0
|
for my $number ($self->get_section_numbers()) { |
81
|
|
|
|
|
|
|
|
82
|
0
|
0
|
|
|
|
0
|
next unless $self->get_section(number => $number)->get_header() =~ m/$title/i; |
83
|
0
|
|
|
|
|
0
|
push @sections, $self->get_section(number => $number); |
84
|
|
|
|
|
|
|
} |
85
|
0
|
|
|
|
|
0
|
return \@sections; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
0
|
|
|
|
|
0
|
return $self->{sections}; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# given some criteria to select a set of sections |
92
|
|
|
|
|
|
|
# return a document object encompassing only those sections |
93
|
|
|
|
|
|
|
sub get_sub_document { |
94
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
95
|
0
|
|
|
|
|
0
|
my %args = @_; |
96
|
0
|
|
|
|
|
0
|
my $title = $args{title}; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# if no section name pattern was passed in, just return the whole document |
99
|
0
|
0
|
|
|
|
0
|
return $self unless $title; |
100
|
|
|
|
|
|
|
|
101
|
0
|
|
|
|
|
0
|
my $sections = $self->get_sections(title => $title); |
102
|
|
|
|
|
|
|
|
103
|
0
|
0
|
0
|
|
|
0
|
return unless $sections && @$sections; |
104
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
0
|
my @children; |
106
|
0
|
|
|
|
|
0
|
for my $section (@$sections) { |
107
|
0
|
0
|
|
|
|
0
|
push @children, @{$section->get_child_sections() || []}; |
|
0
|
|
|
|
|
0
|
|
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
0
|
|
|
|
|
0
|
push @$sections, @children; |
111
|
0
|
|
|
|
|
0
|
my $sub_document = $self->create_sub_document( |
112
|
|
|
|
|
|
|
sections => $sections, |
113
|
|
|
|
|
|
|
); |
114
|
|
|
|
|
|
|
|
115
|
0
|
|
|
|
|
0
|
return $sub_document; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# return a document object consisting of just the given language section and its children |
120
|
|
|
|
|
|
|
sub get_language_section { |
121
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
122
|
0
|
|
|
|
|
0
|
my %args = @_; |
123
|
0
|
0
|
|
|
|
0
|
my $language = $args{language} or die 'language needs to be specified'; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# go through the document top to bottom and return the first matching section |
126
|
0
|
|
|
|
|
0
|
my $section; |
127
|
0
|
|
|
|
|
0
|
for my $number ($self->get_section_numbers()) { |
128
|
0
|
0
|
|
|
|
0
|
next unless $self->get_section(number => $number)->get_header() =~ m/^$language$/i; |
129
|
0
|
|
|
|
|
0
|
$section = $self->get_section(number => $number); |
130
|
0
|
|
|
|
|
0
|
last; |
131
|
|
|
|
|
|
|
} |
132
|
0
|
0
|
|
|
|
0
|
if ($section) { |
133
|
0
|
|
|
|
|
0
|
return $section->get_child_document(); |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
0
|
return; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub get_section { |
140
|
298
|
|
|
298
|
0
|
344
|
my $self = shift; |
141
|
298
|
|
|
|
|
681
|
my %args = @_; |
142
|
298
|
|
|
|
|
419
|
my $number = $args{number}; # lookup section by number |
143
|
|
|
|
|
|
|
|
144
|
298
|
50
|
|
|
|
569
|
if ($number) { |
145
|
298
|
|
|
|
|
1631
|
return $self->{sections}{$number}; |
146
|
|
|
|
|
|
|
} |
147
|
0
|
|
|
|
|
0
|
return; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# act as a section factory |
151
|
|
|
|
|
|
|
sub create_section { |
152
|
41
|
|
|
41
|
0
|
58
|
my $self = shift; |
153
|
41
|
|
|
|
|
129
|
my %args = @_; |
154
|
41
|
|
|
|
|
113
|
my $section_number = $args{section_number}; |
155
|
41
|
|
|
|
|
68
|
my $header = $args{header}; |
156
|
|
|
|
|
|
|
|
157
|
41
|
|
|
|
|
45
|
my $section; |
158
|
|
|
|
|
|
|
my $class; |
159
|
41
|
50
|
|
|
|
87
|
if ($self->get_title() =~ m/^Wikisaurus\:/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
160
|
0
|
|
|
|
|
0
|
$class = 'Wiktionary::Parser::Section::WikisaurusSection'; |
161
|
|
|
|
|
|
|
} elsif ($header =~ m/translation/i) { |
162
|
2
|
|
|
|
|
5
|
$class = 'Wiktionary::Parser::Section::Translations'; |
163
|
|
|
|
|
|
|
} elsif ($header =~ m/etymology/i) { |
164
|
6
|
|
|
|
|
9
|
$class = 'Wiktionary::Parser::Section::Etymology'; |
165
|
|
|
|
|
|
|
} elsif ($header =~ m/synonym/i) { |
166
|
0
|
|
|
|
|
0
|
$class = 'Wiktionary::Parser::Section::Synonym'; |
167
|
|
|
|
|
|
|
} elsif ($header =~ m/hypernym/i) { |
168
|
0
|
|
|
|
|
0
|
$class = 'Wiktionary::Parser::Section::Hypernym'; |
169
|
|
|
|
|
|
|
} elsif ($header =~ m/hyponym/i) { |
170
|
0
|
|
|
|
|
0
|
$class = 'Wiktionary::Parser::Section::Hyponym'; |
171
|
|
|
|
|
|
|
} elsif ($header =~ m/antonym/i) { |
172
|
0
|
|
|
|
|
0
|
$class = 'Wiktionary::Parser::Section::Antonym'; |
173
|
|
|
|
|
|
|
} elsif ($header =~ m/pronunciation/i) { |
174
|
4
|
|
|
|
|
7
|
$class = 'Wiktionary::Parser::Section::Pronunciation'; |
175
|
|
|
|
|
|
|
} elsif ($header =~ m/alternat\w+ form/i) { |
176
|
0
|
|
|
|
|
0
|
$class = 'Wiktionary::Parser::Section::AlternativeForms'; |
177
|
|
|
|
|
|
|
} elsif ($header =~ m/derived\sterm/i) { |
178
|
2
|
|
|
|
|
4
|
$class = 'Wiktionary::Parser::Section::DerivedTerms'; |
179
|
|
|
|
|
|
|
} elsif ($self->is_part_of_speech($header)) { |
180
|
12
|
|
|
|
|
20
|
$class = 'Wiktionary::Parser::Section::PartofSpeech' |
181
|
|
|
|
|
|
|
} else { |
182
|
15
|
|
|
|
|
21
|
$class = 'Wiktionary::Parser::Section'; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
41
|
|
|
|
|
238
|
$section = $class->new( |
186
|
|
|
|
|
|
|
section_number => $section_number, |
187
|
|
|
|
|
|
|
header => $header, |
188
|
|
|
|
|
|
|
document => $self, |
189
|
|
|
|
|
|
|
); |
190
|
|
|
|
|
|
|
|
191
|
41
|
|
|
|
|
102
|
$self->add_section($section); |
192
|
|
|
|
|
|
|
|
193
|
41
|
|
|
|
|
177
|
return $section; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub get_table_of_contents { |
197
|
1
|
|
|
1
|
0
|
9
|
my $self = shift; |
198
|
1
|
|
|
|
|
2
|
my @contents; |
199
|
1
|
|
|
|
|
6
|
for my $number ($self->get_section_numbers()) { |
200
|
40
|
|
|
|
|
90
|
push @contents, sprintf("%s,%s",$number,$self->get_section(number => $number)->get_header()); |
201
|
|
|
|
|
|
|
} |
202
|
1
|
|
|
|
|
13
|
return \@contents; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# grab all translation sections |
207
|
|
|
|
|
|
|
sub get_translation_sections { |
208
|
20
|
|
|
20
|
0
|
28
|
my $self = shift; |
209
|
20
|
|
|
|
|
74
|
return $self->get_sections_of_type('Wiktionary::Parser::Section::Translations'); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# grab all part of speech sections |
213
|
|
|
|
|
|
|
sub get_part_of_speech_sections { |
214
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
215
|
0
|
|
|
|
|
0
|
return $self->get_sections_of_type('Wiktionary::Parser::Section::PartofSpeech'); |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub get_synonym_sections { |
219
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
220
|
0
|
|
|
|
|
0
|
return $self->get_sections_of_type('Wiktionary::Parser::Section::Synonym'); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub get_hypernym_sections { |
224
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
225
|
0
|
|
|
|
|
0
|
return $self->get_sections_of_type('Wiktionary::Parser::Section::Hypernym'); |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub get_hyponym_sections { |
229
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
230
|
0
|
|
|
|
|
0
|
return $self->get_sections_of_type('Wiktionary::Parser::Section::Hyponym'); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub get_pronunciation_sections { |
234
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
235
|
0
|
|
|
|
|
0
|
return $self->get_sections_of_type('Wiktionary::Parser::Section::Pronunciation'); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
sub get_sections_of_type { |
240
|
20
|
|
|
20
|
0
|
34
|
my $self = shift; |
241
|
20
|
|
|
|
|
28
|
my $type = shift; |
242
|
20
|
|
|
|
|
28
|
my @sections; |
243
|
20
|
|
|
|
|
62
|
for my $number ($self->get_section_numbers()) { |
244
|
40
|
50
|
|
|
|
112
|
next unless $self->get_section(number => $number)->isa($type); |
245
|
40
|
|
|
|
|
88
|
push @sections, $self->get_section(number => $number); |
246
|
|
|
|
|
|
|
} |
247
|
20
|
|
|
|
|
69
|
return \@sections; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub get_section_numbers { |
252
|
22
|
|
|
22
|
0
|
34
|
my $self = shift; |
253
|
22
|
50
|
|
|
|
44
|
return (sort {$a cmp $b} grep {$_} keys %{$self->{sections} || {}}); |
|
360
|
|
|
|
|
471
|
|
|
122
|
|
|
|
|
270
|
|
|
22
|
|
|
|
|
135
|
|
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
sub get_word_senses { |
258
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
259
|
0
|
|
|
|
|
0
|
my %args = @_; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# follow links to other wiktionary pages |
262
|
0
|
0
|
|
|
|
0
|
my $_meta_follow_links = defined($args{_meta_follow_links}) ? $args{_meta_follow_links} : 1; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
0
|
my $sections = $self->get_translation_sections(); |
266
|
0
|
|
|
|
|
0
|
my @word_senses; |
267
|
0
|
0
|
|
|
|
0
|
for my $section (@{$sections || []}) { |
|
0
|
|
|
|
|
0
|
|
268
|
0
|
|
|
|
|
0
|
my $word_senses = $section->get_word_senses(); |
269
|
|
|
|
|
|
|
|
270
|
0
|
0
|
|
|
|
0
|
for my $word_sense (@{$word_senses || []}) { |
|
0
|
|
|
|
|
0
|
|
271
|
|
|
|
|
|
|
|
272
|
0
|
0
|
|
|
|
0
|
if (my ($title) = $word_sense->get_word() =~ m/^wiktionary\:(.+)$/i) { |
273
|
0
|
0
|
|
|
|
0
|
if ($_meta_follow_links) { |
274
|
|
|
|
|
|
|
# get titles to linked pages |
275
|
|
|
|
|
|
|
# get translations from the linked document |
276
|
0
|
|
|
|
|
0
|
my $linked_document = $self->get_parser()->get_document(title => $title); |
277
|
|
|
|
|
|
|
# set _meta_follow_links to 0, to ensure we don't end up in |
278
|
|
|
|
|
|
|
# an endless loop if pages link back to each other |
279
|
0
|
|
|
|
|
0
|
my $linked_word_senses = $linked_document->get_word_senses(_meta_follow_links => 0); |
280
|
0
|
|
|
|
|
0
|
push @word_senses, @{$linked_word_senses}; |
|
0
|
|
|
|
|
0
|
|
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
0
|
|
|
|
|
0
|
next; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
287
|
0
|
|
|
|
|
0
|
push @word_senses, $word_sense->get_word(); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
0
|
|
|
|
|
0
|
return \@word_senses; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub get_synonyms { |
295
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
296
|
0
|
|
|
|
|
0
|
my %args = @_; |
297
|
0
|
|
|
|
|
0
|
return $self->get_classifications( |
298
|
|
|
|
|
|
|
class => 'Wiktionary::Parser::Section::Synonym', |
299
|
|
|
|
|
|
|
); |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub get_hyponyms { |
303
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
304
|
0
|
|
|
|
|
0
|
my %args = @_; |
305
|
0
|
|
|
|
|
0
|
return $self->get_classifications( |
306
|
|
|
|
|
|
|
class => 'Wiktionary::Parser::Section::Hyponym', |
307
|
|
|
|
|
|
|
); |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
sub get_hypernyms { |
311
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
312
|
0
|
|
|
|
|
0
|
my %args = @_; |
313
|
0
|
|
|
|
|
0
|
return $self->get_classifications( |
314
|
|
|
|
|
|
|
class => 'Wiktionary::Parser::Section::Hypernym', |
315
|
|
|
|
|
|
|
); |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub get_antonyms { |
319
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
320
|
0
|
|
|
|
|
0
|
my %args = @_; |
321
|
0
|
|
|
|
|
0
|
return $self->get_classifications( |
322
|
|
|
|
|
|
|
class => 'Wiktionary::Parser::Section::Antonym', |
323
|
|
|
|
|
|
|
); |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
sub get_classifications { |
328
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
329
|
0
|
|
|
|
|
0
|
my %args = @_; |
330
|
0
|
|
|
|
|
0
|
my $class = $args{class}; |
331
|
|
|
|
|
|
|
|
332
|
0
|
0
|
|
|
|
0
|
if ($self->{"__get_${class}__"}) { |
333
|
0
|
|
|
|
|
0
|
return $self->{"__get_${class}__"}; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
0
|
|
|
|
|
0
|
my $sections = $self->get_sections_of_type($class); |
337
|
0
|
|
|
|
|
0
|
my %x_nyms; |
338
|
|
|
|
|
|
|
|
339
|
0
|
0
|
|
|
|
0
|
for my $section (@{$sections || []}) { |
|
0
|
|
|
|
|
0
|
|
340
|
0
|
|
|
|
|
0
|
my $x_nyms = $section->get_groups(); |
341
|
0
|
0
|
|
|
|
0
|
for my $x_nym (@{$x_nyms || []}) { |
|
0
|
|
|
|
|
0
|
|
342
|
|
|
|
|
|
|
|
343
|
0
|
|
|
|
|
0
|
my $lang = $x_nym->{language}; |
344
|
0
|
|
|
|
|
0
|
my $sense = $x_nym->{sense}; |
345
|
|
|
|
|
|
|
|
346
|
0
|
0
|
|
|
|
0
|
my @lexemes = @{$x_nym->{lexemes} || []}; |
|
0
|
|
|
|
|
0
|
|
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
|
349
|
0
|
|
|
|
|
0
|
my @full_word_list; |
350
|
0
|
|
|
|
|
0
|
while (my $lexeme = shift @lexemes) { |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# look for links to wikisaurus entries |
354
|
|
|
|
|
|
|
# and include content from those documents |
355
|
|
|
|
|
|
|
|
356
|
0
|
0
|
|
|
|
0
|
if ($lexeme =~ m/^Wikisaurus:/) { |
357
|
0
|
|
|
|
|
0
|
my $wikisaurus_document = $self->get_linked_document(title => $lexeme); |
358
|
0
|
|
|
|
|
0
|
my $ws_sections = $wikisaurus_document->get_sections(title => $section->get_header()); |
359
|
|
|
|
|
|
|
|
360
|
0
|
0
|
|
|
|
0
|
for my $ws_section (@{$ws_sections || []}) { |
|
0
|
|
|
|
|
0
|
|
361
|
0
|
|
|
|
|
0
|
my $word_list = $ws_section->get_words(); |
362
|
0
|
0
|
|
|
|
0
|
for my $word (@{$word_list || []}) { |
|
0
|
|
|
|
|
0
|
|
363
|
0
|
|
|
|
|
0
|
push @full_word_list, $word->{word}; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
} else { |
367
|
0
|
|
|
|
|
0
|
push @full_word_list, $lexeme; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
0
|
|
|
|
|
0
|
push @{$x_nyms{$lang}{sense}{$sense}}, @full_word_list; |
|
0
|
|
|
|
|
0
|
|
372
|
0
|
|
0
|
|
|
0
|
$x_nyms{$lang}{language} ||= $self->get_language_mapper()->code2language($lang); |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
|
377
|
0
|
|
|
|
|
0
|
$self->{"__get_${class}__"} = \%x_nyms; |
378
|
|
|
|
|
|
|
|
379
|
0
|
|
|
|
|
0
|
return \%x_nyms; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# return lists of words from the Derived Terms sections broken down by language |
383
|
|
|
|
|
|
|
sub get_derived_terms { |
384
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
385
|
0
|
|
|
|
|
0
|
my $class = 'Wiktionary::Parser::Section::DerivedTerms'; |
386
|
0
|
|
|
|
|
0
|
my $sections = $self->get_sections_of_type($class); |
387
|
0
|
|
|
|
|
0
|
my %terms; |
388
|
0
|
0
|
|
|
|
0
|
for my $section (@{$sections || []}) { |
|
0
|
|
|
|
|
0
|
|
389
|
0
|
|
|
|
|
0
|
my $hr = $section->get_derived_terms(); |
390
|
0
|
|
|
|
|
0
|
for my $language (keys %{$hr}) { |
|
0
|
|
|
|
|
0
|
|
391
|
0
|
0
|
|
|
|
0
|
push @{$terms{$language}}, @{$hr->{$language} || []} |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
} |
394
|
0
|
|
|
|
|
0
|
return \%terms; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# return all pronunciation metadata broken down by language |
398
|
|
|
|
|
|
|
sub get_pronunciations { |
399
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
400
|
0
|
|
|
|
|
0
|
my %args = @_; |
401
|
0
|
|
|
|
|
0
|
my $class = 'Wiktionary::Parser::Section::Pronunciation'; |
402
|
|
|
|
|
|
|
|
403
|
0
|
|
|
|
|
0
|
my $sections = $self->get_sections_of_type($class); |
404
|
0
|
|
|
|
|
0
|
my %meta; |
405
|
|
|
|
|
|
|
my %seen; |
406
|
|
|
|
|
|
|
|
407
|
0
|
0
|
|
|
|
0
|
for my $section (@{$sections || []}) { |
|
0
|
|
|
|
|
0
|
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
{ |
410
|
0
|
|
|
|
|
0
|
my $hr = $section->get_pronunciations(); |
|
0
|
|
|
|
|
0
|
|
411
|
0
|
|
|
|
|
0
|
for my $lang (keys %{$hr}) { |
|
0
|
|
|
|
|
0
|
|
412
|
0
|
|
|
|
|
0
|
push @{$meta{$lang}{pronunciation}}, @{$hr->{$lang}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
413
|
0
|
|
0
|
|
|
0
|
$meta{$lang}{language} ||= $self->get_language_mapper()->code2language($lang); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
{ |
418
|
0
|
|
|
|
|
0
|
my $hr = $section->get_audio(); |
|
0
|
|
|
|
|
0
|
|
419
|
0
|
|
|
|
|
0
|
for my $lang (keys %{$hr}) { |
|
0
|
|
|
|
|
0
|
|
420
|
|
|
|
|
|
|
# remove duplicates |
421
|
0
|
|
|
|
|
0
|
push @{$meta{$lang}{audio}}, grep {!$seen{audio}{$lang}{ $_->{file} }++} @{$hr->{$lang}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
422
|
0
|
|
0
|
|
|
0
|
$meta{$lang}{language} ||= $self->get_language_mapper()->code2language($lang); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
{ |
427
|
0
|
|
|
|
|
0
|
my $hr = $section->get_rhymes(); |
|
0
|
|
|
|
|
0
|
|
428
|
0
|
|
|
|
|
0
|
for my $lang (keys %{$hr}) { |
|
0
|
|
|
|
|
0
|
|
429
|
0
|
|
|
|
|
0
|
push @{$meta{$lang}{rhyme}}, @{$hr->{$lang}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
430
|
0
|
|
0
|
|
|
0
|
$meta{$lang}{language} ||= $self->get_language_mapper()->code2language($lang); |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
{ |
435
|
0
|
|
|
|
|
0
|
my $hr = $section->get_homophones(); |
|
0
|
|
|
|
|
0
|
|
436
|
0
|
|
|
|
|
0
|
for my $lang (keys %{$hr}) { |
|
0
|
|
|
|
|
0
|
|
437
|
0
|
|
|
|
|
0
|
push @{$meta{$lang}{homophone}}, @{$hr->{$lang}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
438
|
0
|
|
0
|
|
|
0
|
$meta{$lang}{language} ||= $self->get_language_mapper()->code2language($lang); |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
{ |
443
|
0
|
|
|
|
|
0
|
my $hr = $section->get_hyphenations(); |
|
0
|
|
|
|
|
0
|
|
444
|
0
|
|
|
|
|
0
|
for my $lang (keys %{$hr}) { |
|
0
|
|
|
|
|
0
|
|
445
|
0
|
|
|
|
|
0
|
push @{$meta{$lang}{hyphenation}}, @{$hr->{$lang}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
446
|
0
|
|
0
|
|
|
0
|
$meta{$lang}{language} ||= $self->get_language_mapper()->code2language($lang); |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
0
|
|
|
|
|
0
|
return \%meta; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
sub get_parts_of_speech { |
459
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
460
|
0
|
|
|
|
|
0
|
my %args = @_; |
461
|
|
|
|
|
|
|
|
462
|
0
|
0
|
|
|
|
0
|
if ($self->{__get_parts_of_speech__}) { |
463
|
0
|
|
|
|
|
0
|
return $self->{__get_parts_of_speech__}; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
|
466
|
0
|
|
|
|
|
0
|
my $sections = $self->get_part_of_speech_sections(); |
467
|
|
|
|
|
|
|
|
468
|
0
|
|
|
|
|
0
|
my %parts_of_speech; |
469
|
0
|
0
|
|
|
|
0
|
for my $section (@{$sections || []}) { |
|
0
|
|
|
|
|
0
|
|
470
|
0
|
|
|
|
|
0
|
my $pos = $section->get_part_of_speech(); |
471
|
0
|
|
|
|
|
0
|
my $lang_code = $section->get_language_code(); |
472
|
0
|
0
|
0
|
|
|
0
|
next unless $pos && $lang_code; |
473
|
0
|
|
|
|
|
0
|
push @{$parts_of_speech{$lang_code}{part_of_speech}}, $pos; |
|
0
|
|
|
|
|
0
|
|
474
|
0
|
|
0
|
|
|
0
|
$parts_of_speech{$lang_code}{language} ||= get_language_mapper()->code2language($lang_code); |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
0
|
|
|
|
|
0
|
$self->{__get_parts_of_speech__} = \%parts_of_speech; |
479
|
0
|
|
|
|
|
0
|
return \%parts_of_speech; |
480
|
|
|
|
|
|
|
} |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
sub get_translations { |
484
|
20
|
|
|
20
|
0
|
203
|
my $self = shift; |
485
|
20
|
|
|
|
|
55
|
my %args = @_; |
486
|
20
|
100
|
|
|
|
71
|
my $include_transliterations = defined($args{include_transliterations}) ? $args{include_transliterations} : 1; |
487
|
20
|
50
|
|
|
|
56
|
my $include_alternate_translations = defined($args{include_alternate_translations}) ? $args{include_alternate_translations} : 1; |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
# follow links to other wiktionary pages |
490
|
20
|
50
|
|
|
|
60
|
my $_meta_follow_links = defined($args{_meta_follow_links}) ? $args{_meta_follow_links} : 1; |
491
|
|
|
|
|
|
|
|
492
|
20
|
|
|
|
|
81
|
my $sections = $self->get_translation_sections(); |
493
|
20
|
|
|
|
|
32
|
my @word_senses; |
494
|
|
|
|
|
|
|
my %translations; |
495
|
|
|
|
|
|
|
|
496
|
20
|
50
|
|
|
|
28
|
for my $section (@{$sections || []}) { |
|
20
|
|
|
|
|
61
|
|
497
|
40
|
|
|
|
|
148
|
my $word_senses = $section->get_word_senses(); |
498
|
|
|
|
|
|
|
|
499
|
40
|
50
|
|
|
|
80
|
for my $word_sense (@{$word_senses || []}) { |
|
40
|
|
|
|
|
142
|
|
500
|
20
|
|
|
|
|
75
|
my $word_sense_lexeme = $word_sense->get_word(); |
501
|
20
|
|
|
|
|
77
|
my $translations = $word_sense->get_translations(); |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# if we have a link to another page, download that page and merge its translation data |
504
|
20
|
50
|
|
|
|
62
|
if (my ($title) = $word_sense->get_word() =~ m/^wiktionary\:(.+)$/i) { |
505
|
|
|
|
|
|
|
|
506
|
0
|
0
|
|
|
|
0
|
if ($_meta_follow_links) { |
507
|
|
|
|
|
|
|
# get titles to linked pages |
508
|
|
|
|
|
|
|
# get translations from the linked document |
509
|
|
|
|
|
|
|
|
510
|
0
|
|
|
|
|
0
|
my $linked_document = $self->get_parser()->get_document(title => $title); |
511
|
|
|
|
|
|
|
# set _meta_follow_links to 0, to ensure we don't end up in |
512
|
|
|
|
|
|
|
# an endless loop if pages link back to each other |
513
|
|
|
|
|
|
|
|
514
|
0
|
|
|
|
|
0
|
my $linked_translations = {}; |
515
|
0
|
0
|
|
|
|
0
|
if ($linked_document) { |
516
|
0
|
|
|
|
|
0
|
$linked_translations = $linked_document->get_translations(_meta_follow_links => 0); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
0
|
|
|
|
|
0
|
for my $linked_word_sense (keys %$linked_translations) { |
520
|
0
|
0
|
|
|
|
0
|
for my $linked_lang_code (keys %{ $linked_translations->{$linked_word_sense} || {} }) { |
|
0
|
|
|
|
|
0
|
|
521
|
0
|
|
|
|
|
0
|
$translations{$linked_word_sense}{$linked_lang_code}{language} = $linked_translations->{$linked_word_sense}{$linked_lang_code}{language}; |
522
|
0
|
0
|
|
|
|
0
|
push @{ $translations{$linked_word_sense}{$linked_lang_code}{translations} }, @{ $linked_translations->{$linked_word_sense}{$linked_lang_code}{translations} || []}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
0
|
|
|
|
|
0
|
next; |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
|
532
|
20
|
50
|
|
|
|
44
|
for my $language (keys %{$translations || {}}) { |
|
20
|
|
|
|
|
84
|
|
533
|
|
|
|
|
|
|
|
534
|
20
|
|
|
|
|
92
|
my $language_code = $self->get_language_mapper()->language2code($language); |
535
|
20
|
|
|
|
|
65
|
my $normalized_language = $self->get_language_mapper()->code2language($language_code); |
536
|
|
|
|
|
|
|
|
537
|
20
|
|
|
|
|
48
|
my $lexemes = $translations->{$language}; |
538
|
20
|
|
|
|
|
23
|
my %seen; |
539
|
20
|
|
|
|
|
29
|
for my $lexeme (@{$lexemes}) { |
|
20
|
|
|
|
|
44
|
|
540
|
37
|
|
|
|
|
147
|
my @translations = $lexeme->get_translations(); |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# if the lexeme has a language code, use that to determine language |
543
|
37
|
|
|
|
|
138
|
my $tagged_language_code = $lexeme->get_language_code(); |
544
|
37
|
100
|
|
|
|
89
|
if ($tagged_language_code) { |
545
|
25
|
|
66
|
|
|
169
|
$normalized_language = $self->get_language_mapper()->code2language($tagged_language_code) || $normalized_language; |
546
|
25
|
|
33
|
|
|
66
|
$language_code = $self->get_language_mapper()->language2code($normalized_language) || $tagged_language_code; |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
|
550
|
37
|
|
|
|
|
222
|
my $part_of_speech = $section->get_part_of_speech(); |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
|
553
|
37
|
100
|
100
|
|
|
192
|
if ($include_transliterations && $lexeme->get_transliteration()) { |
554
|
14
|
|
|
|
|
46
|
push @translations, $lexeme->get_transliteration(); |
555
|
|
|
|
|
|
|
} |
556
|
37
|
50
|
33
|
|
|
181
|
if ($include_alternate_translations && $lexeme->get_alternate()) { |
557
|
0
|
|
|
|
|
0
|
push @translations, $lexeme->get_alternate(); |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
37
|
|
|
|
|
160
|
for my $lex (sort @translations) { |
561
|
|
|
|
|
|
|
|
562
|
107
|
50
|
|
|
|
431
|
next unless defined $lex; |
563
|
107
|
100
|
|
|
|
120
|
unless (grep {$_ eq $lex} @{$translations{$word_sense_lexeme}{$language_code}{translations} || []}) { |
|
270
|
100
|
|
|
|
645
|
|
|
107
|
|
|
|
|
456
|
|
564
|
48
|
|
|
|
|
51
|
push @{$translations{$word_sense_lexeme}{$language_code}{translations}},$lex; |
|
48
|
|
|
|
|
142
|
|
565
|
48
|
|
66
|
|
|
189
|
$translations{$word_sense_lexeme}{$language_code}{language} ||= $normalized_language; |
566
|
48
|
|
66
|
|
|
276
|
$translations{$word_sense_lexeme}{$language_code}{part_of_speech} ||= $part_of_speech; |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
20
|
|
|
|
|
155
|
return \%translations; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
sub is_part_of_speech { |
579
|
67
|
|
|
67
|
0
|
90
|
my $self = shift; |
580
|
67
|
|
|
|
|
86
|
my $header = shift; |
581
|
67
|
100
|
|
|
|
114
|
return 1 if grep { $header =~ m/^$_$/i } qw( |
|
737
|
|
|
|
|
7548
|
|
582
|
|
|
|
|
|
|
noun |
583
|
|
|
|
|
|
|
verb |
584
|
|
|
|
|
|
|
adjective |
585
|
|
|
|
|
|
|
adverb |
586
|
|
|
|
|
|
|
pronoun |
587
|
|
|
|
|
|
|
preposition |
588
|
|
|
|
|
|
|
article |
589
|
|
|
|
|
|
|
conjunction |
590
|
|
|
|
|
|
|
determiner |
591
|
|
|
|
|
|
|
interjection |
592
|
|
|
|
|
|
|
symbol |
593
|
|
|
|
|
|
|
); |
594
|
|
|
|
|
|
|
|
595
|
35
|
|
|
|
|
121
|
return 0; |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# call the parser to download a page for a term in this document |
600
|
|
|
|
|
|
|
sub get_linked_document { |
601
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
602
|
0
|
|
|
|
|
0
|
my %args = @_; |
603
|
0
|
|
|
|
|
0
|
my $title = $args{title}; |
604
|
|
|
|
|
|
|
|
605
|
0
|
|
0
|
|
|
0
|
$self->{linked_documents} ||= {}; |
606
|
0
|
0
|
|
|
|
0
|
if ($self->{linked_documents}{$title}) { |
607
|
0
|
|
|
|
|
0
|
return $self->{linked_documents}{$title}; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
0
|
|
|
|
|
0
|
$self->debug("Getting Linked Page: $title"); |
611
|
|
|
|
|
|
|
|
612
|
0
|
|
|
|
|
0
|
my $parser = $self->get_parser(); |
613
|
0
|
0
|
|
|
|
0
|
return unless $parser; |
614
|
|
|
|
|
|
|
|
615
|
0
|
|
|
|
|
0
|
$self->{linked_documents}{$title} = $parser->get_document(title => $title); |
616
|
0
|
|
|
|
|
0
|
return $self->{linked_documents}{$title}; |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
sub get_parser { |
620
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
621
|
0
|
|
|
|
|
0
|
return $self->{parser}; |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
sub get_language_mapper { |
625
|
90
|
|
|
90
|
0
|
115
|
my $self = shift; |
626
|
90
|
|
66
|
|
|
565
|
return $self->{language_map} ||= Wiktionary::Parser::Language->new(); |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
# create a document object with a subset of sections |
631
|
|
|
|
|
|
|
sub create_sub_document { |
632
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
633
|
0
|
|
|
|
|
0
|
my %args = @_; |
634
|
0
|
0
|
|
|
|
0
|
my $sections = $args{sections} or die 'sections must be defined'; |
635
|
0
|
|
|
|
|
0
|
return __PACKAGE__->new( sections => $sections, title => $self->get_title() ); |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
sub debug { |
640
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
641
|
0
|
0
|
|
|
|
0
|
return unless $self->{verbose}; |
642
|
0
|
|
|
|
|
0
|
local $\ = "\n"; |
643
|
0
|
|
|
|
|
0
|
local $, = ' '; |
644
|
0
|
|
|
|
|
0
|
print 'DEBUG:',@_; |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
sub add_category { |
649
|
17
|
|
|
17
|
0
|
51
|
my $self = shift; |
650
|
17
|
|
|
|
|
51
|
my %args = @_; |
651
|
17
|
|
|
|
|
28
|
my $category = $args{category}; |
652
|
17
|
|
|
|
|
26
|
push @{$self->{categories}},$category; |
|
17
|
|
|
|
|
67
|
|
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
sub add_language_link { |
656
|
51
|
|
|
51
|
0
|
62
|
my $self = shift; |
657
|
51
|
|
|
|
|
129
|
my %args = @_; |
658
|
51
|
|
|
|
|
67
|
my $tag = $args{tag}; |
659
|
51
|
|
|
|
|
48
|
push @{$self->{language_links}},$tag; |
|
51
|
|
|
|
|
212
|
|
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
sub get_language_links { |
663
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
664
|
0
|
|
|
|
|
0
|
return $self->{language_links}; |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub get_categories { |
668
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
669
|
0
|
|
|
|
|
0
|
return $self->{categories}; |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
# get the languages represented by sections in this document |
673
|
|
|
|
|
|
|
sub get_section_languages { |
674
|
1
|
|
|
1
|
0
|
1456
|
my $self = shift; |
675
|
1
|
|
|
|
|
2
|
my @languages; |
676
|
|
|
|
|
|
|
# get top level sections |
677
|
1
|
|
|
|
|
4
|
for my $number ($self->get_section_numbers()) { |
678
|
40
|
100
|
|
|
|
88
|
next if $number =~ m/\./; |
679
|
7
|
|
|
|
|
18
|
push @languages, $self->get_section(number => $number)->get_header(); |
680
|
|
|
|
|
|
|
} |
681
|
1
|
|
|
|
|
12
|
return \@languages; |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
1; |