| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Wiktionary::Parser::Section; |
|
2
|
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
18
|
use strict; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
111
|
|
|
4
|
3
|
|
|
3
|
|
19
|
use warnings; |
|
|
3
|
|
|
|
|
16
|
|
|
|
3
|
|
|
|
|
80
|
|
|
5
|
3
|
|
|
3
|
|
17
|
use Data::Dumper; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
166
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
3
|
|
|
3
|
|
1693
|
use Wiktionary::Parser::TemplateParser; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
82
|
|
|
8
|
3
|
|
|
3
|
|
1818
|
use Wiktionary::Parser::Language; |
|
|
3
|
|
|
|
|
12
|
|
|
|
3
|
|
|
|
|
14586
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Constructor.
#
# Takes a flat list of key/value pairs, which become the fields of the new
# object. 'section_number' is required. 'header', when given and true, is
# stored lowercased. A fresh Wiktionary::Parser::TemplateParser is attached
# under the 'template_parser' key.
#
# Dies with 'section_number not defined' if that argument is absent or undef.
# Returns the blessed object.
sub new {
    my ($class, %args) = @_;

    if (!defined $args{section_number}) {
        die 'section_number not defined';
    }

    # normalise the header's case once, at construction time
    if ($args{header}) {
        $args{header} = lc($args{header});
    }

    # the argument hash itself becomes the object
    my $self = bless \%args, $class;
    $self->{template_parser} = Wiktionary::Parser::TemplateParser->new();

    return $self;
}
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Accessor: return the value stored under 'section_number'
# (e.g. "1.2.1", going by the examples elsewhere in this file).
sub get_section_number {
    my ($self) = @_;
    return $self->{section_number};
}
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Return this section's header text in lowercase.
#
# The constructor allows a section to be created without a header, so an
# undefined header is handled explicitly: the result is the empty string
# (the value lc() would produce for undef anyway), but without raising an
# "uninitialized value" warning.
sub get_header {
    my $self = shift;

    my $header = $self->{header};
    return defined($header) ? lc($header) : '';
}
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Mutator: store the lowercased form of the given header text under
# 'header' and return the stored value.
sub set_header {
    my ($self, $header) = @_;
    return $self->{header} = lc $header;
}
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Append one line of text to this section's content list.
# The list under 'content' is created on first use.
# Returns what push returns: the number of lines now held.
sub add_content {
    my ($self, $line) = @_;
    return push @{ $self->{content} }, $line;
}
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Return the arrayref of text lines collected for this section.
# Nothing is stored under 'content' until a line has been added,
# so the result is undef for a section with no content yet.
sub get_content {
    my ($self) = @_;
    return $self->{content};
}
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Accessor: return whatever is stored under 'document'
# (the document object this section was attached to).
sub get_document {
    my ($self) = @_;
    return $self->{document};
}
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Mutator: attach the given document to this section by storing it
# under 'document'. Returns the document that was stored.
sub set_document {
    my ($self, $doc) = @_;
    $self->{document} = $doc;
    return $self->{document};
}
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Return the parent of this section, looked up in the owning document.
# For example, section 1.2.1 yields section 1.2.
# A section whose number contains no dot has no parent; in that case the
# sub returns nothing (undef in scalar context).
sub get_parent_section {
    my ($self) = @_;

    my $parent_number = $self->get_section_number();

    # no dot: nothing above this section
    return if $parent_number !~ m/\./;

    # drop the last numeric component to get the parent's number
    $parent_number =~ s/\.\d+$//;

    # remove a trailing all-zero component - it appears when a section was
    # nested too deep, e.g. a ===== heading directly under a === heading.
    # NOTE(review): only one such component is stripped per call - confirm
    # that deeper level skips cannot occur.
    $parent_number =~ s/\.0*$//;

    my $document = $self->get_document();
    return $document->get_section(number => $parent_number);
}
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Return an arrayref holding this section itself followed by every section
# nested beneath it (children, grandchildren, ...), in the order in which
# the document reports its section numbers.
sub get_child_sections {
    my $self = shift;

    my $section_number  = $self->get_section_number();
    my $document        = $self->get_document();
    my @section_numbers = $document->get_section_numbers();

    # \Q...\E makes the dots inside a section number such as "1.1" match
    # literally. Unescaped they match any character, so section "1.1" would
    # also claim the descendants of a top-level section "111".
    # The pattern demands at least one further ".N" component, so this
    # section's own number can never match.
    my $descendant_re = qr/^\Q$section_number\E\.\d+/;

    # this section always leads the list
    my @children = ($self);
    for my $num (@section_numbers) {
        next unless $num =~ $descendant_re;
        push @children, $document->get_section(number => $num);
    }
    return \@children;
}
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Build a sub-document from the sections returned by get_child_sections(),
# by handing them to the owning document's create_sub_document().
# Returns nothing if get_child_sections() yields no sections.
sub get_child_document {
    my ($self) = @_;

    my $sections = $self->get_child_sections();
    if (!$sections || !@$sections) {
        return;
    }

    my $document = $self->get_document();
    return $document->create_sub_document(
        sections => $sections,
    );
}
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Return the topmost section in this section's hierarchy: the last entry
# of the ancestor list (which runs from nearest parent up to the top).
# A section without ancestors is its own topmost section.
sub _get_parent_language_section {
    my ($self) = @_;

    my $ancestors = $self->get_ancestor_sections();
    return $self unless $ancestors && @$ancestors;

    return $ancestors->[-1];
}
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# Locate the section that determines the part of speech for this section.
#
# Returns this section itself when the document recognises its header as a
# part of speech; otherwise the nearest ancestor whose header is recognised.
# Returns nothing when no section in the chain qualifies.
sub _get_parent_part_of_speech_section {
    my ($self) = @_;

    # the current section may already be a part of speech section
    return $self
        if $self->get_document()->is_part_of_speech( $self->get_header() );

    # otherwise walk the ancestors, nearest first
    my $ancestors = $self->get_ancestor_sections();
    return unless $ancestors && @$ancestors;

    for my $candidate (@$ancestors) {
        return $candidate
            if $self->get_document()->is_part_of_speech(
                $candidate->get_header()
            );
    }

    return;
}
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Collect every section above this one, following get_parent_section()
# until it yields nothing. The returned arrayref is ordered from the
# immediate parent up to the top level; it is empty for a section
# without a parent.
sub get_ancestor_sections {
    my ($self) = @_;

    my @chain;
    for (
        my $node = $self->get_parent_section();
        $node;
        $node = $node->get_parent_section()
    ) {
        push @chain, $node;
    }

    return \@chain;
}
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Determine the language code for this section.
#
# The header of the topmost section in the hierarchy is taken as the
# language name and passed to Wiktionary::Parser::Language's
# language2code(); whatever that returns is the result.
# Returns nothing when no topmost section is available.
sub get_language {
    my ($self) = @_;

    my $top_section = $self->_get_parent_language_section();
    return unless $top_section;

    my $language_name = $top_section->get_header();
    my $code = Wiktionary::Parser::Language->new()->language2code($language_name);

    return $code;
}
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Return the part of speech that applies to this section: the header of
# the part of speech section found at or above it in the hierarchy.
# The answer is remembered under '__part_of_speech__' so that later calls
# skip the lookup. Returns nothing when no such section exists.
sub get_part_of_speech {
    my ($self) = @_;

    # reuse an earlier (true) result
    my $cached = $self->{__part_of_speech__};
    return $cached if $cached;

    my $pos_section = $self->_get_parent_part_of_speech_section()
        or return;

    return $self->{__part_of_speech__} = $pos_section->get_header();
}
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# Accessor: return the object stored under 'template_parser'
# (the constructor places a Wiktionary::Parser::TemplateParser there).
sub get_template_parser {
    my ($self) = @_;
    return $self->{template_parser};
}
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
1; |
|
185
|
|
|
|
|
|
|
|