| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Wiktionary::Parser::Document; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 3 |  |  | 3 |  | 688 | use strict; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 285 |  | 
| 4 | 3 |  |  | 3 |  | 20 | use warnings; | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 108 |  | 
| 5 | 3 |  |  | 3 |  | 18 | use Data::Dumper; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 203 |  | 
| 6 | 3 |  |  | 3 |  | 1227 | use Wiktionary::Parser::Section; | 
|  | 3 |  |  |  |  | 9 |  | 
|  | 3 |  |  |  |  | 116 |  | 
| 7 | 3 |  |  | 3 |  | 1536 | use Wiktionary::Parser::Section::Translations; | 
|  | 3 |  |  |  |  | 11 |  | 
|  | 3 |  |  |  |  | 124 |  | 
| 8 | 3 |  |  | 3 |  | 2119 | use Wiktionary::Parser::Section::PartofSpeech; | 
|  | 3 |  |  |  |  | 11 |  | 
|  | 3 |  |  |  |  | 90 |  | 
| 9 | 3 |  |  | 3 |  | 2021 | use Wiktionary::Parser::Section::Etymology; | 
|  | 3 |  |  |  |  | 10 |  | 
|  | 3 |  |  |  |  | 77 |  | 
| 10 | 3 |  |  | 3 |  | 1683 | use Wiktionary::Parser::Section::Synonym; | 
|  | 3 |  |  |  |  | 10 |  | 
|  | 3 |  |  |  |  | 91 |  | 
| 11 | 3 |  |  | 3 |  | 20082 | use Wiktionary::Parser::Section::Hyponym; | 
|  | 3 |  |  |  |  | 9 |  | 
|  | 3 |  |  |  |  | 85 |  | 
| 12 | 3 |  |  | 3 |  | 1811 | use Wiktionary::Parser::Section::Hypernym; | 
|  | 3 |  |  |  |  | 10 |  | 
|  | 3 |  |  |  |  | 80 |  | 
| 13 | 3 |  |  | 3 |  | 1756 | use Wiktionary::Parser::Section::Antonym; | 
|  | 3 |  |  |  |  | 10 |  | 
|  | 3 |  |  |  |  | 92 |  | 
| 14 | 3 |  |  | 3 |  | 21 | use Wiktionary::Parser::Section::Etymology; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 245 |  | 
| 15 | 3 |  |  | 3 |  | 2085 | use Wiktionary::Parser::Section::Pronunciation; | 
|  | 3 |  |  |  |  | 13 |  | 
|  | 3 |  |  |  |  | 122 |  | 
| 16 | 3 |  |  | 3 |  | 2080 | use Wiktionary::Parser::Section::DerivedTerms; | 
|  | 3 |  |  |  |  | 10 |  | 
|  | 3 |  |  |  |  | 87 |  | 
| 17 | 3 |  |  | 3 |  | 1978 | use Wiktionary::Parser::Section::AlternativeForms; | 
|  | 3 |  |  |  |  | 10 |  | 
|  | 3 |  |  |  |  | 96 |  | 
| 18 | 3 |  |  | 3 |  | 1675 | use Wiktionary::Parser::Section::WikisaurusSection; | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 79 |  | 
| 19 | 3 |  |  | 3 |  | 49 | use Wiktionary::Parser::Language; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 44410 |  | 
| 20 |  |  |  |  |  |  |  | 
# Construct a document from a flat list of named arguments.
#
# Every argument except 'sections' is kept on the object unchanged.
# 'sections', when supplied, is an array reference of section objects; each
# one is registered through add_section() so that it is indexed by its
# section number. 'verbose' falls back to 0 when it is missing or false.
sub new {
    my ($class, %args) = @_;

    # take the section list out before blessing so the raw list is not
    # stored under the 'sections' key
    my $initial_sections = delete $args{sections};

    my $self = bless \%args, $class;
    $self->{verbose} = 0 unless $self->{verbose};

    if ($initial_sections && @$initial_sections) {
        foreach my $existing (@{$initial_sections}) {
            $self->add_section($existing);
        }
    }

    return $self;
}
| 41 |  |  |  |  |  |  |  | 
# Accessor: the value stored under this document's 'title' key
# (undef when no title was supplied).
sub get_title {
    my ($self) = @_;
    return $self->{title};
}
| 47 |  |  |  |  |  |  |  | 
# Register a section object with this document.
#
# The section is linked back to this document unless it already reports an
# owning document, and is then stored under its section number. A section
# added later with the same number replaces the earlier entry.
#
# Dies when the argument is not an object derived from
# Wiktionary::Parser::Section. Returns the section that was stored.
sub add_section {
    my ($self, $section) = @_;

    # Check blessedness before calling ->isa: on undef or an unblessed
    # value the method call itself would abort with Perl's generic
    # "Can't call method" error instead of the message below.
    require Scalar::Util;
    unless (Scalar::Util::blessed($section)
        && $section->isa('Wiktionary::Parser::Section'))
    {
        die sprintf(
            'given value (%s) is not of type Wiktionary::Parser::Section',
            ref($section)
        );
    }

    # link section to document, but keep an existing owner untouched
    unless ($section->get_document()) {
        $section->set_document($self);
    }

    my $section_number = $section->get_section_number();
    $self->{sections}{$section_number} = $section;

    return $section;
}
| 69 |  |  |  |  |  |  |  | 
# Fetch sections, optionally filtered by header.
#
# With a true 'title' argument: returns an array reference holding every
# section whose header matches the title, visited in get_section_numbers()
# order. The title is interpolated into a case-insensitive pattern, so it
# may be a plain string or a regex.
#
# Without a title: returns the document's internal section index as-is,
# i.e. the hash reference mapping section number to section object (not a
# list, and possibly undef if nothing has been added yet).
sub get_sections {
    my ($self, %args) = @_;
    my $title = $args{title};

    if (!$title) {
        return $self->{sections};
    }

    my @matching;
    foreach my $number ($self->get_section_numbers()) {
        my $candidate = $self->get_section(number => $number);
        if ($candidate->get_header() =~ m/$title/i) {
            push @matching, $candidate;
        }
    }

    return \@matching;
}
| 90 |  |  |  |  |  |  |  | 
# Build a document that contains only the sections selected by 'title'
# plus the child sections each of them reports.
#
# Returns $self unchanged when no title is given, nothing when no section
# matches, and otherwise whatever create_sub_document() produces for the
# combined list (matches first, then their children).
sub get_sub_document {
    my ($self, %args) = @_;
    my $title = $args{title};

    # no section name pattern: the caller gets the whole document
    if (!$title) {
        return $self;
    }

    my $sections = $self->get_sections(title => $title);
    if (!($sections && @$sections)) {
        return;
    }

    # gather children before extending the list so only the direct matches
    # are asked for their child sections
    my @descendants = map { @{ $_->get_child_sections() || [] } } @$sections;
    push @$sections, @descendants;

    my $sub_document = $self->create_sub_document(
        sections => $sections,
    );

    return $sub_document;
}
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  |  | 
# Return the child document of the first section whose header is exactly
# the requested language (case-insensitive).
#
# Sections are visited in get_section_numbers() order. The 'language'
# argument is required (dies otherwise) and is interpolated into the
# pattern unescaped. Returns nothing when no header matches.
sub get_language_section {
    my ($self, %args) = @_;
    my $language = $args{language} or die 'language needs to be specified';

    foreach my $number ($self->get_section_numbers()) {
        my $candidate = $self->get_section(number => $number);
        next if $candidate->get_header() !~ m/^$language$/i;

        # first hit wins
        return $candidate->get_child_document();
    }

    return;
}
| 138 |  |  |  |  |  |  |  | 
# Look up one section by section number.
#
# Takes a named 'number' argument and returns the section stored under it
# (undef if that number is unknown). Returns nothing when the number is
# missing or false.
sub get_section {
    my ($self, %args) = @_;
    my $wanted = $args{number};    # lookup section by number

    return unless $wanted;
    return $self->{sections}{$wanted};
}
| 149 |  |  |  |  |  |  |  | 
# Section factory: choose the section class for a header, instantiate it,
# register it with this document and return it.
#
# Named arguments: 'section_number' and 'header'. Selection order:
#   1. any section of a document whose title starts with "Wikisaurus:"
#      becomes a WikisaurusSection;
#   2. otherwise the first header pattern below that matches decides;
#   3. otherwise is_part_of_speech($header) selects PartofSpeech;
#   4. otherwise the generic Section class is used.
sub create_section {
    my ($self, %args) = @_;
    my ($section_number, $header) = @args{qw(section_number header)};

    # ordered: the first matching pattern wins
    my @header_rules = (
        [ qr/translation/i      => 'Wiktionary::Parser::Section::Translations' ],
        [ qr/etymology/i        => 'Wiktionary::Parser::Section::Etymology' ],
        [ qr/synonym/i          => 'Wiktionary::Parser::Section::Synonym' ],
        [ qr/hypernym/i         => 'Wiktionary::Parser::Section::Hypernym' ],
        [ qr/hyponym/i          => 'Wiktionary::Parser::Section::Hyponym' ],
        [ qr/antonym/i          => 'Wiktionary::Parser::Section::Antonym' ],
        [ qr/pronunciation/i    => 'Wiktionary::Parser::Section::Pronunciation' ],
        [ qr/alternat\w+ form/i => 'Wiktionary::Parser::Section::AlternativeForms' ],
        [ qr/derived\sterm/i    => 'Wiktionary::Parser::Section::DerivedTerms' ],
    );

    my $class;
    if ($self->get_title() =~ m/^Wikisaurus\:/) {
        $class = 'Wiktionary::Parser::Section::WikisaurusSection';
    }
    else {
        foreach my $rule (@header_rules) {
            my ($pattern, $section_class) = @{$rule};
            next unless $header =~ $pattern;
            $class = $section_class;
            last;
        }

        if (!$class) {
            $class = $self->is_part_of_speech($header)
                ? 'Wiktionary::Parser::Section::PartofSpeech'
                : 'Wiktionary::Parser::Section';
        }
    }

    my $section = $class->new(
        section_number => $section_number,
        header         => $header,
        document       => $self,
    );

    $self->add_section($section);

    return $section;
}
| 195 |  |  |  |  |  |  |  | 
# Produce a flat table of contents: an array reference of
# "<section number>,<header>" strings, one per section, in
# get_section_numbers() order.
sub get_table_of_contents {
    my ($self) = @_;

    my @contents = map {
        sprintf("%s,%s", $_, $self->get_section(number => $_)->get_header())
    } $self->get_section_numbers();

    return \@contents;
}
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  |  | 
# All sections of class Wiktionary::Parser::Section::Translations, as
# returned by get_sections_of_type().
sub get_translation_sections {
    my ($self) = @_;
    my $type = 'Wiktionary::Parser::Section::Translations';
    return $self->get_sections_of_type($type);
}
| 211 |  |  |  |  |  |  |  | 
# All sections of class Wiktionary::Parser::Section::PartofSpeech, as
# returned by get_sections_of_type().
sub get_part_of_speech_sections {
    my ($self) = @_;
    my $type = 'Wiktionary::Parser::Section::PartofSpeech';
    return $self->get_sections_of_type($type);
}
| 217 |  |  |  |  |  |  |  | 
# All sections of class Wiktionary::Parser::Section::Synonym, as returned
# by get_sections_of_type().
sub get_synonym_sections {
    my ($self) = @_;
    my $type = 'Wiktionary::Parser::Section::Synonym';
    return $self->get_sections_of_type($type);
}
| 222 |  |  |  |  |  |  |  | 
# All sections of class Wiktionary::Parser::Section::Hypernym, as returned
# by get_sections_of_type().
sub get_hypernym_sections {
    my ($self) = @_;
    my $type = 'Wiktionary::Parser::Section::Hypernym';
    return $self->get_sections_of_type($type);
}
| 227 |  |  |  |  |  |  |  | 
# All sections of class Wiktionary::Parser::Section::Hyponym, as returned
# by get_sections_of_type().
sub get_hyponym_sections {
    my ($self) = @_;
    my $type = 'Wiktionary::Parser::Section::Hyponym';
    return $self->get_sections_of_type($type);
}
| 232 |  |  |  |  |  |  |  | 
# All sections of class Wiktionary::Parser::Section::Pronunciation, as
# returned by get_sections_of_type().
sub get_pronunciation_sections {
    my ($self) = @_;
    my $type = 'Wiktionary::Parser::Section::Pronunciation';
    return $self->get_sections_of_type($type);
}
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  |  | 
# Collect every section that isa() the given class name.
# Returns an array reference (possibly empty) in get_section_numbers()
# order.
sub get_sections_of_type {
    my ($self, $type) = @_;

    my @matching =
        grep { $_->isa($type) }
        map  { scalar $self->get_section(number => $_) }
        $self->get_section_numbers();

    return \@matching;
}
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  |  | 
# Return the section numbers known to this document, in document order.
#
# Numbers are compared segment by segment after splitting on '.', and
# all-digit segments are compared numerically, so "1.2" sorts before "1.10"
# and "2" before "10" (a plain string sort gets both wrong). A number that
# is a prefix of another sorts first ("1" before "1.1"). Non-numeric
# segments sort after numeric ones and are compared as strings.
# NOTE(review): assumes section numbers are dot-separated integers such as
# "1.2.3" - confirm against Wiktionary::Parser::Section.
#
# Keys that are false ('' or '0') are not reported. Returns an empty list
# when the document has no sections.
sub get_section_numbers {
    my $self = shift;

    # split each number once up front instead of on every comparison
    my %segments_of =
        map  { $_ => [ split /\./, $_ ] }
        grep { $_ } keys %{ $self->{sections} || {} };

    my $in_document_order = sub {
        my ($left, $right) = @_;
        my @l = @{ $segments_of{$left} };
        my @r = @{ $segments_of{$right} };

        while (@l && @r) {
            my ($x, $y) = (shift @l, shift @r);
            my $x_numeric = $x =~ m/\A\d+\z/;
            my $y_numeric = $y =~ m/\A\d+\z/;

            # numeric segments always precede non-numeric ones so the
            # ordering stays transitive for mixed input
            my $order
                = ($x_numeric && $y_numeric) ? $x <=> $y
                : $x_numeric                 ? -1
                : $y_numeric                 ? 1
                :                              $x cmp $y;
            return $order if $order;
        }

        # shared prefix: the shorter number is the parent and comes first;
        # fall back to a string compare so the result is deterministic
        return (scalar(@l) <=> scalar(@r)) || ($left cmp $right);
    };

    return (sort { $in_document_order->($a, $b) } keys %segments_of);
}
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  |  | 
# Collect the word-sense labels of every translation section.
#
# Returns an array reference. A word sense whose label has the form
# "wiktionary:<title>" is a link to another page: it is never added
# itself; instead, when link following is enabled, the word senses of the
# linked document are added in its place.
#
# Named arguments:
#   _meta_follow_links - follow such links (default 1). Linked documents
#                        are always queried with 0 so that pages linking
#                        back to each other cannot recurse endlessly.
sub get_word_senses {
    my $self = shift;
    my %args = @_;

    # follow links to other wiktionary pages
    my $_meta_follow_links = defined($args{_meta_follow_links}) ? $args{_meta_follow_links} : 1;

    my $sections = $self->get_translation_sections();
    my @word_senses;

    for my $section (@{$sections || []}) {
        my $section_senses = $section->get_word_senses();

        for my $word_sense (@{$section_senses || []}) {

            if (my ($title) = $word_sense->get_word() =~ m/^wiktionary\:(.+)$/i) {
                next unless $_meta_follow_links;

                my $linked_document = $self->get_parser()->get_document(title => $title);

                # The linked page may not be available; get_translations()
                # tolerates that, so skip the link here as well rather than
                # calling a method on an undefined value.
                next unless $linked_document;

                my $linked_word_senses = $linked_document->get_word_senses(_meta_follow_links => 0);
                push @word_senses, @{$linked_word_senses || []};

                next;
            }

            push @word_senses, $word_sense->get_word();
        }
    }

    return \@word_senses;
}
| 293 |  |  |  |  |  |  |  | 
# Synonym data for this document: get_classifications() applied to the
# Synonym section class. Extra arguments are accepted but not used.
sub get_synonyms {
    my ($self, %args) = @_;
    my %query = (class => 'Wiktionary::Parser::Section::Synonym');
    return $self->get_classifications(%query);
}
| 301 |  |  |  |  |  |  |  | 
# Hyponym data for this document: get_classifications() applied to the
# Hyponym section class. Extra arguments are accepted but not used.
sub get_hyponyms {
    my ($self, %args) = @_;
    my %query = (class => 'Wiktionary::Parser::Section::Hyponym');
    return $self->get_classifications(%query);
}
| 309 |  |  |  |  |  |  |  | 
# Hypernym data for this document: get_classifications() applied to the
# Hypernym section class. Extra arguments are accepted but not used.
sub get_hypernyms {
    my ($self, %args) = @_;
    my %query = (class => 'Wiktionary::Parser::Section::Hypernym');
    return $self->get_classifications(%query);
}
| 317 |  |  |  |  |  |  |  | 
# Antonym data for this document: get_classifications() applied to the
# Antonym section class. Extra arguments are accepted but not used.
sub get_antonyms {
    my ($self, %args) = @_;
    my %query = (class => 'Wiktionary::Parser::Section::Antonym');
    return $self->get_classifications(%query);
}
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  |  | 
# Gather the word groups (synonyms, antonyms, ...) of every section of the
# given class.
#
# Named arguments:
#   class - section class name handed to get_sections_of_type()
#
# Returns a hash reference of the form
#   { <lang> => { language => <code2language(lang)>,
#                 sense    => { <sense> => [ words... ] } } }
# built from each section's get_groups() entries ('language', 'sense' and
# 'lexemes' keys). A lexeme starting with "Wikisaurus:" is replaced by the
# words of the matching sections (same header as the current section) of
# the linked document.
#
# The result is cached on the object per class, so repeated calls return
# the same reference.
sub get_classifications {
    my $self = shift;
    my %args = @_;
    my $class = $args{class};

    my $cache_key = "__get_${class}__";
    if ($self->{$cache_key}) {
        return $self->{$cache_key};
    }

    my $sections = $self->get_sections_of_type($class);
    my %x_nyms;

    for my $section (@{$sections || []}) {
        my $groups = $section->get_groups();

        for my $group (@{$groups || []}) {
            my $lang  = $group->{language};
            my $sense = $group->{sense};

            my @full_word_list;

            # Iterate over the list rather than shifting while the value is
            # true: a false lexeme ('0' or '') used to end the loop early
            # and silently drop every lexeme after it.
            for my $lexeme (@{$group->{lexemes} || []}) {
                next unless defined $lexeme && length $lexeme;

                if ($lexeme !~ m/^Wikisaurus:/) {
                    push @full_word_list, $lexeme;
                    next;
                }

                # link to a wikisaurus entry: include that document's words
                my $wikisaurus_document = $self->get_linked_document(title => $lexeme);

                # NOTE(review): get_linked_document() is not visible here;
                # skip the link if it yields nothing - confirm it can.
                next unless $wikisaurus_document;

                my $ws_sections = $wikisaurus_document->get_sections(title => $section->get_header());

                for my $ws_section (@{$ws_sections || []}) {
                    my $word_list = $ws_section->get_words();
                    push @full_word_list, map { $_->{word} } @{$word_list || []};
                }
            }

            push @{$x_nyms{$lang}{sense}{$sense}}, @full_word_list;
            $x_nyms{$lang}{language} ||= $self->get_language_mapper()->code2language($lang);
        }
    }

    $self->{$cache_key} = \%x_nyms;

    return \%x_nyms;
}
| 381 |  |  |  |  |  |  |  | 
# Merge the derived terms of all DerivedTerms sections.
# Returns a hash reference mapping each language key reported by the
# sections to the combined list of its terms, in section order.
sub get_derived_terms {
    my ($self) = @_;

    my $sections = $self->get_sections_of_type('Wiktionary::Parser::Section::DerivedTerms');

    my %terms;
    foreach my $section (@{$sections || []}) {
        my $section_terms = $section->get_derived_terms();

        foreach my $language (keys %{$section_terms}) {
            my $words = $section_terms->{$language} || [];
            push @{$terms{$language}}, @{$words};
        }
    }

    return \%terms;
}
| 396 |  |  |  |  |  |  |  | 
# Merge the pronunciation metadata of all Pronunciation sections.
#
# Returns a hash reference keyed by the language keys the sections report:
#   { <lang> => { language    => <code2language(lang)>,
#                 pronunciation, audio, rhyme, homophone, hyphenation
#                             => [ entries... ] } }
# A facet key only appears for a language when some section reported that
# language for it. Audio entries are de-duplicated per language by their
# 'file' value; the other facets are concatenated unchanged. Extra
# arguments are accepted but not used.
sub get_pronunciations {
    my ($self, %args) = @_;

    my $sections = $self->get_sections_of_type('Wiktionary::Parser::Section::Pronunciation');

    # section getter => key in the result, in the order they are queried
    my @facets = (
        [ get_pronunciations => 'pronunciation' ],
        [ get_audio          => 'audio' ],
        [ get_rhymes         => 'rhyme' ],
        [ get_homophones     => 'homophone' ],
        [ get_hyphenations   => 'hyphenation' ],
    );

    my %meta;
    my %seen;

    foreach my $section (@{$sections || []}) {
        foreach my $facet (@facets) {
            my ($getter, $key) = @{$facet};
            my $by_lang = $section->$getter();

            foreach my $lang (keys %{$by_lang}) {
                if ($key eq 'audio') {
                    # remove duplicates
                    push @{$meta{$lang}{audio}}, grep {!$seen{audio}{$lang}{ $_->{file} }++} @{$by_lang->{$lang}};
                }
                else {
                    push @{$meta{$lang}{$key}}, @{$by_lang->{$lang}};
                }

                $meta{$lang}{language} ||= $self->get_language_mapper()->code2language($lang);
            }
        }
    }

    return \%meta;
}
| 455 |  |  |  |  |  |  |  | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  |  | 
# List the parts of speech found in this document, grouped by language.
#
# Returns a hash reference of the form
#   { <lang_code> => { language       => <code2language(lang_code)>,
#                      part_of_speech => [ ... ] } }
# built from every part-of-speech section that reports both a part of
# speech and a language code; sections missing either are skipped.
#
# The result is cached on the object, so repeated calls return the same
# reference. Extra arguments are accepted but not used.
sub get_parts_of_speech {
    my $self = shift;
    my %args = @_;

    if ($self->{__get_parts_of_speech__}) {
        return $self->{__get_parts_of_speech__};
    }

    my $sections = $self->get_part_of_speech_sections();

    my %parts_of_speech;
    for my $section (@{$sections || []}) {
        my $pos       = $section->get_part_of_speech();
        my $lang_code = $section->get_language_code();
        next unless $pos && $lang_code;

        push @{$parts_of_speech{$lang_code}{part_of_speech}}, $pos;

        # get_language_mapper is a method; calling it as a plain function
        # would run it without an invocant
        $parts_of_speech{$lang_code}{language} ||= $self->get_language_mapper()->code2language($lang_code);
    }

    $self->{__get_parts_of_speech__} = \%parts_of_speech;
    return \%parts_of_speech;
}
| 481 |  |  |  |  |  |  |  | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | sub get_translations { | 
| 484 | 20 |  |  | 20 | 0 | 203 | my $self = shift; | 
| 485 | 20 |  |  |  |  | 55 | my %args = @_; | 
| 486 | 20 | 100 |  |  |  | 71 | my $include_transliterations = defined($args{include_transliterations}) ? $args{include_transliterations} : 1; | 
| 487 | 20 | 50 |  |  |  | 56 | my $include_alternate_translations = defined($args{include_alternate_translations}) ? $args{include_alternate_translations} : 1; | 
| 488 |  |  |  |  |  |  |  | 
| 489 |  |  |  |  |  |  | # follow links to other wiktionary pages | 
| 490 | 20 | 50 |  |  |  | 60 | my $_meta_follow_links = defined($args{_meta_follow_links}) ? $args{_meta_follow_links} : 1; | 
| 491 |  |  |  |  |  |  |  | 
| 492 | 20 |  |  |  |  | 81 | my $sections = $self->get_translation_sections(); | 
| 493 | 20 |  |  |  |  | 32 | my @word_senses; | 
| 494 |  |  |  |  |  |  | my %translations; | 
| 495 |  |  |  |  |  |  |  | 
| 496 | 20 | 50 |  |  |  | 28 | for my $section (@{$sections || []}) { | 
|  | 20 |  |  |  |  | 61 |  | 
| 497 | 40 |  |  |  |  | 148 | my $word_senses = $section->get_word_senses(); | 
| 498 |  |  |  |  |  |  |  | 
| 499 | 40 | 50 |  |  |  | 80 | for my $word_sense (@{$word_senses || []}) { | 
|  | 40 |  |  |  |  | 142 |  | 
| 500 | 20 |  |  |  |  | 75 | my $word_sense_lexeme = $word_sense->get_word(); | 
| 501 | 20 |  |  |  |  | 77 | my $translations = $word_sense->get_translations(); | 
| 502 |  |  |  |  |  |  |  | 
| 503 |  |  |  |  |  |  | # if we have a link to another page, download that page and merge its translation data | 
| 504 | 20 | 50 |  |  |  | 62 | if (my ($title) = $word_sense->get_word() =~ m/^wiktionary\:(.+)$/i) { | 
| 505 |  |  |  |  |  |  |  | 
| 506 | 0 | 0 |  |  |  | 0 | if ($_meta_follow_links) { | 
| 507 |  |  |  |  |  |  | # get titles to linked pages | 
| 508 |  |  |  |  |  |  | # get translations from the linked document | 
| 509 |  |  |  |  |  |  |  | 
| 510 | 0 |  |  |  |  | 0 | my $linked_document = $self->get_parser()->get_document(title => $title); | 
| 511 |  |  |  |  |  |  | # set _meta_follow_links to 0, to ensure we don't end up in | 
| 512 |  |  |  |  |  |  | # an endless loop if pages link back to each other | 
| 513 |  |  |  |  |  |  |  | 
| 514 | 0 |  |  |  |  | 0 | my $linked_translations = {}; | 
| 515 | 0 | 0 |  |  |  | 0 | if ($linked_document) { | 
| 516 | 0 |  |  |  |  | 0 | $linked_translations = $linked_document->get_translations(_meta_follow_links => 0); | 
| 517 |  |  |  |  |  |  | } | 
| 518 |  |  |  |  |  |  |  | 
| 519 | 0 |  |  |  |  | 0 | for my $linked_word_sense (keys %$linked_translations) { | 
| 520 | 0 | 0 |  |  |  | 0 | for my $linked_lang_code (keys %{ $linked_translations->{$linked_word_sense} || {} }) { | 
|  | 0 |  |  |  |  | 0 |  | 
| 521 | 0 |  |  |  |  | 0 | $translations{$linked_word_sense}{$linked_lang_code}{language} = $linked_translations->{$linked_word_sense}{$linked_lang_code}{language}; | 
| 522 | 0 | 0 |  |  |  | 0 | push @{ $translations{$linked_word_sense}{$linked_lang_code}{translations} }, @{ $linked_translations->{$linked_word_sense}{$linked_lang_code}{translations} || []}; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 523 |  |  |  |  |  |  | } | 
| 524 |  |  |  |  |  |  | } | 
| 525 |  |  |  |  |  |  | } | 
| 526 |  |  |  |  |  |  |  | 
| 527 | 0 |  |  |  |  | 0 | next; | 
| 528 |  |  |  |  |  |  | } | 
| 529 |  |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  |  | 
| 531 |  |  |  |  |  |  |  | 
| 532 | 20 | 50 |  |  |  | 44 | for my $language (keys %{$translations || {}}) { | 
|  | 20 |  |  |  |  | 84 |  | 
| 533 |  |  |  |  |  |  |  | 
| 534 | 20 |  |  |  |  | 92 | my $language_code = $self->get_language_mapper()->language2code($language); | 
| 535 | 20 |  |  |  |  | 65 | my $normalized_language = $self->get_language_mapper()->code2language($language_code); | 
| 536 |  |  |  |  |  |  |  | 
| 537 | 20 |  |  |  |  | 48 | my $lexemes = $translations->{$language}; | 
| 538 | 20 |  |  |  |  | 23 | my %seen; | 
| 539 | 20 |  |  |  |  | 29 | for my $lexeme (@{$lexemes}) { | 
|  | 20 |  |  |  |  | 44 |  | 
| 540 | 37 |  |  |  |  | 147 | my @translations = $lexeme->get_translations(); | 
| 541 |  |  |  |  |  |  |  | 
| 542 |  |  |  |  |  |  | # if the lexeme has a language code, use that to determine language | 
| 543 | 37 |  |  |  |  | 138 | my $tagged_language_code = $lexeme->get_language_code(); | 
| 544 | 37 | 100 |  |  |  | 89 | if ($tagged_language_code) { | 
| 545 | 25 |  | 66 |  |  | 169 | $normalized_language = $self->get_language_mapper()->code2language($tagged_language_code) || $normalized_language; | 
| 546 | 25 |  | 33 |  |  | 66 | $language_code = $self->get_language_mapper()->language2code($normalized_language) || $tagged_language_code; | 
| 547 |  |  |  |  |  |  | } | 
| 548 |  |  |  |  |  |  |  | 
| 549 |  |  |  |  |  |  |  | 
| 550 | 37 |  |  |  |  | 222 | my $part_of_speech = $section->get_part_of_speech(); | 
| 551 |  |  |  |  |  |  |  | 
| 552 |  |  |  |  |  |  |  | 
| 553 | 37 | 100 | 100 |  |  | 192 | if ($include_transliterations && $lexeme->get_transliteration()) { | 
| 554 | 14 |  |  |  |  | 46 | push @translations, $lexeme->get_transliteration(); | 
| 555 |  |  |  |  |  |  | } | 
| 556 | 37 | 50 | 33 |  |  | 181 | if ($include_alternate_translations && $lexeme->get_alternate()) { | 
| 557 | 0 |  |  |  |  | 0 | push @translations, $lexeme->get_alternate(); | 
| 558 |  |  |  |  |  |  | } | 
| 559 |  |  |  |  |  |  |  | 
| 560 | 37 |  |  |  |  | 160 | for my $lex (sort @translations) { | 
| 561 |  |  |  |  |  |  |  | 
| 562 | 107 | 50 |  |  |  | 431 | next unless defined $lex; | 
| 563 | 107 | 100 |  |  |  | 120 | unless (grep {$_ eq $lex} @{$translations{$word_sense_lexeme}{$language_code}{translations} || []}) { | 
|  | 270 | 100 |  |  |  | 645 |  | 
|  | 107 |  |  |  |  | 456 |  | 
| 564 | 48 |  |  |  |  | 51 | push @{$translations{$word_sense_lexeme}{$language_code}{translations}},$lex; | 
|  | 48 |  |  |  |  | 142 |  | 
| 565 | 48 |  | 66 |  |  | 189 | $translations{$word_sense_lexeme}{$language_code}{language} ||= $normalized_language; | 
| 566 | 48 |  | 66 |  |  | 276 | $translations{$word_sense_lexeme}{$language_code}{part_of_speech} ||= $part_of_speech; | 
| 567 |  |  |  |  |  |  |  | 
| 568 |  |  |  |  |  |  | } | 
| 569 |  |  |  |  |  |  | } | 
| 570 |  |  |  |  |  |  | } | 
| 571 |  |  |  |  |  |  | } | 
| 572 |  |  |  |  |  |  | } | 
| 573 |  |  |  |  |  |  | } | 
| 574 |  |  |  |  |  |  |  | 
| 575 | 20 |  |  |  |  | 155 | return \%translations; | 
| 576 |  |  |  |  |  |  | } | 
| 577 |  |  |  |  |  |  |  | 
| 578 |  |  |  |  |  |  | sub is_part_of_speech { | 
| 579 | 67 |  |  | 67 | 0 | 90 | my $self = shift; | 
| 580 | 67 |  |  |  |  | 86 | my $header = shift; | 
| 581 | 67 | 100 |  |  |  | 114 | return 1 if grep { $header =~ m/^$_$/i } qw( | 
|  | 737 |  |  |  |  | 7548 |  | 
| 582 |  |  |  |  |  |  | noun | 
| 583 |  |  |  |  |  |  | verb | 
| 584 |  |  |  |  |  |  | adjective | 
| 585 |  |  |  |  |  |  | adverb | 
| 586 |  |  |  |  |  |  | pronoun | 
| 587 |  |  |  |  |  |  | preposition | 
| 588 |  |  |  |  |  |  | article | 
| 589 |  |  |  |  |  |  | conjunction | 
| 590 |  |  |  |  |  |  | determiner | 
| 591 |  |  |  |  |  |  | interjection | 
| 592 |  |  |  |  |  |  | symbol | 
| 593 |  |  |  |  |  |  | ); | 
| 594 |  |  |  |  |  |  |  | 
| 595 | 35 |  |  |  |  | 121 | return 0; | 
| 596 |  |  |  |  |  |  | } | 
| 597 |  |  |  |  |  |  |  | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | # call the parser to download a page for a term in this document | 
| 600 |  |  |  |  |  |  | sub get_linked_document { | 
| 601 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 602 | 0 |  |  |  |  | 0 | my %args = @_; | 
| 603 | 0 |  |  |  |  | 0 | my $title = $args{title}; | 
| 604 |  |  |  |  |  |  |  | 
| 605 | 0 |  | 0 |  |  | 0 | $self->{linked_documents} ||= {}; | 
| 606 | 0 | 0 |  |  |  | 0 | if ($self->{linked_documents}{$title}) { | 
| 607 | 0 |  |  |  |  | 0 | return $self->{linked_documents}{$title}; | 
| 608 |  |  |  |  |  |  | } | 
| 609 |  |  |  |  |  |  |  | 
| 610 | 0 |  |  |  |  | 0 | $self->debug("Getting Linked Page: $title"); | 
| 611 |  |  |  |  |  |  |  | 
| 612 | 0 |  |  |  |  | 0 | my $parser = $self->get_parser(); | 
| 613 | 0 | 0 |  |  |  | 0 | return unless $parser; | 
| 614 |  |  |  |  |  |  |  | 
| 615 | 0 |  |  |  |  | 0 | $self->{linked_documents}{$title} = $parser->get_document(title => $title); | 
| 616 | 0 |  |  |  |  | 0 | return $self->{linked_documents}{$title}; | 
| 617 |  |  |  |  |  |  | } | 
| 618 |  |  |  |  |  |  |  | 
| 619 |  |  |  |  |  |  | sub get_parser { | 
| 620 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 621 | 0 |  |  |  |  | 0 | return $self->{parser}; | 
| 622 |  |  |  |  |  |  | } | 
| 623 |  |  |  |  |  |  |  | 
| 624 |  |  |  |  |  |  | sub get_language_mapper { | 
| 625 | 90 |  |  | 90 | 0 | 115 | my $self = shift; | 
| 626 | 90 |  | 66 |  |  | 565 | return $self->{language_map} ||= Wiktionary::Parser::Language->new(); | 
| 627 |  |  |  |  |  |  | } | 
| 628 |  |  |  |  |  |  |  | 
| 629 |  |  |  |  |  |  |  | 
| 630 |  |  |  |  |  |  | # create a document object with a subset of sections | 
| 631 |  |  |  |  |  |  | sub create_sub_document { | 
| 632 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 633 | 0 |  |  |  |  | 0 | my %args = @_; | 
| 634 | 0 | 0 |  |  |  | 0 | my $sections = $args{sections} or die 'sections must be defined'; | 
| 635 | 0 |  |  |  |  | 0 | return __PACKAGE__->new( sections => $sections, title => $self->get_title() ); | 
| 636 |  |  |  |  |  |  |  | 
| 637 |  |  |  |  |  |  | } | 
| 638 |  |  |  |  |  |  |  | 
| 639 |  |  |  |  |  |  | sub debug { | 
| 640 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 641 | 0 | 0 |  |  |  | 0 | return unless $self->{verbose}; | 
| 642 | 0 |  |  |  |  | 0 | local $\ = "\n"; | 
| 643 | 0 |  |  |  |  | 0 | local $, = ' '; | 
| 644 | 0 |  |  |  |  | 0 | print 'DEBUG:',@_; | 
| 645 |  |  |  |  |  |  | } | 
| 646 |  |  |  |  |  |  |  | 
| 647 |  |  |  |  |  |  |  | 
| 648 |  |  |  |  |  |  | sub add_category { | 
| 649 | 17 |  |  | 17 | 0 | 51 | my $self = shift; | 
| 650 | 17 |  |  |  |  | 51 | my %args = @_; | 
| 651 | 17 |  |  |  |  | 28 | my $category = $args{category}; | 
| 652 | 17 |  |  |  |  | 26 | push @{$self->{categories}},$category; | 
|  | 17 |  |  |  |  | 67 |  | 
| 653 |  |  |  |  |  |  | } | 
| 654 |  |  |  |  |  |  |  | 
| 655 |  |  |  |  |  |  | sub add_language_link { | 
| 656 | 51 |  |  | 51 | 0 | 62 | my $self = shift; | 
| 657 | 51 |  |  |  |  | 129 | my %args = @_; | 
| 658 | 51 |  |  |  |  | 67 | my $tag = $args{tag}; | 
| 659 | 51 |  |  |  |  | 48 | push @{$self->{language_links}},$tag; | 
|  | 51 |  |  |  |  | 212 |  | 
| 660 |  |  |  |  |  |  | } | 
| 661 |  |  |  |  |  |  |  | 
| 662 |  |  |  |  |  |  | sub get_language_links { | 
| 663 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 664 | 0 |  |  |  |  | 0 | return $self->{language_links}; | 
| 665 |  |  |  |  |  |  | } | 
| 666 |  |  |  |  |  |  |  | 
| 667 |  |  |  |  |  |  | sub get_categories { | 
| 668 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 669 | 0 |  |  |  |  | 0 | return $self->{categories}; | 
| 670 |  |  |  |  |  |  | } | 
| 671 |  |  |  |  |  |  |  | 
| 672 |  |  |  |  |  |  | # get the languages represented by sections in this document | 
| 673 |  |  |  |  |  |  | sub get_section_languages { | 
| 674 | 1 |  |  | 1 | 0 | 1456 | my $self = shift; | 
| 675 | 1 |  |  |  |  | 2 | my @languages; | 
| 676 |  |  |  |  |  |  | # get top level sections | 
| 677 | 1 |  |  |  |  | 4 | for my $number ($self->get_section_numbers()) { | 
| 678 | 40 | 100 |  |  |  | 88 | next if $number =~ m/\./; | 
| 679 | 7 |  |  |  |  | 18 | push @languages, $self->get_section(number => $number)->get_header(); | 
| 680 |  |  |  |  |  |  | } | 
| 681 | 1 |  |  |  |  | 12 | return \@languages; | 
| 682 |  |  |  |  |  |  | } | 
| 683 |  |  |  |  |  |  |  | 
| 684 |  |  |  |  |  |  | 1; |