| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::WWWJDIC; | 
| 2 |  |  |  |  |  |  | require Exporter; | 
| 3 |  |  |  |  |  |  | @ISA = qw(Exporter); | 
| 4 |  |  |  |  |  |  | @EXPORT_OK = qw/get_mirrors/; | 
| 5 | 1 |  |  | 1 |  | 48344 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 26 |  | 
| 6 | 1 |  |  | 1 |  | 4 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 28 |  | 
| 7 |  |  |  |  |  |  | our $VERSION = '0.003'; | 
| 8 | 1 |  |  | 1 |  | 309 | use Encode qw/encode decode/; | 
|  | 1 |  |  |  |  | 6882 |  | 
|  | 1 |  |  |  |  | 53 |  | 
| 9 | 1 |  |  | 1 |  | 314 | use utf8; | 
|  | 1 |  |  |  |  | 11 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 10 | 1 |  |  | 1 |  | 238 | use URI::Escape; | 
|  | 1 |  |  |  |  | 1049 |  | 
|  | 1 |  |  |  |  | 47 |  | 
| 11 | 1 |  |  | 1 |  | 298 | use JSON::Parse 'json_file_to_perl'; | 
|  | 1 |  |  |  |  | 688 |  | 
|  | 1 |  |  |  |  | 496 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | my $jfile = __FILE__; | 
| 14 |  |  |  |  |  |  | $jfile =~ s/\.pm/.json/; | 
| 15 |  |  |  |  |  |  | my $j = json_file_to_perl ($jfile); | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | my %mirrors = %{$j->{mirrors}}; | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | our %dictionaries = ( | 
| 20 |  |  |  |  |  |  | 'AV' => 'aviation ', | 
| 21 |  |  |  |  |  |  | 'BU' => 'buddhdic', | 
| 22 |  |  |  |  |  |  | 'CA' => 'cardic', | 
| 23 |  |  |  |  |  |  | 'CC' => 'concrete', | 
| 24 |  |  |  |  |  |  | 'CO' => 'compdic', | 
| 25 |  |  |  |  |  |  | 'ED' => 'edict (the rest)', | 
| 26 |  |  |  |  |  |  | 'EP' => 'edict (priority subset)', | 
| 27 |  |  |  |  |  |  | 'ES' => 'engscidic', | 
| 28 |  |  |  |  |  |  | 'EV' => 'envgloss', | 
| 29 |  |  |  |  |  |  | 'FM' => 'finmktdic', | 
| 30 |  |  |  |  |  |  | 'FO' => 'forsdic_e', | 
| 31 |  |  |  |  |  |  | 'GE' => 'geodic ', | 
| 32 |  |  |  |  |  |  | 'KD' => 'small hiragana dictionary for glossing ', | 
| 33 |  |  |  |  |  |  | 'LG' => 'lingdic', | 
| 34 |  |  |  |  |  |  | 'LS' => 'lifscidic', | 
| 35 |  |  |  |  |  |  | 'MA' => 'manufdic', | 
| 36 |  |  |  |  |  |  | 'NA' => 'enamdict', | 
| 37 |  |  |  |  |  |  | 'PL' => 'j_places (entries not already in enamdict)', | 
| 38 |  |  |  |  |  |  | 'PP' => 'pandpdic ', | 
| 39 |  |  |  |  |  |  | 'RH' => 'revhenkan (kanji/kana with no English translation yet)', | 
| 40 |  |  |  |  |  |  | 'RW' => 'riverwater', | 
| 41 |  |  |  |  |  |  | 'SP' => 'special words & phrases', | 
| 42 |  |  |  |  |  |  | 'ST' => 'stardict', | 
| 43 |  |  |  |  |  |  | ); | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | our %codes = ( | 
| 46 |  |  |  |  |  |  | 'Buddh' => 'Buddhism', | 
| 47 |  |  |  |  |  |  | 'MA' => 'martial arts', | 
| 48 |  |  |  |  |  |  | 'P' => '"Priority" entry, i.e. among approx. 20,000 words deemed to be common in Japanese', | 
| 49 |  |  |  |  |  |  | 'X' => 'rude or X-rated term (not displayed in educational software)', | 
| 50 |  |  |  |  |  |  | 'abbr' => 'abbreviation', | 
| 51 |  |  |  |  |  |  | 'adj-f' => 'noun, verb, etc. acting prenominally (incl. rentaikei)', | 
| 52 |  |  |  |  |  |  | 'adj-i' => 'adjective (keiyoushi)', | 
| 53 |  |  |  |  |  |  | 'adj-na' => 'adjectival nouns or quasi-adjectives (keiyoudoushi)', | 
| 54 |  |  |  |  |  |  | 'adj-no' => 'nouns which may take the genitive case particle "no"', | 
| 55 |  |  |  |  |  |  | 'adj-pn' => 'pre-noun adjectival (rentaishi)', | 
| 56 |  |  |  |  |  |  | 'adj-t' => '"taru" adjective', | 
| 57 |  |  |  |  |  |  | 'adv' => 'adverb (fukushi)', | 
| 58 |  |  |  |  |  |  | 'arch' => 'archaism', | 
| 59 |  |  |  |  |  |  | 'ateji' => 'kanji used as phonetic symbol(s)', | 
| 60 |  |  |  |  |  |  | 'aux' => 'auxiliary', | 
| 61 |  |  |  |  |  |  | 'aux-v' => 'auxiliary verb', | 
| 62 |  |  |  |  |  |  | 'c' => 'company name', | 
| 63 |  |  |  |  |  |  | 'col' => 'colloquialism', | 
| 64 |  |  |  |  |  |  | 'comp' => 'computing/telecommunications', | 
| 65 |  |  |  |  |  |  | 'conj' => 'conjunction', | 
| 66 |  |  |  |  |  |  | 'ctr' => 'counter', | 
| 67 |  |  |  |  |  |  | 'exp' => 'Expressions (phrases, clauses, etc.)', | 
| 68 |  |  |  |  |  |  | 'f' => 'female given name', | 
| 69 |  |  |  |  |  |  | 'fam' => 'familiar language', | 
| 70 |  |  |  |  |  |  | 'fem' => 'female term or language', | 
| 71 |  |  |  |  |  |  | 'food' => 'food', | 
| 72 |  |  |  |  |  |  | 'g' => 'given name, as-yet not classified by sex', | 
| 73 |  |  |  |  |  |  | 'geom' => 'geometry', | 
| 74 |  |  |  |  |  |  | 'gikun' => 'gikun (meaning) reading', | 
| 75 |  |  |  |  |  |  | 'h' => 'a full (family plus given) name of a historical person', | 
| 76 |  |  |  |  |  |  | 'hon' => 'honorific or respectful (sonkeigo) language', | 
| 77 |  |  |  |  |  |  | 'hum' => 'humble (kenjougo) language', | 
| 78 |  |  |  |  |  |  | 'iK' => 'word containing irregular kanji usage', | 
| 79 |  |  |  |  |  |  | 'id' => 'idiomatic expression', | 
| 80 |  |  |  |  |  |  | 'ik' => 'word containing irregular kana usage', | 
| 81 |  |  |  |  |  |  | 'int' => 'interjection (kandoushi)', | 
| 82 |  |  |  |  |  |  | 'io' => 'irregular okurigana usage', | 
| 83 |  |  |  |  |  |  | 'ling' => 'linguistics', | 
| 84 |  |  |  |  |  |  | 'm' => 'male given name', | 
| 85 |  |  |  |  |  |  | 'm-sl' => 'manga slang', | 
| 86 |  |  |  |  |  |  | 'male' => 'male term or language', | 
| 87 |  |  |  |  |  |  | 'math' => 'mathematics', | 
| 88 |  |  |  |  |  |  | 'mil' => 'military', | 
| 89 |  |  |  |  |  |  | 'n' => 'noun (common) (futsuumeishi)', | 
| 90 |  |  |  |  |  |  | 'n-adv' => 'adverbial noun (fukushitekimeishi)', | 
| 91 |  |  |  |  |  |  | 'n-t' => 'noun (temporal) (jisoumeishi)', | 
| 92 |  |  |  |  |  |  | 'o' => 'organization name', | 
| 93 |  |  |  |  |  |  | 'oK' => 'word containing out-dated kanji', | 
| 94 |  |  |  |  |  |  | 'obs' => 'obsolete term', | 
| 95 |  |  |  |  |  |  | 'obsc' => 'obscure term', | 
| 96 |  |  |  |  |  |  | 'ok' => 'out-dated or obsolete kana usage', | 
| 97 |  |  |  |  |  |  | 'on-mim' => 'onomatopoeic or mimetic word', | 
| 98 |  |  |  |  |  |  | 'p' => 'place-name', | 
| 99 |  |  |  |  |  |  | 'physics' => 'physics', | 
| 100 |  |  |  |  |  |  | 'pn' => 'pronoun', | 
| 101 |  |  |  |  |  |  | 'pol' => 'polite (teineigo) language', | 
| 102 |  |  |  |  |  |  | 'pr' => 'product name', | 
| 103 |  |  |  |  |  |  | 'pref' => 'prefix', | 
| 104 |  |  |  |  |  |  | 'prt' => 'particle', | 
| 105 |  |  |  |  |  |  | 's' => 'surname', | 
| 106 |  |  |  |  |  |  | 'sens' => 'term with some sensitivity about its usage', | 
| 107 |  |  |  |  |  |  | 'sl' => 'slang', | 
| 108 |  |  |  |  |  |  | 'st' => 'station name', | 
| 109 |  |  |  |  |  |  | 'suf' => 'suffix', | 
| 110 |  |  |  |  |  |  | 'u' => 'person name, as-yet unclassified', | 
| 111 |  |  |  |  |  |  | 'uK' => 'word usually written using kanji alone', | 
| 112 |  |  |  |  |  |  | 'uk' => 'word usually written using kana alone', | 
| 113 |  |  |  |  |  |  | 'v1' => 'Ichidan verb', | 
| 114 |  |  |  |  |  |  | 'v5' => 'Godan verb (not completely classified)', | 
| 115 |  |  |  |  |  |  | 'v5aru' => 'Godan verb - -aru special class', | 
| 116 |  |  |  |  |  |  | 'v5k-s' => 'Godan verb - Iku/Yuku special class', | 
| 117 |  |  |  |  |  |  | 'v5u, v5k, etc.' => 'Godan verb with `u\', `ku\', etc. endings', | 
| 118 |  |  |  |  |  |  | 'vi' => 'intransitive verb', | 
| 119 |  |  |  |  |  |  | 'vk' => 'Kuru verb - special class', | 
| 120 |  |  |  |  |  |  | 'vs' => 'noun or participle which takes the aux. verb suru', | 
| 121 |  |  |  |  |  |  | 'vs-s' => 'suru verb - special class', | 
| 122 |  |  |  |  |  |  | 'vt' => 'transitive verb', | 
| 123 |  |  |  |  |  |  | 'vulg' => 'vulgar expression or word', | 
| 124 |  |  |  |  |  |  | 'vz' => 'Ichidan verb - -zuru special class (alternative form of -jiru verbs)', | 
| 125 |  |  |  |  |  |  | ); | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | sub get_mirrors | 
| 128 |  |  |  |  |  |  | { | 
| 129 | 1 |  |  | 1 | 1 | 77 | return %mirrors; | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | # Default mirror | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | our $default = 'usa'; | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub new | 
| 137 |  |  |  |  |  |  | { | 
| 138 | 0 |  |  | 0 | 1 |  | my ($class, %options) = @_; | 
| 139 | 0 |  |  |  |  |  | my $wwwjdic = {}; | 
| 140 | 0 | 0 |  |  |  |  | if ($options{mirror}) { | 
| 141 | 0 |  |  |  |  |  | my $mirror = lc $options{mirror}; | 
| 142 | 0 | 0 |  |  |  |  | if ($mirrors{$mirror}) { | 
| 143 | 0 |  |  |  |  |  | $wwwjdic->{site} = $mirrors{$mirror}; | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  | else { | 
| 146 | 0 |  |  |  |  |  | print STDERR __PACKAGE__, | 
| 147 |  |  |  |  |  |  | ": unknown mirror '$options{mirror}': using $default\n"; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  | } | 
| 150 |  |  |  |  |  |  | else { | 
| 151 | 0 |  |  |  |  |  | $wwwjdic->{site} = $mirrors{$default}; | 
| 152 |  |  |  |  |  |  | } | 
| 153 | 0 |  |  |  |  |  | bless $wwwjdic; | 
| 154 | 0 |  |  |  |  |  | return $wwwjdic; | 
| 155 |  |  |  |  |  |  | } | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | sub lookup_url | 
| 158 |  |  |  |  |  |  | { | 
| 159 | 0 |  |  | 0 | 1 |  | my ($wwwjdic, $search_key, $search_type) = @_; | 
| 160 | 0 |  |  |  |  |  | my %type; | 
| 161 | 0 |  |  |  |  |  | for (@$search_type) { | 
| 162 | 0 | 0 |  |  |  |  | $type{max} = $_ if /^[0-9]+$/; | 
| 163 |  |  |  |  |  |  | } | 
| 164 | 0 |  |  |  |  |  | my $url = $wwwjdic->{site}; # Start off with the site. | 
| 165 |  |  |  |  |  |  | # Q = all the dictionaries. | 
| 166 |  |  |  |  |  |  | # M = backdoor entry. | 
| 167 |  |  |  |  |  |  | # search type = U: UTF-8 lookup | 
| 168 | 0 |  |  |  |  |  | $url .= "?QMUJ"; | 
| 169 | 0 |  |  |  |  |  | my $search_key_encoded = URI::Escape::uri_escape_utf8 ($search_key); | 
| 170 | 0 |  |  |  |  |  | $url .= $search_key_encoded; | 
| 171 |  |  |  |  |  |  | # This means UTF-8 encoding. I don't think this is documented | 
| 172 |  |  |  |  |  |  | # anywhere. | 
| 173 | 0 |  |  |  |  |  | $url .= "_3"; | 
| 174 |  |  |  |  |  |  | # Maximum number of results to return. | 
| 175 | 0 | 0 |  |  |  |  | $url .= '_' . $type{max} if $type{max}; | 
| 176 | 0 |  |  |  |  |  | return $url; | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | 1; | 
| 180 |  |  |  |  |  |  |  |