line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Acme::Ikamusume; |
2
|
3
|
|
|
3
|
|
84577
|
use 5.010001; |
|
3
|
|
|
|
|
7
|
|
3
|
3
|
|
|
3
|
|
13
|
use strict; |
|
3
|
|
|
|
|
655
|
|
|
3
|
|
|
|
|
62
|
|
4
|
3
|
|
|
3
|
|
14
|
use warnings; |
|
3
|
|
|
|
|
655
|
|
|
3
|
|
|
|
|
67
|
|
5
|
3
|
|
|
3
|
|
918
|
use utf8; |
|
3
|
|
|
|
|
14
|
|
|
3
|
|
|
|
|
10
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.08'; |
7
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
1363
|
use File::ShareDir qw/dist_file/; |
|
3
|
|
|
|
|
12507
|
|
|
3
|
|
|
|
|
181
|
|
9
|
3
|
|
|
3
|
|
1256
|
use Lingua::JA::Kana; |
|
3
|
|
|
|
|
107280
|
|
|
3
|
|
|
|
|
248
|
|
10
|
|
|
|
|
|
|
|
11
|
3
|
|
|
3
|
|
1101
|
use Text::Mecabist; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub geso { |
14
|
|
|
|
|
|
|
my $self = bless { }, shift; |
15
|
|
|
|
|
|
|
my $text = shift // ""; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $parser = Text::Mecabist->new({ |
18
|
|
|
|
|
|
|
userdic => dist_file('Acme-Ikamusume', Text::Mecabist->encoding->name .'.dic'), |
19
|
|
|
|
|
|
|
}); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $doc = $parser->parse($text, sub { |
22
|
|
|
|
|
|
|
my $node = shift; |
23
|
|
|
|
|
|
|
return if not $node->readable; |
24
|
|
|
|
|
|
|
return if not $node->reading; |
25
|
|
|
|
|
|
|
$self->apply_rules($node); |
26
|
|
|
|
|
|
|
}); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
return $doc->join('text'); |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub godan { |
32
|
|
|
|
|
|
|
my ($verb, $from, $to) = @_; |
33
|
|
|
|
|
|
|
if (my ($kana) = $verb =~ /(\p{InHiragana})$/) { |
34
|
|
|
|
|
|
|
$kana = kana2romaji($kana); |
35
|
|
|
|
|
|
|
$kana =~ s/^sh/s/; |
36
|
|
|
|
|
|
|
$kana =~ s/^ch/t/; |
37
|
|
|
|
|
|
|
$kana =~ s/^ts/t/; |
38
|
|
|
|
|
|
|
$kana =~ s/$from$/$to/; |
39
|
|
|
|
|
|
|
$kana =~ s/^a$/wa/; |
40
|
|
|
|
|
|
|
$kana =~ s/ti/chi/; |
41
|
|
|
|
|
|
|
$kana =~ s/tu/tsu/; |
42
|
|
|
|
|
|
|
$kana = romaji2hiragana($kana); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
$verb =~ s/.$/$kana/; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
$verb; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
our @rules = ( |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# userdic extra field |
52
|
|
|
|
|
|
|
sub { |
53
|
|
|
|
|
|
|
my $node = shift; |
54
|
|
|
|
|
|
|
my $word = $node->extra1 or return; |
55
|
|
|
|
|
|
|
$node->text($word); |
56
|
|
|
|
|
|
|
}, |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# IKA: inflection |
59
|
|
|
|
|
|
|
sub { |
60
|
|
|
|
|
|
|
my $node = shift; |
61
|
|
|
|
|
|
|
return if not $node->extra2; |
62
|
|
|
|
|
|
|
return if not $node->extra2 =~ /inflection/; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
if ($prev->is('名詞')) { |
67
|
|
|
|
|
|
|
$node->text('じゃなイカ'); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
elsif ($prev->is('副詞')) { |
70
|
|
|
|
|
|
|
$node->text('でゲソか'); |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
elsif ($prev->is('助動詞') and |
73
|
|
|
|
|
|
|
$prev->surface eq 'です' and |
74
|
|
|
|
|
|
|
$prev->prev and $prev->prev->text !~ /^[いイ]{2}$/) { |
75
|
|
|
|
|
|
|
$prev->text('じゃなイ'); |
76
|
|
|
|
|
|
|
$node->text('カ'); |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
if ($prev->text =~ /(?:イー?カ|ゲソ)$/) { |
80
|
|
|
|
|
|
|
return; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
elsif ($prev->inflection_type =~ /五段/) { |
83
|
|
|
|
|
|
|
$prev->text(godan($prev->text, '.' => 'a')); |
84
|
|
|
|
|
|
|
$node->text('なイカ'); |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
elsif ($prev->inflection_type =~ /一段|カ変|サ変/) { |
87
|
|
|
|
|
|
|
$node->text('なイカ'); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
}, |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# formal MASU to casual |
92
|
|
|
|
|
|
|
sub { |
93
|
|
|
|
|
|
|
my $node = shift; |
94
|
|
|
|
|
|
|
if ($node->lemma eq 'ます' and |
95
|
|
|
|
|
|
|
$node->is('助動詞') and |
96
|
|
|
|
|
|
|
$node->prev && $node->prev->is('動詞')) { |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
if ($node->is('基本形')) { # ます |
99
|
|
|
|
|
|
|
$node->prev->text($node->prev->lemma); |
100
|
|
|
|
|
|
|
$node->text(''); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
if ($node->next->pos =~ /^助詞/) { |
103
|
|
|
|
|
|
|
$node->text($node->text . 'でゲソ'); |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
if ($node->is('連用形') and # ます |
107
|
|
|
|
|
|
|
$node->pos3 !~ /五段/) { # 五段 => { -i/っ/ん/い } |
108
|
|
|
|
|
|
|
$node->text(''); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
}, |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# no honorific |
114
|
|
|
|
|
|
|
sub { |
115
|
|
|
|
|
|
|
my $node = shift; |
116
|
|
|
|
|
|
|
if ($node->feature =~ /^名詞,接尾,人名,/ and |
117
|
|
|
|
|
|
|
$node->prev->text ne 'イカ娘') { |
118
|
|
|
|
|
|
|
$node->text(''); |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
}, |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# IKA/GESO: replace |
123
|
|
|
|
|
|
|
sub { |
124
|
|
|
|
|
|
|
my $node = shift; |
125
|
|
|
|
|
|
|
my $text = $node->text; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
$text =~ s/い[いー]か(.)/イーカ$1/g; |
128
|
|
|
|
|
|
|
$text =~ s/いか/イカ/g; |
129
|
|
|
|
|
|
|
$text =~ s/げそ/ゲソ/g; |
130
|
|
|
|
|
|
|
$node->text($text); |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
return if $text =~ /イー?カ|ゲソ/; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
my $curr = katakana2hiragana($node->reading || ""); |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
$node->text($curr) if $curr =~ s/い[いー]か(.)/イーカ$1/g; |
137
|
|
|
|
|
|
|
$node->text($curr) if $curr =~ s/いか/イカ/g; |
138
|
|
|
|
|
|
|
$node->text($curr) if $curr =~ s/げそ/ゲソ/g; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
my $next = katakana2hiragana(($node->next and $node->next->reading) || ""); |
141
|
|
|
|
|
|
|
my $prev = katakana2hiragana(join "", |
142
|
|
|
|
|
|
|
$node->prev && $node->prev->prev && $node->prev->prev->text, |
143
|
|
|
|
|
|
|
$node->prev && $node->prev->text); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
$node->text($curr) if $next =~ /^か./ && $curr =~ s/い[いー]$/イー/; |
146
|
|
|
|
|
|
|
$node->text($curr) if $prev =~ /い[いー]$/ && $curr =~ s/^か(.)/カ$1/; |
147
|
|
|
|
|
|
|
$node->text($curr) if $prev =~ /[いイ]$/ && $curr =~ s/^か/カ/; |
148
|
|
|
|
|
|
|
$node->text($curr) if $next =~ /^か/ && $curr =~ s/い$/イ/; |
149
|
|
|
|
|
|
|
$node->text($curr) if $next =~ /^そ/ && $curr =~ s/げ$/ゲ/; |
150
|
|
|
|
|
|
|
$node->text($curr) if $prev =~ /げ$/ && $curr =~ s/^そ/ソ/; |
151
|
|
|
|
|
|
|
}, |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# IKA/GESO: DA + postp |
154
|
|
|
|
|
|
|
sub { |
155
|
|
|
|
|
|
|
my $node = shift; |
156
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
if ($prev->surface eq 'だ' and |
159
|
|
|
|
|
|
|
$prev->text eq 'でゲソ' and |
160
|
|
|
|
|
|
|
( |
161
|
|
|
|
|
|
|
$node->pos =~ /助詞|助動詞/ or |
162
|
|
|
|
|
|
|
$node->is('接尾') |
163
|
|
|
|
|
|
|
) |
164
|
|
|
|
|
|
|
) { |
165
|
|
|
|
|
|
|
my $kana = kana2romaji($node->text); |
166
|
|
|
|
|
|
|
if ($kana =~/^(?:ze|n[aeo]|yo|wa)/) { |
167
|
|
|
|
|
|
|
$node->text(''); |
168
|
|
|
|
|
|
|
$prev->text('じゃなイカ'); |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
if ($kana =~ /^zo/) { |
171
|
|
|
|
|
|
|
$node->text(''); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
}, |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub { |
177
|
|
|
|
|
|
|
my $node = shift; |
178
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
179
|
|
|
|
|
|
|
my $latest = join "", |
180
|
|
|
|
|
|
|
$prev->prev && $prev->prev->text, |
181
|
|
|
|
|
|
|
$prev && $prev->text, |
182
|
|
|
|
|
|
|
$node->text; |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
if ($node->is('終助詞') and |
185
|
|
|
|
|
|
|
$latest =~ /(?:でゲソ|じゃなイカ)[よなね]$/) { |
186
|
|
|
|
|
|
|
$node->text(''); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
}, |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# IKA: IIKA |
191
|
|
|
|
|
|
|
sub { |
192
|
|
|
|
|
|
|
my $node = shift; |
193
|
|
|
|
|
|
|
my $prev = $node->prev or return; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
if ($prev->text !~ /^(?:[いイ]{2})$/) { |
196
|
|
|
|
|
|
|
return; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
if ($node->surface =~ /^(?:です|でしょう)$/ and |
199
|
|
|
|
|
|
|
$node->next->surface =~ /^か/) { |
200
|
|
|
|
|
|
|
$prev->text('いイ'); |
201
|
|
|
|
|
|
|
$node->text(''); |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
if ($node->surface eq 'でしょうか') { |
204
|
|
|
|
|
|
|
$prev->text('いイ'); |
205
|
|
|
|
|
|
|
$node->text('カ'); |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
}, |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# GESO/IKA: eos |
210
|
|
|
|
|
|
|
sub { |
211
|
|
|
|
|
|
|
my $node = shift; |
212
|
|
|
|
|
|
|
my $next = $node->next or return; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
if ($next->stat == 3 or # MECAB_EOS_NODE |
215
|
|
|
|
|
|
|
( |
216
|
|
|
|
|
|
|
$next->is('記号') and |
217
|
|
|
|
|
|
|
$next->pos1 =~ /句点|括弧閉|GESO可/ |
218
|
|
|
|
|
|
|
) |
219
|
|
|
|
|
|
|
) { |
220
|
|
|
|
|
|
|
if ($node->pos =~ /^(?:その他|記号|助詞|接頭詞|接続詞|連体詞)/) { |
221
|
|
|
|
|
|
|
return; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
if ($node->is('助動詞') and |
225
|
|
|
|
|
|
|
$node->prev and $node->prev->text eq 'じゃ' and |
226
|
|
|
|
|
|
|
$node->surface eq 'ない') { |
227
|
|
|
|
|
|
|
$node->text('なイカ'); |
228
|
|
|
|
|
|
|
return; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
if ($node->pos =~ /^助動詞/ and |
232
|
|
|
|
|
|
|
$node->prev and $node->prev->text =~ /(?:ゲソ|イー?カ)/) { |
233
|
|
|
|
|
|
|
return; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
my $latest = join "", |
237
|
|
|
|
|
|
|
$node->prev && $node->prev->text, |
238
|
|
|
|
|
|
|
$node->text; |
239
|
|
|
|
|
|
|
if ($latest =~ /(?:ゲソ|イー?カ)$/) { |
240
|
|
|
|
|
|
|
return; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
$node->text($node->text . 'でゲソ'); |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
if ($node->is('動詞') and |
247
|
|
|
|
|
|
|
$node->inflection_form =~ '基本形' and |
248
|
|
|
|
|
|
|
$next->pos =~ /^助詞/) { |
249
|
|
|
|
|
|
|
$node->text($node->text . 'でゲソ'); |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
}, |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# EBI: accent |
254
|
|
|
|
|
|
|
sub { |
255
|
|
|
|
|
|
|
my $node = shift; |
256
|
|
|
|
|
|
|
my $text = $node->text; |
257
|
|
|
|
|
|
|
my @ebi_accent = qw(! ♪ ♪ ♫ ♬ ♡); |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
$text =~ s{(エビ|えび|海老)}{ |
260
|
|
|
|
|
|
|
$1 . $ebi_accent[ int rand scalar @ebi_accent ]; |
261
|
|
|
|
|
|
|
}e; |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
$node->text($text); |
264
|
|
|
|
|
|
|
}, |
265
|
|
|
|
|
|
|
); |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub apply_rules { |
268
|
|
|
|
|
|
|
my ($self, $node) = @_; |
269
|
|
|
|
|
|
|
for my $rule (@rules) { |
270
|
|
|
|
|
|
|
$rule->($node); |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
1; |
275
|
|
|
|
|
|
|
__END__ |