File Coverage

blib/lib/Acme/Samurai.pm

Criterion	Covered	Total	%
statement	24	26	92.3
branch			n/a
condition			n/a
subroutine	9	9	100.0
pod			n/a
total	33	35	94.2

line	stmt	sub	time	code
1				package Acme::Samurai;
2	2	2	55291	use 5.010001;
	2		4
3	2	2	7	use strict;
	2		1
	2		30
4	2	2	13	use warnings;
	2		6
	2		40
5	2	2	503	use utf8;
	2		8
	2		10
6				our $VERSION = '0.04';
7
8	2	2	863	use File::ShareDir qw/dist_file/;
	2		8152
	2		115
9	2	2	793	use Lingua::JA::Alphabet::Yomi qw/alphabet2yomi/;
	2		1221397
	2		200
10	2	2	911	use Lingua::JA::Numbers qw/num2ja/;
	2		26542
	2		185
11	2	2	11	use Unicode::Japanese qw/unijp/;
	2		3
	2		13
12
13	2	2	942	use Text::Mecabist;
	0
	0
14
15				sub gozaru {
16				my $self = bless { }, shift;
17				my $text = shift // "";
18
19				my $parser = Text::Mecabist->new({
20				node_format => '%m,%H',
21				unk_format => '%m,%H',
22				bos_format => '%m,%H',
23				eos_format => '%m,%H',
24				userdic => dist_file('Acme-Samurai', Text::Mecabist->encoding->name . '.dic'),
25				});
26
27				# natukashi
28				$text = unijp($text)->z2hNum->h2zAlpha->getu;
29
30				my $doc = $parser->parse($text, sub {
31				my $node = shift;
32				$self->apply_rules($node);
33				});
34
35				return $self->finalize($doc);
36				}
37
38				sub apply_rules {
39				my ($self, $node) = @_;
40
41				return if not $node->readable;
42
43				my $text = $node->text;
44
45				# one to one custom dictionary
46				if ($node->extra) {
47				$text = $node->extra;
48				}
49
50				if ($node->is('名詞') or $node->is('記号')) {
51
52				# arabic number to kanji
53				if ($node->pos1 eq '数' and $node->surface =~ /^[0-9]+$/) {
54				# no 位
55				if ($node->surface =~ /^0/ or
56				$node->prev && $node->prev->surface =~ /[.．]/) {
57
58				$text = join "", map { num2ja($_) } split //, $node->surface;
59				} else {
60				$text = num2ja($node->surface); # with 位
61				}
62				}
63
64				# kanji number to more classic
65				elsif ($node->pos1 eq '数') {
66				$text =~ tr{〇一二三四五六七八九十百万}
67				{零壱弐参四伍六七八九拾佰萬};
68				}
69
70				# roman
71				elsif ($text =~ /^\p{Latin}+$/) {
72				$text = $node->pronunciation if $node->pronunciation;
73				$text = alphabet2yomi($text, 'en');
74				$text = unijp($text)->kata2hira->getu;
75				}
76				}
77
78				if ($node->is('動詞')) {
79				if ($text =~ /(.+?)(じる)$/) {
80				$text = "$1ずる";
81				}
82				if ($text eq 'い' and
83				$node->feature =~ /^動詞,非自立,[],[],一段,連用形/ and
84				$node->next and
85				$node->next->pos !~ /詞/) {
86
87				$text = 'おっ' if $node->next->lemma eq 'た';
88				$text = 'おり' if $node->next->lemma eq 'ます';
89				}
90				}
91
92				if ($node->is('形容詞')) {
93				if ($text =~ /^(.+?)(しい\|しく)$/) {
94				$text = $1 . { 'しい' => 'しき', 'しく' => 'しゅう' }->{$2};
95				}
96				}
97
98				if ($node->is('助詞')) {
99				if ($node->feature eq '助詞,終助詞,,,,,の,の,の,のか' and
100				$node->prev and
101				$node->prev->surface eq 'な') {
102				$node->prev->skip(1);
103				$text = 'なの';
104				}
105				elsif ($text eq 'ので' and
106				$node->prev and
107				$node->prev->surface eq 'な') {
108				$node->prev->skip(1);
109				$text = 'ゆえに';
110				}
111				elsif ($node->surface eq 'ね' and
112				$node->prev and
113				$node->prev->surface eq 'の') {
114				$text = 'だな';
115				}
116				}
117
118				if ($node->is('助動詞')) {
119				if ($text eq 'ない') {
120				if ($node->prev and
121				$node->prev->surface eq 'し' and
122				$node->next and
123				$node->next->surface and
124				$node->next->pos !~ /詞/) {
125				$node->prev->skip(1);
126				$text = 'せぬ';
127				}
128				if ($node->prev and
129				$node->prev->surface ne 'し' and
130				$node->prev->inflection_form eq '未然形') {
131				$text = 'ぬ';
132				}
133				}
134				elsif ($text eq 'なけれ') {
135				if ($node->prev and
136				$node->prev->surface eq 'し') {
137				$node->prev->skip(1);
138				$text = 'せね';
139				}
140				}
141				}
142
143				if ($node->is('感動詞')) {
144				if ($node->next and
145				$node->next->pos !~ /詞/) {
146				$text = $node->extra if $node->extra;
147				$text .= 'でござる';
148				}
149				}
150
151				$node->text($text);
152				}
153
154				sub finalize {
155				my ($self, $doc) = @_;
156				my $text = $doc->join('text');
157				$text =~ s/(?:ておりまする\|ていまする?)\b/ており候/g;
158				$text =~ s/(?:どうも)?かたじけない(?:ございま(?:する\|す\|した))?/かたじけない/g;
159				$text;
160				}
161
162				1;
163				__END__