File Coverage

blib/lib/Lingua/TFIDF/WordSegmenter/JA/MeCab.pm

Criterion	Covered	Total	%
statement	13	15	86.6
branch			n/a
condition			n/a
subroutine	5	5	100.0
pod			n/a
total	18	20	90.0

line	stmt	sub	time	code
1				package Lingua::TFIDF::WordSegmenter::JA::MeCab;
2
3				# ABSTRACT: Word segmenter for Japanese documents
4
5	1	1	863	use strict;
	1		2
	1		63
6	1	1	6	use warnings;
	1		1
	1		35
7	1	1	966	use Encode qw//;
	1		11784
	1		18
8	1	1	850	use Smart::Args;
	1		31460
	1		78
9	1	1	418	use Text::MeCab;
	0
	0
10
11				my $mecab_encoding = Encode::find_encoding(Text::MeCab::ENCODING);
12
13				sub new {
14				args
15				my $class => 'ClassName',
16				my $mecab => +{ isa => 'Text::MeCab', optional => 1 };
17
18				$mecab = Text::MeCab->new unless defined $mecab;
19				bless +{ mecab => $mecab } => $class;
20				}
21
22				sub mecab { $_[0]->{mecab} }
23
24				sub segment {
25				args_pos
26				my $self,
27				my $document => 'Ref | Str';
28
29				my $input = $mecab_encoding->encode(ref $document ? $$document : $document);
30				my $node = $self->mecab->parse($input);
31				sub {
32				return unless $node and $node->stat != Text::MeCab::MECAB_EOS_NODE;
33				my $word = $mecab_encoding->decode($node->surface);
34				$node = $node->next;
35				return $word;
36				};
37				}
38
39				1;
40
41				__END__
42
43				=pod
44
45				=encoding UTF-8
46
47				=head1 NAME
48
49				Lingua::TFIDF::WordSegmenter::JA::MeCab - Word segmenter for Japanese documents
50
51				=head1 VERSION
52
53				version 0.01
54
55				=head1 SYNOPSIS
56
57				use utf8;
58				use Lingua::TFIDF::WordSegmenter::JA::MeCab;
59
60				my $segmenter = Lingua::TFIDF::WordSegmenter::JA::MeCab->new;
61				my $iter = $segmenter->segment('思い出せ、思い出せ　11月5日を...');
62				while (defined(my $word = $iter->())) { ... }
63
64				=head1 DESCRIPTION
65
66				This class is a word segmenter for documents written in Japanese.
67
68				=head1 METHODS
69
70				=head2 new([ mecab => Text::MeCab->new ])
71
72				Constructor.
73
74				=head2 segment($document | \$document)
75
76				Executes word segmentation on the given C<$document> and returns a word iterator.
77
78				=head1 SEE ALSO
79
80				L<Text::MeCab>
81
82				=head1 AUTHOR
83
84				Koichi SATOH <sekia@cpan.org>
85
86				=head1 COPYRIGHT AND LICENSE
87
88				This software is Copyright (c) 2014 by Koichi SATOH.
89
90				This is free software, licensed under:
91
92				The MIT (X11) License
93
94				=cut