line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WebService::KoreanSpeller; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.015'; |
4
|
|
|
|
|
|
|
$VERSION = eval $VERSION; |
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
71552
|
use Moose; |
|
2
|
|
|
|
|
941752
|
|
|
2
|
|
|
|
|
17
|
|
7
|
2
|
|
|
2
|
|
16055
|
use Moose::Util::TypeConstraints; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
21
|
|
8
|
2
|
|
|
2
|
|
5951
|
use namespace::autoclean; |
|
2
|
|
|
|
|
16677
|
|
|
2
|
|
|
|
|
9
|
|
9
|
2
|
|
|
2
|
|
1195
|
use HTTP::Request::Common qw/POST/; |
|
2
|
|
|
|
|
48815
|
|
|
2
|
|
|
|
|
148
|
|
10
|
2
|
|
|
2
|
|
1462
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
46126
|
|
|
2
|
|
|
|
|
76
|
|
11
|
2
|
|
|
2
|
|
1209
|
use utf8; |
|
2
|
|
|
|
|
32
|
|
|
2
|
|
|
|
|
21
|
|
12
|
2
|
|
|
2
|
|
1189
|
use Encode qw/encode decode/; |
|
2
|
|
|
|
|
20495
|
|
|
2
|
|
|
|
|
1014
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
subtype 'UTF8FlagOnString' |
15
|
|
|
|
|
|
|
=> as 'Str' |
16
|
|
|
|
|
|
|
=> where { utf8::is_utf8($_) }; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
has 'text' => ( is => 'ro', isa => 'UTF8FlagOnString', required => 1 ); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
sub spellcheck { |
21
|
1
|
|
|
1
|
1
|
8
|
my ($self) = @_; |
22
|
1
|
|
|
|
|
17
|
my $ua = LWP::UserAgent->new; |
23
|
1
|
|
|
|
|
3098
|
my $text = $self->text; |
24
|
1
|
|
|
|
|
7
|
my $text1 = encode('utf8', $text); |
25
|
1
|
|
|
|
|
65
|
my $req = POST 'http://speller.cs.pusan.ac.kr/results', [ text1 => $text1 ]; |
26
|
1
|
|
|
|
|
8315
|
my $res = $ua->request($req); |
27
|
|
|
|
|
|
|
|
28
|
1
|
50
|
|
|
|
1266122
|
die unless $res->is_success; |
29
|
1
|
|
|
|
|
23
|
my $content = decode('utf8', $res->as_string); |
30
|
|
|
|
|
|
|
#print "$content"; exit; |
31
|
|
|
|
|
|
|
|
32
|
1
|
|
|
|
|
528
|
my @items; |
33
|
1
|
|
|
|
|
25
|
my ($res_json) = ( $content =~ m/\bdata = \[\{"str":.*?"errInfo":(\[.*?\])/ ); |
34
|
1
|
50
|
|
|
|
6
|
return @items unless defined $res_json; # No error |
35
|
|
|
|
|
|
|
|
36
|
1
|
|
|
|
|
12
|
my @tables = $res_json =~ m/(\{"help":.*?\})/g; |
37
|
1
|
50
|
|
|
|
5
|
return @items unless @tables; # No error |
38
|
|
|
|
|
|
|
|
39
|
1
|
|
|
|
|
5
|
foreach my $table (@tables) { |
40
|
1
|
|
|
|
|
2
|
my %item; |
41
|
|
|
|
|
|
|
@item{qw/comment position incorrect correct/} = |
42
|
|
|
|
|
|
|
( |
43
|
|
|
|
|
|
|
map { |
44
|
1
|
|
|
|
|
14
|
$_ =~s/
/\n/g; |
|
4
|
|
|
|
|
12
|
|
45
|
4
|
|
|
|
|
9
|
$_ =~s/^\s+//s; |
46
|
4
|
|
|
|
|
15
|
$_ =~s/\s+$//s; |
47
|
4
|
|
|
|
|
13
|
$_ =~s/'//gs; |
48
|
4
|
|
|
|
|
13
|
$_ |
49
|
|
|
|
|
|
|
} $table =~ m{"help":"(.*?)".*?,"start":(\d+),.*?,"orgStr":"(.*?)","candWord":"(.*?)"}sg |
50
|
|
|
|
|
|
|
); |
51
|
1
|
|
|
|
|
7
|
push @items, \%item; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
1
|
|
|
|
|
42
|
return @items; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
1; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
__END__ |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=pod |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=encoding utf8 |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 NAME |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
WebService::KoreanSpeller - Korean spellchecker |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 SYNOPSIS |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
use WebService::KoreanSpeller; |
74
|
|
|
|
|
|
|
use utf8; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my $checker = WebService::KoreanSpeller->new( text=> 'ìë½íì¸ì? ë°©ê°ìµëë¤.' ); |
77
|
|
|
|
|
|
|
my @results = $checker->spellcheck; # returns array of hashes |
78
|
|
|
|
|
|
|
binmode STDOUT, ':encoding(UTF-8)'; |
79
|
|
|
|
|
|
|
foreach my $item (@results) { |
80
|
|
|
|
|
|
|
print $item->{position}, "\n"; # index in the original text (starting from 0) |
81
|
|
|
|
|
|
|
print $item->{incorrect}, " -> "; # incorrect spelling |
82
|
|
|
|
|
|
|
print $item->{correct}, "\n"; # correct spelling |
83
|
|
|
|
|
|
|
print $item->{comment}, "\n"; # comment about spelling |
84
|
|
|
|
|
|
|
print "------------------------------\n"; |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
OUTPUT: |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
0 |
91
|
|
|
|
|
|
|
ìë½íì¸ì -> ìë
íì¸ì |
92
|
|
|
|
|
|
|
íì¤ ë°ì·íì¤ì´ ì¤ë¥ |
93
|
|
|
|
|
|
|
ì´ë¦°ì´ë¤ì ë°ìì íë´ë´ì´ 'ìë½'ì´ë¼ê³ ë§íë ì¬ëë¤ì´ ì¢
ì¢
ììµëë¤. í¹í, ê¸ì ì¸ ëìë ì´ë¬í ë¨ì´ë¥¼ ì°ì§ ìëë¡ í©ìë¤. |
94
|
|
|
|
|
|
|
------------------------------ |
95
|
|
|
|
|
|
|
7 |
96
|
|
|
|
|
|
|
ë°©ê°ìµëë¤ -> ë°ê°ìµëë¤ |
97
|
|
|
|
|
|
|
ì½ì´ ì¬ì© ì¤ë¥ |
98
|
|
|
|
|
|
|
ì¤ëë íµì ìì ì주 ì°ë ìì´ì
ëë¤. |
99
|
|
|
|
|
|
|
------------------------------ |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 DESCRIPTION |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
This module provides a Perl interface to the Web-based korean speller service( ì¨ë¼ì¸ íêµì´ ë§ì¶¤ë²/ë¬¸ë² ê²ì¬ê¸° - http://speller.cs.pusan.ac.kr ). |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head1 METHODS |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head2 new( text => 'text for spell check' ) |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Returns an obejct instance of this module. text should be "Unicode string"(a.k.a. perl's internal format - utf8 encoding/utf8 flag on) |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head2 spellcheck |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Returns results as array of hashes(if there is no error in the text, this method will return empty list), See SYNOPSIS. you can easily convert AoH to JSON or XML. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 CAUTION |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
I'm afraid we don't have a good open source korean spell checker. but there is a decent proprietary service that runs on the online website( ì¨ë¼ì¸ íêµì´ ë§ì¶¤ë²/ë¬¸ë² ê²ì¬ê¸° - http://speller.cs.pusan.ac.kr ). So I made this module with web-scrapping approach, this is easy to mess up if they change layout of the website and has same limitation(checking only 300 synatic words at once). Let me know if this does not work. *This module follows the same terms of the original service agreement.* |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head1 AUTHOR |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
C.H. Kang <chahkang@gmail.com> |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
This software is copyright (c) 2017 by C.H. Kang. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
128
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |