line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Lingua::KO::Romanize::Hangul - Romanization of Korean language
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Lingua::KO::Romanize::Hangul;
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $conv = Lingua::KO::Romanize::Hangul->new();
|
10
|
|
|
|
|
|
|
my $roman = $conv->char( $hangul );
|
11
|
|
|
|
|
|
|
printf( "%s", $hangul, $roman );
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my @array = $conv->string( $string );
|
14
|
|
|
|
|
|
|
foreach my $pair ( @array ) {
|
15
|
|
|
|
|
|
|
my( $raw, $ruby ) = @$pair;
|
16
|
|
|
|
|
|
|
if ( defined $ruby ) {
|
17
|
|
|
|
|
|
|
printf( "%s", $raw, $ruby );
|
18
|
|
|
|
|
|
|
} else {
|
19
|
|
|
|
|
|
|
print $raw;
|
20
|
|
|
|
|
|
|
}
|
21
|
|
|
|
|
|
|
}
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
Hangul is phonemic characters of the Korean language.
|
26
|
|
|
|
|
|
|
This module follows the C
|
27
|
|
|
|
|
|
|
which was released on July 7, 2000
|
28
|
|
|
|
|
|
|
as the official romanization system in South Korea.
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head2 $conv = Lingua::KO::Romanize::Hangul->new();
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
This constructer methods returns a new object.
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head2 $roman = $conv->char( $hangul );
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
This method returns romanized letters of a Hangul character.
|
37
|
|
|
|
|
|
|
It returns undef when $hanji is not a valid Hangul character.
|
38
|
|
|
|
|
|
|
The argument's encoding must be UTF-8.
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 $roman = $conv->chars( $string );
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
This method returns romanized letters of Hangul characters.
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head2 @array = $conv->string( $string );
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
This method returns a array of referenced arrays
|
47
|
|
|
|
|
|
|
which are pairs of a Hangul chacater and its romanized letters.
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
$array[0] # first Korean character's pair (array)
|
50
|
|
|
|
|
|
|
$array[1][0] # secound Korean character itself
|
51
|
|
|
|
|
|
|
$array[1][1] # its romanized letters
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 UTF-8 FLAG
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
This module treats utf8 flag transparently.
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head1 SEE ALSO
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
L for Japanese
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
L for Chinese
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
http://www.korean.go.kr/06_new/rule/rule06.jsp
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
http://www.kawa.net/works/perl/romanize/romanize-e.html
|
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Copyright (c) 1998-2008 Yusuke Kawasaki. All rights reserved.
|
70
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or
|
71
|
|
|
|
|
|
|
modify it under the same terms as Perl itself.
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut
|
74
|
|
|
|
|
|
|
# ----------------------------------------------------------------
|
75
|
|
|
|
|
|
|
package Lingua::KO::Romanize::Hangul;
|
76
|
3
|
|
|
3
|
|
3852
|
use strict;
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
131
|
|
77
|
3
|
|
|
3
|
|
19
|
use vars qw( $VERSION );
|
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
27727
|
|
78
|
|
|
|
|
|
|
$VERSION = "0.20";
|
79
|
|
|
|
|
|
|
my $PERL581 = 1 if ( $] >= 5.008001 );
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
my $INITIAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
|
82
|
|
|
|
|
|
|
g kk n d tt r m b pp s ss - j jj
|
83
|
|
|
|
|
|
|
ch k t p h
|
84
|
|
|
|
|
|
|
)];
|
85
|
|
|
|
|
|
|
my $PEAK_LETTER = [map {$_ eq '-' ? '' : $_} qw(
|
86
|
|
|
|
|
|
|
a ae ya yae eo e yeo ye o wa wae oe yo u
|
87
|
|
|
|
|
|
|
wo we wi yu eu ui i
|
88
|
|
|
|
|
|
|
)];
|
89
|
|
|
|
|
|
|
my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
|
90
|
|
|
|
|
|
|
- g kk ks n nj nh d r lg lm lb ls lt
|
91
|
|
|
|
|
|
|
lp lh m b ps s ss ng j c k t p h
|
92
|
|
|
|
|
|
|
)];
|
93
|
|
|
|
|
|
|
# my $FINAL_LETTER = [map {$_ eq '-' ? '' : $_} qw(
|
94
|
|
|
|
|
|
|
# - g kk ks n nj nh d r rg rm rb rs rt
|
95
|
|
|
|
|
|
|
# rp rh m b bs s ss ng j c k t p h
|
96
|
|
|
|
|
|
|
# )];
|
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# ----------------------------------------------------------------
|
99
|
|
|
|
|
|
|
sub new {
|
100
|
3
|
|
|
3
|
1
|
1316
|
my $package = shift;
|
101
|
3
|
|
|
|
|
9
|
my $self = {@_};
|
102
|
3
|
|
|
|
|
11
|
bless $self, $package;
|
103
|
3
|
|
|
|
|
9
|
$self;
|
104
|
|
|
|
|
|
|
}
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub char {
|
107
|
199
|
|
|
199
|
1
|
1243
|
my $self = shift;
|
108
|
199
|
50
|
|
|
|
501
|
return $self->_char(@_) unless $PERL581;
|
109
|
199
|
|
|
|
|
387
|
my $char = shift;
|
110
|
199
|
|
|
|
|
1020
|
my $utf8 = utf8::is_utf8( $char );
|
111
|
199
|
100
|
|
|
|
1154
|
utf8::encode( $char ) if $utf8;
|
112
|
199
|
|
|
|
|
890
|
$char = $self->_char( $char );
|
113
|
199
|
100
|
|
|
|
696
|
utf8::decode( $char ) if $utf8;
|
114
|
199
|
|
|
|
|
609
|
$char;
|
115
|
|
|
|
|
|
|
}
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub _char {
|
118
|
199
|
|
|
199
|
|
764
|
my $self = shift;
|
119
|
199
|
|
|
|
|
262
|
my $char = shift;
|
120
|
199
|
|
|
|
|
728
|
my( $c1, $c2, $c3, $c4 ) = unpack("C*",$char);
|
121
|
199
|
100
|
66
|
|
|
2823
|
return if ( ! defined $c3 || defined $c4 );
|
122
|
197
|
|
|
|
|
1116
|
my $ucs2 = (($c1 & 0x0F)<<12) | (($c2 & 0x3F)<<6) | ($c3 & 0x3F);
|
123
|
197
|
50
|
|
|
|
662
|
return if ( $ucs2 < 0xAC00 );
|
124
|
197
|
50
|
|
|
|
696
|
return if ( $ucs2 > 0xD7A3 );
|
125
|
197
|
|
|
|
|
342
|
my $han = $ucs2 - 0xAC00;
|
126
|
197
|
|
|
|
|
923
|
my $init = int( $han / 21 / 28 );
|
127
|
197
|
|
|
|
|
283
|
my $peak = int( $han / 28 ) % 21;
|
128
|
197
|
|
|
|
|
816
|
my $fin = $han % 28;
|
129
|
197
|
|
|
|
|
878
|
join( "", $INITIAL_LETTER->[$init], $PEAK_LETTER->[$peak], $FINAL_LETTER->[$fin] );
|
130
|
|
|
|
|
|
|
}
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub chars {
|
133
|
73
|
|
|
73
|
1
|
61973
|
my $self = shift;
|
134
|
73
|
|
|
|
|
173
|
my @array = $self->string( shift );
|
135
|
73
|
50
|
|
|
|
130
|
join( " ", map {$#$_>0 ? $_->[1] : $_->[0]} @array );
|
|
187
|
|
|
|
|
1524
|
|
136
|
|
|
|
|
|
|
}
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub string {
|
139
|
76
|
|
|
76
|
1
|
2978
|
my $self = shift;
|
140
|
76
|
50
|
|
|
|
190
|
return $self->_string(@_) unless $PERL581;
|
141
|
76
|
|
|
|
|
410
|
my $char = shift;
|
142
|
76
|
|
|
|
|
158
|
my $flag = utf8::is_utf8( $char );
|
143
|
76
|
100
|
|
|
|
442
|
utf8::encode( $char ) if $flag;
|
144
|
76
|
|
|
|
|
160
|
my @array = $self->_string( $char );
|
145
|
76
|
100
|
|
|
|
168
|
if ( $flag ) {
|
146
|
2
|
|
|
|
|
3
|
foreach my $pair ( @array ) {
|
147
|
6
|
50
|
|
|
|
19
|
utf8::decode( $pair->[0] ) if defined $pair->[0];
|
148
|
6
|
50
|
|
|
|
23
|
utf8::decode( $pair->[1] ) if defined $pair->[1];
|
149
|
|
|
|
|
|
|
}
|
150
|
|
|
|
|
|
|
}
|
151
|
76
|
|
|
|
|
453
|
@array;
|
152
|
|
|
|
|
|
|
}
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# [UCS-2] AC00-D7A3
|
155
|
|
|
|
|
|
|
# [UTF-8] EAB080-ED9EA3
|
156
|
|
|
|
|
|
|
# EA-ED are appeared only as Hangul's first character.
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub _string {
|
159
|
76
|
|
|
76
|
|
94
|
my $self = shift;
|
160
|
76
|
|
|
|
|
225
|
my $src = shift;
|
161
|
76
|
|
|
|
|
202
|
my $array = [];
|
162
|
76
|
|
|
|
|
587
|
while ( $src =~ /([\xEA-\xED][\x80-\xBF]{2})|([^\xEA-\xED]+)/sg ) {
|
163
|
193
|
50
|
|
|
|
662
|
if ( defined $1 ) {
|
164
|
193
|
|
|
|
|
598
|
my $pair = [ $1 ];
|
165
|
193
|
|
|
|
|
1823
|
my $roman = $self->char( $1 );
|
166
|
193
|
50
|
|
|
|
2001
|
$pair->[1] = $roman if defined $roman;
|
167
|
193
|
|
|
|
|
1540
|
push( @$array, $pair );
|
168
|
|
|
|
|
|
|
} else {
|
169
|
0
|
|
|
|
|
0
|
push( @$array, [ $2 ] );
|
170
|
|
|
|
|
|
|
}
|
171
|
|
|
|
|
|
|
}
|
172
|
|
|
|
|
|
|
|
173
|
76
|
|
|
|
|
373
|
for ( my $i = 0 ; $i < $#$array ; $i++ ) {
|
174
|
117
|
50
|
|
|
|
244
|
next if ( scalar @{ $array->[$i] } < 2 );
|
|
117
|
|
|
|
|
331
|
|
175
|
117
|
50
|
|
|
|
281
|
next if ( scalar @{ $array->[ $i + 1 ] } < 2 );
|
|
117
|
|
|
|
|
361
|
|
176
|
117
|
|
|
|
|
515
|
my $this = $array->[$i]->[1];
|
177
|
117
|
|
|
|
|
490
|
my $next = $array->[ $i + 1 ]->[1];
|
178
|
117
|
100
|
|
|
|
687
|
my $novowel = 1 unless ( $next =~ /^[aeouiwy]/ );
|
179
|
|
|
|
|
|
|
|
180
|
117
|
50
|
33
|
|
|
9112
|
if ( $this =~ /(tt|pp|jj)$/ && $novowel ) {
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
181
|
0
|
|
|
|
|
0
|
$array->[$i]->[1] =~ s/(tt|pp|jj)$//;
|
182
|
|
|
|
|
|
|
}
|
183
|
|
|
|
|
|
|
elsif ( $this =~ /([^n]g|kk)$/ && $novowel ) {
|
184
|
11
|
|
|
|
|
96
|
$array->[$i]->[1] =~ s/(g|kk)$/k/;
|
185
|
|
|
|
|
|
|
}
|
186
|
|
|
|
|
|
|
elsif ( $this =~ /(d|j|ch|s?s)$/ && $novowel ) {
|
187
|
2
|
|
|
|
|
18
|
$array->[$i]->[1] =~ s/(d|j|ch|s?s)$/t/;
|
188
|
|
|
|
|
|
|
}
|
189
|
|
|
|
|
|
|
elsif ( $this =~ /(b)$/ && $novowel ) {
|
190
|
4
|
|
|
|
|
28
|
$array->[$i]->[1] =~ s/(b)$/p/;
|
191
|
|
|
|
|
|
|
}
|
192
|
|
|
|
|
|
|
elsif ( $this =~ /(r)$/ && $novowel ) {
|
193
|
10
|
|
|
|
|
46
|
$array->[$i]->[1] =~ s/(r)$/l/;
|
194
|
10
|
|
|
|
|
51
|
$array->[$i+1]->[1] =~ s/^r/l/;
|
195
|
|
|
|
|
|
|
}
|
196
|
|
|
|
|
|
|
}
|
197
|
|
|
|
|
|
|
|
198
|
76
|
50
|
|
|
|
189
|
if ( scalar @$array ) {
|
199
|
76
|
|
|
|
|
207
|
my $last = $array->[$#$array];
|
200
|
76
|
|
|
|
|
125
|
my $this = $last->[1];
|
201
|
76
|
50
|
|
|
|
1692
|
if ( $this =~ /(tt|pp|jj)$/ ) {
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
202
|
0
|
|
|
|
|
0
|
$last->[1] =~ s/(tt|pp|jj)$//;
|
203
|
|
|
|
|
|
|
}
|
204
|
|
|
|
|
|
|
elsif ( $this =~ /([^n]g|kk)$/ ) {
|
205
|
4
|
|
|
|
|
22
|
$last->[1] =~ s/(g|kk)$/k/;
|
206
|
|
|
|
|
|
|
}
|
207
|
|
|
|
|
|
|
elsif ( $this =~ /(d|j|ch|s?s)$/ ) {
|
208
|
2
|
|
|
|
|
10
|
$last->[1] =~ s/(d|j|ch|s?s)$/t/;
|
209
|
|
|
|
|
|
|
}
|
210
|
|
|
|
|
|
|
elsif ( $this =~ /(b)$/ ) {
|
211
|
3
|
|
|
|
|
15
|
$last->[1] =~ s/(b)$/p/;
|
212
|
|
|
|
|
|
|
}
|
213
|
|
|
|
|
|
|
elsif ( $this =~ /(r)$/ ) {
|
214
|
7
|
|
|
|
|
29
|
$last->[1] =~ s/(r)$/l/;
|
215
|
|
|
|
|
|
|
}
|
216
|
|
|
|
|
|
|
}
|
217
|
|
|
|
|
|
|
|
218
|
76
|
|
|
|
|
384
|
@$array;
|
219
|
|
|
|
|
|
|
}
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# ----------------------------------------------------------------
|
222
|
|
|
|
|
|
|
;1;
|