line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::SA; |
2
|
|
|
|
|
|
|
|
3
|
10
|
|
|
10
|
|
258620
|
use 5.008; |
|
10
|
|
|
|
|
42
|
|
|
10
|
|
|
|
|
445
|
|
4
|
10
|
|
|
10
|
|
64
|
use strict; |
|
10
|
|
|
|
|
27
|
|
|
10
|
|
|
|
|
374
|
|
5
|
10
|
|
|
10
|
|
70
|
use warnings; |
|
10
|
|
|
|
|
22
|
|
|
10
|
|
|
|
|
391
|
|
6
|
10
|
|
|
10
|
|
9396
|
use English qw{-no_match_vars}; |
|
10
|
|
|
|
|
45913
|
|
|
10
|
|
|
|
|
66
|
|
7
|
10
|
|
|
10
|
|
6147
|
use Carp; |
|
10
|
|
|
|
|
25
|
|
|
10
|
|
|
|
|
23965
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
require Exporter;

our @ISA = ('Exporter');

# Export policy: nothing is pushed into the caller's namespace by default
# (@EXPORT is empty).  Every public function must be requested, either by
# name or all at once through the ':all' tag:
#
#     use Lingua::SA qw(:all);
#     use Lingua::SA qw(transliterate);
our %EXPORT_TAGS = (
    'all' => [ 'transliterate', 'vibhakti', 'sandhi' ],
);

# Everything reachable through ':all' may also be imported individually.
our @EXPORT_OK = @{ $EXPORT_TAGS{'all'} };

our @EXPORT = ();

our $VERSION = '0.08';
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Preloaded methods go here. |
34
|
|
|
|
|
|
|
########################################################### |
35
|
|
|
|
|
|
|
# sandhi($text)
#
# Joins "stem + ending" pieces into one word and normalises doubled vowel
# spellings to their single-letter forms.
#
#   $text   string such as "rAm + aH", or any transliterated text.
#   returns the rewritten copy; the caller's variable is not modified.
sub sandhi {
    my ($text) = @_;

    # Drop every " + " separator, keeping the character that follows it.
    # A separator directly followed by '[' is deliberately left in place.
    $text =~ s/ \+ ([^\[])/$1/g;

    # Two-letter spellings and their one-letter equivalents.  The rules are
    # applied one after another, in this order, each over the whole string.
    my @spelling_rules = (
        [ 'aa', 'A' ],
        [ 'ii', 'I' ],
        [ 'uu', 'U' ],
        [ 'Ru', 'R' ],
    );
    for my $rule (@spelling_rules) {
        my ( $digraph, $single ) = @{$rule};
        $text =~ s/$digraph/$single/g;
    }

    return $text;
}
44
|
|
|
|
|
|
|
########################################################### |
45
|
|
|
|
|
|
|
# vibhakti(\%args)
#
# Declines a vowel-ending (svaraanta) noun.  Consonant-ending (halant)
# nouns are not handled; the original author reserved codes 8000+ for them.
#
# USAGE: my $response = vibhakti({ naam => $noun, vibhakti => $vibhakti,
#                                  linga => $linga, vachana => $vachana });
#
#   naam      the noun; its last character (after sandhi()) selects the
#             declension class and must be one of a A i I u U R
#   linga     puM | strI | napuMsaka, or 1 | 2 | 3
#   vibhakti  prathamA .. saptamI | sambodhana, or 1 .. 8
#   vachana   ekavachana | dvivachana | bahuvachana, or 1 | 2 | 3
#
# Returns the un-joined form "stem + ending" (pass it through sandhi() to
# obtain the word).  Alternative forms are separated by " | ".  Sambodhana
# results are prefixed with "he ", and bracketed as "he [ ... ]" when there
# are alternatives.
#
# Dies (confess) when an argument is missing or not recognised, or when the
# ending/gender combination has no entry in the table.
#
# 2008-06-05 Fixed 3351 and 3361 from inaaH to inaH (v 0.06)
sub vibhakti {
    my ($arg_ref) = @_;

    for my $required (qw(naam linga vibhakti vachana)) {
        confess "Argument $required not passed to vibhakti()"
            if !defined $arg_ref->{$required};
    }

    # The last character of the (normalised) noun is chopped off: it is the
    # stem-final vowel, which the endings in the table below replace.
    my $noun     = sandhi( $arg_ref->{naam} );
    my $aakaar   = chop $noun;
    my $vibhakti = sandhi( $arg_ref->{vibhakti} );
    my $linga    = sandhi( $arg_ref->{linga} );
    my $vachana  = $arg_ref->{vachana};

    # Name -> digit tables.  The digits themselves are accepted as well.
    my %aakaar  = qw(0 0 a 1 A 2 i 3 I 4 u 5 U 6 R 7);
    my %linga   = qw(puM 1 strI 2 napuMsaka 3 1 1 2 2 3 3);
    my %vachana = qw(ekavachana 1 dvivachana 2 bahuvachana 3 1 1 2 2 3 3);
    my %vibhakti = qw#prathamA 1 dvitIyA 2 tRtIyA 3 chaturthI 4 paJchamI 5
        ShaShThI 6 saptamI 7 sambodhana 8
        1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8#;

    confess "Unsupported noun supplied to vibhakti(): $noun$aakaar ending in $aakaar"
        if !defined $aakaar{$aakaar};
    confess "Invalid linga $linga supplied to vibhakti()"
        if !defined $linga{$linga};
    confess "Invalid vibhakti $vibhakti supplied to vibhakti()"
        if !defined $vibhakti{$vibhakti};
    confess "Invalid vachana $vachana supplied to vibhakti()"
        if !defined $vachana{$vachana};

    # Four-digit lookup code, ranging from 1111 to 7383:
    #   thousands  stem vowel   1=a 2=A 3=i 4=I 5=u 6=U 7=R
    #   hundreds   gender       1=puM 2=strI 3=napuMsaka
    #   tens       case         1..8 (8 = sambodhana)
    #   units      number       1=eka 2=dvi 3=bahu
    my $coef =
        $aakaar{$aakaar} * 1000 +
        $linga{$linga} * 100 +
        $vibhakti{$vibhakti} * 10 +
        $vachana{$vachana};

    ## possibilities for nouns are in the following series:
    # Masculine: 1100, 3100, 5100, 7100
    # Feminine:  2200, 3200, 4200, 5200, 6200, 7200
    # Neuter:    1300, 3300, 5300, 7300
    # 2100, 4100 exist, but the original author did not know examples.
    # not sure of 6100, 6300
    # 1200, 2300, 4300 do not exist

    # Noun suffixes, keyed by the code above.  A '|' inside a value
    # separates alternative endings.
    # 2243 and 2253 used to read "AbhyaaH"; every other dative/ablative
    # plural in this table ends in short "-bhyaH", so they are now "AbhyaH".
    my %ending = qw(

        1111 aH     1112 au       1113 AH
        1121 am     1122 au       1123 An
        1131 ena    1132 Abhyaam  1133 aiH
        1141 Aya    1142 Abhyaam  1143 ebhyaH
        1151 At     1152 Abhyaam  1153 ebhyaH
        1161 asya   1162 ayoH     1163 Anaam
        1171 e      1172 ayoH     1173 eSu
        1181 a      1182 au       1183 AH

        2111 AH     2112 au       2113 AH
        2121 Am     2122 au       2123 aH
        2131 A      2132 Abhyaam  2133 AbhiH
        2141 e      2142 Abhyaam  2143 AbhyaH
        2151 aH     2152 Abhyaam  2153 AbhyaH
        2161 aH     2162 oH       2163 Am
        2171 i      2172 oH       2173 Asu
        2181 Am     2182 au       2183 AH

        3111 iH     3112 I        3113 ayaH
        3121 im     3122 I        3123 In
        3131 inaa   3132 ibhyaam  3133 ibhiH
        3141 aye    3142 ibhyaam  3143 ibhyaH
        3151 eH     3152 ibhyaam  3153 ibhyaH
        3161 eH     3162 yoH      3163 Inaam
        3171 au     3172 yoH      3173 iSu
        3181 e      3182 I        3183 ayaH

        5111 uH     5112 U        5113 avaH
        5121 um     5122 U        5123 Un
        5131 unaa   5132 ubhyaam  5133 ubhiH
        5141 ave    5142 ubhyaam  5143 ubhyaH
        5151 oH     5152 ubhyaam  5153 ubhyaH
        5161 oH     5162 voH      5163 Unaam
        5171 au     5172 voH      5173 uSu
        5181 o      5182 U        5183 avaH

        7111 A      7112 Arau     7113 AraH
        7121 Aram   7122 Arau     7123 RRn
        7131 raa    7132 Rbhyaam  7133 RbhiH
        7141 re     7142 Rbhyaam  7143 RbhyaH
        7151 uH     7152 Rbhyaam  7153 RbhyaH
        7161 uH     7162 roH      7163 RRNaam
        7171 ari    7172 roH      7173 RSu
        7181 aH|ar  7182 Arau     7183 AraH

        2211 A      2212 e        2213 AH
        2221 Am     2222 e        2223 AH
        2231 ayaa   2232 Abhyaam  2233 AbhiH
        2241 Ayai   2242 Abhyaam  2243 AbhyaH
        2251 AyaaH  2252 Abhyaam  2253 AbhyaH
        2261 AyaaH  2262 ayoH     2263 Anaam
        2271 Ayaam  2272 ayoH     2273 Asu
        2281 e      2282 e        2283 AH

        3211 iH       3212 I        3213 ayaH
        3221 im       3222 I        3223 IH
        3231 yaa      3232 ibhyaam  3233 ibhiH
        3241 yai|aye  3242 ibhyaam  3243 ibhyaH
        3251 yaaH|eH  3252 ibhyaam  3253 ibhyaH
        3261 yaaH|eH  3262 yoH      3263 Inaam
        3271 yaam|au  3272 yoH      3273 iSu
        3281 e        3282 I        3283 ayaH

        4211 I      4212 yau      4213 yaH
        4221 Im     4222 yau      4223 IH
        4231 yaa    4232 Ibhyaam  4233 IbhiH
        4241 yai    4242 Ibhyaam  4243 IbhyaH
        4251 yaaH   4252 Ibhyaam  4253 IbhyaH
        4261 yaaH   4262 yoH      4263 Inaam
        4271 yaam   4272 yoH      4273 ISu
        4281 i      4282 yau      4283 yaH

        5211 uH       5212 U        5213 avaH
        5221 um       5222 U        5223 UH
        5231 vaa      5232 ubhyaam  5233 ubhiH
        5241 ave|vai  5242 ubhyaam  5243 ubhyaH
        5251 oH|vaaH  5252 ubhyaam  5253 ubhyaH
        5261 oH|vaaH  5262 voH      5263 Unaam
        5271 au|vaam  5272 voH      5273 uSu
        5281 o        5282 U        5283 avaH

        6211 UH     6212 vau      6213 vaH
        6221 Um     6222 vau      6223 UH
        6231 vaa    6232 Ubhyaam  6233 UbhiH
        6241 vai    6242 Ubhyaam  6243 UbhyaH
        6251 vaaH   6252 Ubhyaam  6253 UbhyaH
        6261 vaaH   6262 voH      6263 Unaam
        6271 vaam   6272 voH      6273 USu
        6281 u      6282 vau      6283 vaH

        7211 A      7212 arau     7213 araH
        7221 aram   7222 arau     7223 RRH
        7231 raa    7232 Rbhyaam  7233 RbhiH
        7241 re     7242 Rbhyaam  7243 RbhyaH
        7251 uH     7252 Rbhyaam  7253 RbhyaH
        7261 uH     7262 roH      7263 RRNaam
        7271 ari    7272 roH      7273 RSu
        7281 aH|ar  7282 arau     7283 araH

        1311 am     1312 e        1313 Ani
        1321 am     1322 e        1323 Ani
        1331 ena    1332 Abhyaam  1333 aiH
        1341 Aya    1342 Abhyaam  1343 ebhyaH
        1351 At     1352 Abhyaam  1353 ebhyaH
        1361 asya   1362 ayoH     1363 Anaam
        1371 e      1372 ayoH     1373 eSu
        1381 a      1382 e        1383 Ani

        3311 i      3312 inI      3313 Ini
        3321 i      3322 inI      3323 Ini
        3331 inaa   3332 ibhyaam  3333 ibhiH
        3341 ine    3342 ibhyaam  3343 ibhyaH
        3351 inaH   3352 ibhyaam  3353 ibhyaH
        3361 inaH   3362 inoH     3363 Inaam
        3371 ini    3372 inoH     3373 iSu
        3381 i|e    3382 inI      3383 Ini

        5311 u      5312 unI      5313 Uni
        5321 u      5322 unI      5323 Uni
        5331 unaa   5332 ubhyaam  5333 ubhiH
        5341 une    5342 ubhyaam  5343 ubhyaH
        5351 unaH   5352 ubhyaam  5353 ubhyaH
        5361 unaH   5362 unoH     5363 Unaam
        5371 uni    5372 unoH     5373 uSu
        5381 o|u    5382 unI      5383 Uni

        7311 R        7312 RNI       7313 RRNi
        7321 R        7322 RNI       7323 RRNi
        7331 raa|RNA  7332 Rbhyaam   7333 RbhiH
        7341 re|RNe   7342 Rbhyaam   7343 RbhyaH
        7351 uH|RNaH  7352 Rbhyaam   7353 RbhyaH
        7361 uH|RNaH  7362 roH|RNoH  7363 RRNaam
        7371 ari|RNi  7372 roH|RNoH  7373 RSu
        7381 aH|R     7382 RNI       7383 RRNi
    );

    # Is 3263 above dirgha as stated? ## Yes, it is

    confess "$linga nouns ending in $aakaar not supported"
        if !defined $ending{$coef};

    ### This part can cater to irregular nouns

    # ambA, akkA, allA have a-kaaraant sambodhana
    if ( $noun eq "amb" or $noun eq "akk" or $noun eq "all" ) {
        $ending{2281} = 'a';
    }

    my $endcoef = $ending{$coef};

    # Natva converts n to N when the stem contains r, R or S and nothing but
    # vowels and the letters listed in $Natva stand between that letter and
    # the end of the stem (aa~N pratyay is not implemented).
    # Additionally, n can not be halant: an n that ends the suffix is kept.
    my $Natva = "h|y|v|k(h)?|g(h)?|G|p(h)?|b(h)?|m";

    # vowel is as defined in split_word
    my $vowel = "(A|H|I|M|R(R|u)?|U|a(a|i|u)?|i(i)?|e|lR|o(M)?|u(u)?|\\:|\\|(\\|)?)";

    ### This part can be expanded to include exceptions/options

    # The two strings above are alternations and must be combined inside a
    # group.  Interpolating them into a [...] class (as this test once did)
    # turns them into a bag of single characters, which also lets a bare
    # consonant "l" through and gives e.g. "saral + eNa".
    if ( $noun =~ m/[rRS](?:$Natva|$vowel)*$/ ) {
        $endcoef =~ s/n([a-zA-Z])/N$1/;
    }

    # One "stem + ending" per alternative ending, separated by " | ".
    my $inflected =
        join( " | ", map { "$noun + $_" } split( /\|/, $endcoef ) );

    ## if sambodhan, prepend he (bracketing the alternatives, if any)
    if ( $vibhakti{$vibhakti} == 8 ) {
        if ( $endcoef =~ m/\|/ ) {
            $inflected = "he [ $inflected ]";
        }
        else {
            $inflected = "he $inflected";
        }
    }

    return $inflected;
} ## end sub vibhakti
298
|
|
|
|
|
|
|
############################### |
299
|
|
|
|
|
|
|
# transliterate($english)
#
# Converts a transliterated string to its Devanagari spelling.  The input
# is cut into words on whitespace; each word is broken into tokens by
# split_word() and each token is translated by match_code().
#
# Returns one string in which every word, including the last one, is
# followed by a single space.
sub transliterate {
    my ($english) = @_;

    my $output = "";
    for my $word ( split( /\s+/, $english ) ) {
        $output .= join( "", map { match_code($_) } split_word($word) );
        $output .= " ";
    }

    return $output;
}
313
|
|
|
|
|
|
|
############################### |
314
|
|
|
|
|
|
|
# match_code($token)
#
# Looks up one token in the transliteration table and returns the
# Devanagari text stored for it.  A token that has no entry is returned
# unchanged.
#
# Keys starting with "~" carry the independent (stand-alone) letter for a
# vowel; the same key without "~" carries the vowel sign written after a
# consonant.  "a" maps to the empty string and "*" maps to the virama.
sub match_code {
    my ($token) = @_;

    my %devanagari_for = (
        # stand-alone vowels
        "~a"  => "अ", "~aa" => "आ", "~A"  => "आ",
        "~i"  => "इ", "~ii" => "ई", "~uu" => "ऊ",
        "ii"  => "ी", "~I"  => "ई", "~u"  => "उ",
        "~U"  => "ऊ", "~R"  => "ऋ", "~Ru" => "ऋ",
        "~lR" => "ऌ", "~RR" => "ॠ", "~e"  => "ए",
        "~ai" => "ऐ", "~o"  => "ओ", "~au" => "औ",

        # vowel signs
        "a"  => "",  "aa" => "ा", "A"  => "ा",
        "i"  => "ि", "I"  => "ी", "u"  => "ु",
        "uu" => "ू", "R"  => "ृ", "lR" => "ॢ",
        "e"  => "े", "ai" => "ै",
        "U"  => "ू", "Ru" => "ृ",
        "RR" => "ॄ", "o"  => "ो", "au" => "ौ",

        # consonants
        "k"  => "क", "kh" => "ख", "g"  => "ग",
        "gh" => "घ", "G"  => "ङ", "c"  => "च",
        "ch" => "च", "C"  => "छ", "Ch" => "छ",
        "j"  => "ज", "jh" => "झ", "J"  => "ञ",
        "T"  => "ट", "Th" => "ठ", "D"  => "ड",
        "Dh" => "ढ", "N"  => "ण", "t"  => "त",
        "th" => "थ", "d"  => "द", "dh" => "ध",
        "n"  => "न", "p"  => "प", "ph" => "फ",
        "b"  => "ब", "bh" => "भ", "m"  => "म",
        "y"  => "य", "r"  => "र", "l"  => "ल",
        "L"  => "ळ",
        "v"  => "व", "z"  => "श", "sh" => "श",
        "S"  => "ष", "Sh" => "ष", "s"  => "स",
        "h"  => "ह",

        # signs, punctuation and accents
        "H"   => "ः", ":"   => "ः",
        "M"   => "ं", "|"   => "।", "||"  => "॥",
        "oM"  => "ॐ", "~H"  => "ः", "~:"  => "ः",
        "~M"  => "ं", "~|"  => "।", "~||" => "॥",
        "\$"  => "ऽ", "^"   => "॑", "_"   => "॒",
        "`"   => "॓", "'"   => "॔", "\@"  => "॰",
        "~oM" => "ॐ", "*"   => "्", "CB"  => "ँ",
    );

    # RR 2400 lRR 2401 _lR 2402 _lRR 2403 chandra-bindu 2305

    my $glyph = $devanagari_for{$token};
    return defined $glyph ? $glyph : $token;
} ## end sub match_code
358
|
|
|
|
|
|
|
######################################## |
359
|
|
|
|
|
|
|
# split_word($word)
#
# Breaks one transliterated word into the tokens that match_code()
# understands and returns them as a list, in order:
#
#   * a vowel that starts the word or directly follows another vowel token
#     is returned with a "~" prefix (e.g. "~a");
#   * a vowel that directly follows a consonant is returned as is;
#   * a consonant is returned as is, followed by a "*" token when the text
#     after it does not start with a vowel (including at end of word);
#   * any other character is returned on its own, unchanged.
#
# An empty or undefined $word gives an empty list.
sub split_word {
    my ($word) = @_;

    # vowels is copied as is in vibhakti
    my $vowels = qr/(A|H|I|M|R(R|u)?|U|a(a|i|u)?|i(i)?|e|lR|o(M)?|u(u)?|\:|\|(\|)?)/;
    my $consonants =
        qr/(C(h|B)?|D(h)?|G|J|N|S(h)?|T(h)?|b(h)?|c(h)?|d(h)?|g(h)?|h|j(h)?|k(h)?|l|m|n|p(h)?|r|s(h)?|t(h)?|v|y|z|L)/;

    my @syllables;

    # True while the next vowel is not attached to a consonant.
    my $vowel_start_p = 1;

    # Loop on length, not truth: a remaining "0" is false in Perl and would
    # otherwise end the loop early and be dropped from the output.
    while ( defined $word and length $word ) {
        if ( $word =~ m/\A$vowels/ ) {
            my $matched = $1;
            push( @syllables, $vowel_start_p ? "~$matched" : $matched );
            $vowel_start_p = 1;
            $word = substr( $word, length($matched) );
        }
        elsif ( $word =~ m/\A$consonants/ ) {
            my $matched = $1;
            push( @syllables, $matched );
            $vowel_start_p = 0;
            $word = substr( $word, length($matched) );

            # No vowel right after the consonant: emit the "*" token.
            push( @syllables, "*" ) if $word !~ m/\A$vowels/;
        }
        else {
            # Neither vowel nor consonant: pass one character through.
            push( @syllables, substr( $word, 0, 1 ) );
            $word = substr( $word, 1 );
        }
    }

    return @syllables;
} ## end sub split_word
408
|
|
|
|
|
|
|
########################### |
409
|
|
|
|
|
|
|
1; |
410
|
|
|
|
|
|
|
__END__ |